| Message ID | 20170621162424.10462-8-rkagan@virtuozzo.com (mailing list archive) |
|---|---|
| State | New, archived |
On Wed, 21 Jun 2017 19:24:08 +0300
Roman Kagan <rkagan@virtuozzo.com> wrote:

> Hyper-V identifies vCPUs by Virtual Processor (VP) index which can be
> queried by the guest via HV_X64_MSR_VP_INDEX msr. It is defined by the
> spec as a sequential number which can't exceed the maximum number of
> vCPUs per VM.
>
> It has to be owned by QEMU in order to preserve it across migration.
>
> However, the initial implementation in KVM didn't allow to set this
> msr, and KVM used its own notion of VP index. Fortunately, the way
> vCPUs are created in QEMU/KVM makes it likely that the KVM value is
> equal to QEMU cpu_index.
>
> So choose cpu_index as the value for vp_index, and push that to KVM on
> kernels that support setting the msr. On older ones that don't, query
> the kernel value and assert that it's in sync with QEMU.
>
> Besides, since handling errors from vCPU init at hotplug time is
> impossible, disable vCPU hotplug.

The proper place to check whether a cpu may be created is
pc_cpu_pre_plug(), where you can gracefully abort the cpu creation
process.

Also it's possible to create cold-plugged CPUs out of order using
"-device cpu-foo" on the CLI; will the hyperv kvm/guest side be ok
with it?

> This patch also introduces accessor functions to wrap the mapping
> between a vCPU and its vp_index. Besides, a few variables are renamed
> to avoid confusion of vp_index with vcpu_id (== apic_id).
>
> Signed-off-by: Roman Kagan <rkagan@virtuozzo.com>
> ---
> v1 -> v2:
>  - were patches 5, 6 in v1
>  - move vp_index initialization to hyperv_init_vcpu
>  - check capability before trying to set the msr
>  - set the msr on the usual kvm_put_msrs path
>  - disable cpu hotplug if msr is not settable
>
>  target/i386/hyperv.h |  5 ++++-
>  target/i386/hyperv.c | 16 +++++++++++++---
>  target/i386/kvm.c    | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 68 insertions(+), 4 deletions(-)
>
> diff --git a/target/i386/hyperv.h b/target/i386/hyperv.h
> index 0c3b562..82f4757 100644
> --- a/target/i386/hyperv.h
> +++ b/target/i386/hyperv.h
> @@ -32,11 +32,14 @@ struct HvSintRoute {
>
>  int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit);
>
> -HvSintRoute *kvm_hv_sint_route_create(uint32_t vcpu_id, uint32_t sint,
> +HvSintRoute *kvm_hv_sint_route_create(uint32_t vp_index, uint32_t sint,
>                                        HvSintAckClb sint_ack_clb);
>
>  void kvm_hv_sint_route_destroy(HvSintRoute *sint_route);
>
>  int kvm_hv_sint_route_set_sint(HvSintRoute *sint_route);
>
> +uint32_t hyperv_vp_index(X86CPU *cpu);
> +X86CPU *hyperv_find_vcpu(uint32_t vp_index);
> +
>  #endif
> diff --git a/target/i386/hyperv.c b/target/i386/hyperv.c
> index 227185c..4f57447 100644
> --- a/target/i386/hyperv.c
> +++ b/target/i386/hyperv.c
> @@ -16,6 +16,16 @@
>  #include "hyperv.h"
>  #include "hyperv_proto.h"
>
> +uint32_t hyperv_vp_index(X86CPU *cpu)
> +{
> +    return CPU(cpu)->cpu_index;
> +}
>
> +X86CPU *hyperv_find_vcpu(uint32_t vp_index)
> +{
> +    return X86_CPU(qemu_get_cpu(vp_index));
> +}

This helper isn't used in this patch; add it in the patch that
actually uses it.

Also, if qemu_get_cpu() were called from each CPU init, it would incur
O(N^2) complexity; could you do without it?

> +
>  int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit)
>  {
>      CPUX86State *env = &cpu->env;
> @@ -72,7 +82,7 @@ static void kvm_hv_sint_ack_handler(EventNotifier *notifier)
>      }
>  }
>
> -HvSintRoute *kvm_hv_sint_route_create(uint32_t vcpu_id, uint32_t sint,
> +HvSintRoute *kvm_hv_sint_route_create(uint32_t vp_index, uint32_t sint,
>                                        HvSintAckClb sint_ack_clb)
>  {
>      HvSintRoute *sint_route;
> @@ -92,7 +102,7 @@ HvSintRoute *kvm_hv_sint_route_create(uint32_t vcpu_id, uint32_t sint,
>      event_notifier_set_handler(&sint_route->sint_ack_notifier,
>                                 kvm_hv_sint_ack_handler);
>
> -    gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vcpu_id, sint);
> +    gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vp_index, sint);
>      if (gsi < 0) {
>          goto err_gsi;
>      }
> @@ -105,7 +115,7 @@ HvSintRoute *kvm_hv_sint_route_create(uint32_t vcpu_id, uint32_t sint,
>      }
>      sint_route->gsi = gsi;
>      sint_route->sint_ack_clb = sint_ack_clb;
> -    sint_route->vcpu_id = vcpu_id;
> +    sint_route->vcpu_id = vp_index;

      ^^^ - shouldn't it also be re-named?

Maybe split all the renaming into a separate patch ...

>      sint_route->sint = sint;
>
>      return sint_route;
> diff --git a/target/i386/kvm.c b/target/i386/kvm.c
> index 2795b63..9bf7f08 100644
> --- a/target/i386/kvm.c
> +++ b/target/i386/kvm.c
> @@ -86,6 +86,7 @@ static bool has_msr_hv_hypercall;
>  static bool has_msr_hv_crash;
>  static bool has_msr_hv_reset;
>  static bool has_msr_hv_vpindex;
> +static bool is_hv_vpindex_settable;
>  static bool has_msr_hv_runtime;
>  static bool has_msr_hv_synic;
>  static bool has_msr_hv_stimer;
> @@ -665,6 +666,43 @@ static int hyperv_handle_properties(CPUState *cs)
>      return 0;
>  }
>
> +static int hyperv_init_vcpu(X86CPU *cpu)
> +{
> +    if (cpu->hyperv_vpindex && !is_hv_vpindex_settable) {
> +        /*
> +         * the kernel doesn't support setting vp_index; assert that its value
> +         * is in sync
> +         */
> +        int ret;
> +        struct {
> +            struct kvm_msrs info;
> +            struct kvm_msr_entry entries[1];
> +        } msr_data = {
> +            .info.nmsrs = 1,
> +            .entries[0].index = HV_X64_MSR_VP_INDEX,
> +        };
> +
> +        /*
> +         * handling errors from vcpu init at hotplug time is impossible, so
> +         * disallow cpu hotplug
> +         */
> +        MACHINE_GET_CLASS(current_machine)->hot_add_cpu = NULL;

One shouldn't alter the machine this way, nor would it disable cpu
hotplug: it would only disable the cpu-add interface, while device_add
would still work.

> +        ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data);
> +        if (ret < 0) {

When can this fail?

> +            return ret;
> +        }
> +        assert(ret == 1);
> +
> +        if (msr_data.entries[0].data != hyperv_vp_index(cpu)) {
> +            fprintf(stderr, "kernel's vp_index != QEMU's vp_index\n");

error_report()

> +            return -ENXIO;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
>  static Error *invtsc_mig_blocker;
>
>  #define KVM_MAX_CPUID_ENTRIES 100
> @@ -1013,6 +1051,13 @@ int kvm_arch_init_vcpu(CPUState *cs)
>          has_msr_tsc_aux = false;
>      }
>
> +    if (hyperv_enabled(cpu)) {
> +        r = hyperv_init_vcpu(cpu);
> +        if (r) {
> +            goto fail;
> +        }
> +    }
> +
>      return 0;
>
>  fail:
> @@ -1204,6 +1249,8 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
>      has_pit_state2 = kvm_check_extension(s, KVM_CAP_PIT_STATE2);
>  #endif
>
> +    is_hv_vpindex_settable = kvm_check_extension(s, KVM_CAP_HYPERV_VP_INDEX);
> +
>      ret = kvm_get_supported_msrs(s);
>      if (ret < 0) {
>          return ret;
> @@ -1748,6 +1795,10 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
>      if (has_msr_hv_runtime) {
>          kvm_msr_entry_add(cpu, HV_X64_MSR_VP_RUNTIME, env->msr_hv_runtime);
>      }
> +    if (cpu->hyperv_vpindex && has_msr_hv_vpindex &&
> +        is_hv_vpindex_settable) {
> +        kvm_msr_entry_add(cpu, HV_X64_MSR_VP_INDEX, hyperv_vp_index(cpu));
> +    }
>      if (cpu->hyperv_synic) {
>          int j;
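A minimal sketch of the graceful abort Igor suggests, done at pre-plug
time instead of clearing hot_add_cpu at vcpu init; the
hyperv_vp_index_settable() helper and the exact condition are
assumptions, not part of the posted patch:

    /* hw/i386/pc.c: hypothetical check that rejects hotplug via errp
     * instead of letting vcpu init fail later, assuming a helper that
     * reports whether the kernel can set HV_X64_MSR_VP_INDEX */
    static void pc_cpu_pre_plug(HotplugHandler *hotplug_dev,
                                DeviceState *dev, Error **errp)
    {
        X86CPU *cpu = X86_CPU(dev);

        if (dev->hotplugged && cpu->hyperv_vpindex &&
            !hyperv_vp_index_settable()) {
            error_setg(errp, "kernel does not allow setting "
                       "HV_X64_MSR_VP_INDEX; vCPU hotplug is not possible");
            return;
        }

        /* ... existing pre-plug checks continue here ... */
    }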
On Wed, Jun 28, 2017 at 04:47:43PM +0200, Igor Mammedov wrote:
> On Wed, 21 Jun 2017 19:24:08 +0300
> Roman Kagan <rkagan@virtuozzo.com> wrote:
>
> > Hyper-V identifies vCPUs by Virtual Processor (VP) index which can be
> > queried by the guest via HV_X64_MSR_VP_INDEX msr. It is defined by the
> > spec as a sequential number which can't exceed the maximum number of
> > vCPUs per VM.
> >
> > It has to be owned by QEMU in order to preserve it across migration.
> >
> > However, the initial implementation in KVM didn't allow to set this
> > msr, and KVM used its own notion of VP index. Fortunately, the way
> > vCPUs are created in QEMU/KVM makes it likely that the KVM value is
> > equal to QEMU cpu_index.
> >
> > So choose cpu_index as the value for vp_index, and push that to KVM on
> > kernels that support setting the msr. On older ones that don't, query
> > the kernel value and assert that it's in sync with QEMU.
> >
> > Besides, since handling errors from vCPU init at hotplug time is
> > impossible, disable vCPU hotplug.
>
> The proper place to check whether a cpu may be created is
> pc_cpu_pre_plug(), where you can gracefully abort the cpu creation
> process.

Thanks for the suggestion, I'll rework it this way.

> Also it's possible to create cold-plugged CPUs out of order using
> "-device cpu-foo" on the CLI; will the hyperv kvm/guest side be ok
> with it?

On kernels that support setting HV_X64_MSR_VP_INDEX QEMU will
synchronize all sides. On kernels that don't, if out-of-order creation
results in a vp_index mismatch between the kernel and QEMU, vcpu
creation will fail.

> > This patch also introduces accessor functions to wrap the mapping
> > between a vCPU and its vp_index. Besides, a few variables are renamed
> > to avoid confusion of vp_index with vcpu_id (== apic_id).
> >
> > Signed-off-by: Roman Kagan <rkagan@virtuozzo.com>
> > ---
> > v1 -> v2:
> >  - were patches 5, 6 in v1
> >  - move vp_index initialization to hyperv_init_vcpu
> >  - check capability before trying to set the msr
> >  - set the msr on the usual kvm_put_msrs path
> >  - disable cpu hotplug if msr is not settable
> >
> >  target/i386/hyperv.h |  5 ++++-
> >  target/i386/hyperv.c | 16 +++++++++++++---
> >  target/i386/kvm.c    | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
> >  3 files changed, 68 insertions(+), 4 deletions(-)
> >
> > diff --git a/target/i386/hyperv.h b/target/i386/hyperv.h
> > index 0c3b562..82f4757 100644
> > --- a/target/i386/hyperv.h
> > +++ b/target/i386/hyperv.h
> > @@ -32,11 +32,14 @@ struct HvSintRoute {
> >
> >  int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit);
> >
> > -HvSintRoute *kvm_hv_sint_route_create(uint32_t vcpu_id, uint32_t sint,
> > +HvSintRoute *kvm_hv_sint_route_create(uint32_t vp_index, uint32_t sint,
> >                                        HvSintAckClb sint_ack_clb);
> >
> >  void kvm_hv_sint_route_destroy(HvSintRoute *sint_route);
> >
> >  int kvm_hv_sint_route_set_sint(HvSintRoute *sint_route);
> >
> > +uint32_t hyperv_vp_index(X86CPU *cpu);
> > +X86CPU *hyperv_find_vcpu(uint32_t vp_index);
> > +
> >  #endif
> > diff --git a/target/i386/hyperv.c b/target/i386/hyperv.c
> > index 227185c..4f57447 100644
> > --- a/target/i386/hyperv.c
> > +++ b/target/i386/hyperv.c
> > @@ -16,6 +16,16 @@
> >  #include "hyperv.h"
> >  #include "hyperv_proto.h"
> >
> > +uint32_t hyperv_vp_index(X86CPU *cpu)
> > +{
> > +    return CPU(cpu)->cpu_index;
> > +}
> >
> > +X86CPU *hyperv_find_vcpu(uint32_t vp_index)
> > +{
> > +    return X86_CPU(qemu_get_cpu(vp_index));
> > +}
>
> This helper isn't used in this patch; add it in the patch that
> actually uses it.

I thought I would put the only two functions that encapsulate the
knowledge of how vp_index is related to cpu_index in a single patch.

I'm now thinking of open-coding the iteration over cpus here and
directly looking for the cpu whose hyperv_vp_index() matches. Then that
knowledge will become encapsulated in a single place, and indeed, this
helper can go into another patch where it's used.

> Also, if qemu_get_cpu() were called from each CPU init, it would incur
> O(N^2) complexity; could you do without it?

It isn't called on hot paths (ATM it's called only when SINT routes are
created, which is at most one per cpu). I don't see a problem here.

> > @@ -105,7 +115,7 @@ HvSintRoute *kvm_hv_sint_route_create(uint32_t vcpu_id, uint32_t sint,
> >      }
> >      sint_route->gsi = gsi;
> >      sint_route->sint_ack_clb = sint_ack_clb;
> > -    sint_route->vcpu_id = vcpu_id;
> > +    sint_route->vcpu_id = vp_index;
>
>       ^^^ - shouldn't it also be re-named?

Right, but vcpu_id on sint_route is replaced with an X86CPU pointer in a
followup patch, so I wasn't sure if it was worthwhile to add more
churn...

> Maybe split all the renaming into a separate patch ...

Part of the renaming will disappear eventually in the followup patches,
so I'm not sure it's a good idea... Opinions?

> > +static int hyperv_init_vcpu(X86CPU *cpu)
> > +{
> > +    if (cpu->hyperv_vpindex && !is_hv_vpindex_settable) {
> > +        /*
> > +         * the kernel doesn't support setting vp_index; assert that its value
> > +         * is in sync
> > +         */
> > +        int ret;
> > +        struct {
> > +            struct kvm_msrs info;
> > +            struct kvm_msr_entry entries[1];
> > +        } msr_data = {
> > +            .info.nmsrs = 1,
> > +            .entries[0].index = HV_X64_MSR_VP_INDEX,
> > +        };
> > +
> > +        /*
> > +         * handling errors from vcpu init at hotplug time is impossible, so
> > +         * disallow cpu hotplug
> > +         */
> > +        MACHINE_GET_CLASS(current_machine)->hot_add_cpu = NULL;
>
> One shouldn't alter the machine this way, nor would it disable cpu
> hotplug: it would only disable the cpu-add interface, while device_add
> would still work.

Understood, will rework as you suggest above.

> > +        ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data);
> > +        if (ret < 0) {
>
> When can this fail?

Dunno, but not checking for errors doesn't look good.

> > +            return ret;
> > +        }
> > +        assert(ret == 1);
> > +
> > +        if (msr_data.entries[0].data != hyperv_vp_index(cpu)) {
> > +            fprintf(stderr, "kernel's vp_index != QEMU's vp_index\n");
>
> error_report()

OK (target/i386/kvm.c is already a mix of all possible styles of error
reporting, I must've followed a wrong one).

Thanks,
Roman.
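For reference, the open-coded lookup Roman describes could be a linear
scan with the existing CPU_FOREACH macro; a sketch, not part of the
posted patch:

    X86CPU *hyperv_find_vcpu(uint32_t vp_index)
    {
        CPUState *cs;

        /* linear scan is fine here: SINT routes are created at most
         * once per cpu, so this is not on a hot path */
        CPU_FOREACH(cs) {
            X86CPU *cpu = X86_CPU(cs);
            if (hyperv_vp_index(cpu) == vp_index) {
                return cpu;
            }
        }
        return NULL;
    }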
On Thu, 29 Jun 2017 12:53:27 +0300
Roman Kagan <rkagan@virtuozzo.com> wrote:

> On Wed, Jun 28, 2017 at 04:47:43PM +0200, Igor Mammedov wrote:
> > On Wed, 21 Jun 2017 19:24:08 +0300
> > Roman Kagan <rkagan@virtuozzo.com> wrote:
> >
> > > Hyper-V identifies vCPUs by Virtual Processor (VP) index which can be
> > > queried by the guest via HV_X64_MSR_VP_INDEX msr. It is defined by the
> > > spec as a sequential number which can't exceed the maximum number of
> > > vCPUs per VM.
> > >
> > > It has to be owned by QEMU in order to preserve it across migration.
> > >
> > > However, the initial implementation in KVM didn't allow to set this
> > > msr, and KVM used its own notion of VP index. Fortunately, the way
> > > vCPUs are created in QEMU/KVM makes it likely that the KVM value is
> > > equal to QEMU cpu_index.
> > >
> > > So choose cpu_index as the value for vp_index, and push that to KVM on
> > > kernels that support setting the msr. On older ones that don't, query
> > > the kernel value and assert that it's in sync with QEMU.
> > >
> > > Besides, since handling errors from vCPU init at hotplug time is
> > > impossible, disable vCPU hotplug.
> >
> > The proper place to check whether a cpu may be created is
> > pc_cpu_pre_plug(), where you can gracefully abort the cpu creation
> > process.
>
> Thanks for the suggestion, I'll rework it this way.
>
> > Also it's possible to create cold-plugged CPUs out of order using
> > "-device cpu-foo" on the CLI; will the hyperv kvm/guest side be ok
> > with it?
>
> On kernels that support setting HV_X64_MSR_VP_INDEX QEMU will
> synchronize all sides. On kernels that don't, if out-of-order creation
> results in a vp_index mismatch between the kernel and QEMU, vcpu
> creation will fail.

An additional question: what would happen if a VM is started on a host
supporting VP index setting and then migrated to a host without it?

> > > This patch also introduces accessor functions to wrap the mapping
> > > between a vCPU and its vp_index. Besides, a few variables are renamed
> > > to avoid confusion of vp_index with vcpu_id (== apic_id).
> > >
> > > Signed-off-by: Roman Kagan <rkagan@virtuozzo.com>
> > > ---
> > > v1 -> v2:
> > >  - were patches 5, 6 in v1
> > >  - move vp_index initialization to hyperv_init_vcpu
> > >  - check capability before trying to set the msr
> > >  - set the msr on the usual kvm_put_msrs path
> > >  - disable cpu hotplug if msr is not settable
> > >
> > >  target/i386/hyperv.h |  5 ++++-
> > >  target/i386/hyperv.c | 16 +++++++++++++---
> > >  target/i386/kvm.c    | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
> > >  3 files changed, 68 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/target/i386/hyperv.h b/target/i386/hyperv.h
> > > index 0c3b562..82f4757 100644
> > > --- a/target/i386/hyperv.h
> > > +++ b/target/i386/hyperv.h
> > > @@ -32,11 +32,14 @@ struct HvSintRoute {
> > >
> > >  int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit);
> > >
> > > -HvSintRoute *kvm_hv_sint_route_create(uint32_t vcpu_id, uint32_t sint,
> > > +HvSintRoute *kvm_hv_sint_route_create(uint32_t vp_index, uint32_t sint,
> > >                                        HvSintAckClb sint_ack_clb);
> > >
> > >  void kvm_hv_sint_route_destroy(HvSintRoute *sint_route);
> > >
> > >  int kvm_hv_sint_route_set_sint(HvSintRoute *sint_route);
> > >
> > > +uint32_t hyperv_vp_index(X86CPU *cpu);
> > > +X86CPU *hyperv_find_vcpu(uint32_t vp_index);
> > > +
> > >  #endif
> > > diff --git a/target/i386/hyperv.c b/target/i386/hyperv.c
> > > index 227185c..4f57447 100644
> > > --- a/target/i386/hyperv.c
> > > +++ b/target/i386/hyperv.c
> > > @@ -16,6 +16,16 @@
> > >  #include "hyperv.h"
> > >  #include "hyperv_proto.h"
> > >
> > > +uint32_t hyperv_vp_index(X86CPU *cpu)
> > > +{
> > > +    return CPU(cpu)->cpu_index;
> > > +}
> > >
> > > +X86CPU *hyperv_find_vcpu(uint32_t vp_index)
> > > +{
> > > +    return X86_CPU(qemu_get_cpu(vp_index));
> > > +}
> >
> > This helper isn't used in this patch; add it in the patch that
> > actually uses it.
>
> I thought I would put the only two functions that encapsulate the
> knowledge of how vp_index is related to cpu_index in a single patch.
>
> I'm now thinking of open-coding the iteration over cpus here and
> directly looking for the cpu whose hyperv_vp_index() matches. Then that
> knowledge will become encapsulated in a single place, and indeed, this
> helper can go into another patch where it's used.
>
> > Also, if qemu_get_cpu() were called from each CPU init, it would
> > incur O(N^2) complexity; could you do without it?
>
> It isn't called on hot paths (ATM it's called only when SINT routes are
> created, which is at most one per cpu). I don't see a problem here.

For what/where do you need this lookup?

> > > @@ -105,7 +115,7 @@ HvSintRoute *kvm_hv_sint_route_create(uint32_t vcpu_id, uint32_t sint,
> > >      }
> > >      sint_route->gsi = gsi;
> > >      sint_route->sint_ack_clb = sint_ack_clb;
> > > -    sint_route->vcpu_id = vcpu_id;
> > > +    sint_route->vcpu_id = vp_index;
> >
> >       ^^^ - shouldn't it also be re-named?
>
> Right, but vcpu_id on sint_route is replaced with an X86CPU pointer in
> a followup patch, so I wasn't sure if it was worthwhile to add more
> churn...
>
> > Maybe split all the renaming into a separate patch ...
>
> Part of the renaming will disappear eventually in the followup patches,
> so I'm not sure it's a good idea... Opinions?

I'd still split, not to distract reviewers from what this patch
actually tries to accomplish, at least.

> > > +static int hyperv_init_vcpu(X86CPU *cpu)
> > > +{
> > > +    if (cpu->hyperv_vpindex && !is_hv_vpindex_settable) {
> > > +        /*
> > > +         * the kernel doesn't support setting vp_index; assert that its value
> > > +         * is in sync
> > > +         */
> > > +        int ret;
> > > +        struct {
> > > +            struct kvm_msrs info;
> > > +            struct kvm_msr_entry entries[1];
> > > +        } msr_data = {
> > > +            .info.nmsrs = 1,
> > > +            .entries[0].index = HV_X64_MSR_VP_INDEX,
> > > +        };
> > > +
> > > +        /*
> > > +         * handling errors from vcpu init at hotplug time is impossible, so
> > > +         * disallow cpu hotplug
> > > +         */
> > > +        MACHINE_GET_CLASS(current_machine)->hot_add_cpu = NULL;
> >
> > One shouldn't alter the machine this way, nor would it disable cpu
> > hotplug: it would only disable the cpu-add interface, while
> > device_add would still work.
>
> Understood, will rework as you suggest above.
>
> > > +        ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data);
> > > +        if (ret < 0) {
> >
> > When can this fail?
>
> Dunno, but not checking for errors doesn't look good.

The point of asking is to confirm that the call shouldn't fail normally,
and that if it fails we can't recover and should die.

Looking at the condition above (!is_hv_vpindex_settable), it looks like
the call wouldn't fail (i.e. it makes sure that qemu is running on a
supported kernel).

> > > +            return ret;
> > > +        }
> > > +        assert(ret == 1);
> > > +
> > > +        if (msr_data.entries[0].data != hyperv_vp_index(cpu)) {
> > > +            fprintf(stderr, "kernel's vp_index != QEMU's vp_index\n");
> >
> > error_report()
>
> OK (target/i386/kvm.c is already a mix of all possible styles of error
> reporting, I must've followed a wrong one).
>
> Thanks,
> Roman.
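The error_report() variant Igor asks for might read as follows; the
message wording is illustrative:

    if (msr_data.entries[0].data != hyperv_vp_index(cpu)) {
        /* error_report() from qemu/error-report.h replaces the raw
         * fprintf(stderr, ...) used in the posted patch */
        error_report("kernel's vp_index %" PRIu64
                     " differs from QEMU's vp_index %u",
                     (uint64_t)msr_data.entries[0].data,
                     hyperv_vp_index(cpu));
        return -ENXIO;
    }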
On Thu, Jun 29, 2017 at 01:53:29PM +0200, Igor Mammedov wrote:
> On Thu, 29 Jun 2017 12:53:27 +0300
> Roman Kagan <rkagan@virtuozzo.com> wrote:
>
> > On Wed, Jun 28, 2017 at 04:47:43PM +0200, Igor Mammedov wrote:
> > > On Wed, 21 Jun 2017 19:24:08 +0300
> > > Roman Kagan <rkagan@virtuozzo.com> wrote:
> > >
> > > > Hyper-V identifies vCPUs by Virtual Processor (VP) index which can be
> > > > queried by the guest via HV_X64_MSR_VP_INDEX msr. It is defined by the
> > > > spec as a sequential number which can't exceed the maximum number of
> > > > vCPUs per VM.
> > > >
> > > > It has to be owned by QEMU in order to preserve it across migration.
> > > >
> > > > However, the initial implementation in KVM didn't allow to set this
> > > > msr, and KVM used its own notion of VP index. Fortunately, the way
> > > > vCPUs are created in QEMU/KVM makes it likely that the KVM value is
> > > > equal to QEMU cpu_index.
> > > >
> > > > So choose cpu_index as the value for vp_index, and push that to KVM on
> > > > kernels that support setting the msr. On older ones that don't, query
> > > > the kernel value and assert that it's in sync with QEMU.
> > > >
> > > > Besides, since handling errors from vCPU init at hotplug time is
> > > > impossible, disable vCPU hotplug.
> > >
> > > The proper place to check whether a cpu may be created is
> > > pc_cpu_pre_plug(), where you can gracefully abort the cpu creation
> > > process.
> >
> > Thanks for the suggestion, I'll rework it this way.
> >
> > > Also it's possible to create cold-plugged CPUs out of order using
> > > "-device cpu-foo" on the CLI; will the hyperv kvm/guest side be ok
> > > with it?
> >
> > On kernels that support setting HV_X64_MSR_VP_INDEX QEMU will
> > synchronize all sides. On kernels that don't, if out-of-order creation
> > results in a vp_index mismatch between the kernel and QEMU, vcpu
> > creation will fail.
>
> An additional question: what would happen if a VM is started on a host
> supporting VP index setting and then migrated to a host without it?

The destination QEMU will attempt to initialize vCPUs, and if that
fails (e.g. due to a vp_index mismatch), the migration will be aborted
and the source VM will continue running.

If the destination QEMU is old, too, there's a chance that vp_index will
change. Then we just keep our fingers crossed that the guest doesn't
notice (this is the behavior we have now).

> > > > +X86CPU *hyperv_find_vcpu(uint32_t vp_index)
> > > > +{
> > > > +    return X86_CPU(qemu_get_cpu(vp_index));
> > > > +}
> > >
> > > This helper isn't used in this patch; add it in the patch that
> > > actually uses it.
> >
> > I thought I would put the only two functions that encapsulate the
> > knowledge of how vp_index is related to cpu_index in a single patch.
> >
> > I'm now thinking of open-coding the iteration over cpus here and
> > directly looking for the cpu whose hyperv_vp_index() matches. Then
> > that knowledge will become encapsulated in a single place, and indeed,
> > this helper can go into another patch where it's used.
> >
> > > Also, if qemu_get_cpu() were called from each CPU init, it would
> > > incur O(N^2) complexity; could you do without it?
> >
> > It isn't called on hot paths (ATM it's called only when SINT routes
> > are created, which is at most one per cpu). I don't see a problem
> > here.
>
> For what/where do you need this lookup?

The guest configures devices to signal their events with synthetic
interrupts on specific cpus, identified by vp_index. When we receive
such a request we look up the cpu and set up a SINT route to be able to
deliver interrupts to its synic.

Or did I misunderstand the question perhaps?

> > > Maybe split all the renaming into a separate patch ...
> >
> > Part of the renaming will disappear eventually in the followup
> > patches, so I'm not sure it's a good idea... Opinions?
>
> I'd still split, not to distract reviewers from what this patch
> actually tries to accomplish, at least.

OK, makes sense.

> > > > +        ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data);
> > > > +        if (ret < 0) {
> > >
> > > When can this fail?
> >
> > Dunno, but not checking for errors doesn't look good.
>
> The point of asking is to confirm that the call shouldn't fail
> normally, and that if it fails we can't recover and should die.
>
> Looking at the condition above (!is_hv_vpindex_settable), it looks like
> the call wouldn't fail (i.e. it makes sure that qemu is running on a
> supported kernel).

No, I do KVM_GET_MSRS here, which is supported since this msr was
introduced.

But actually you've spotted a bug which I meant to fix but forgot: the
code in hyperv_handle_properties should fail if a particular hyperv
feature is requested but the corresponding msr isn't found among those
reported via KVM_GET_MSR_INDEX_LIST. ATM this is only done for some
features but not all. I'll try to fix that; once done,
cpu->hyperv_vpindex here will make sure this msr is gettable.

Thanks,
Roman.
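To illustrate the flow Roman describes, a hedged sketch of serving such
a guest request with the helpers from this patch; the handler name and
the no-op ack callback are made up for the example:

    /* no-op ack callback just for the sketch; a real caller would
     * rearm its event source here */
    static void event_sint_ack(HvSintRoute *sint_route)
    {
    }

    /* hypothetical: the guest asked for event signaling on the synic
     * of the VP identified by vp_index, via the given SINT */
    static HvSintRoute *setup_event_signaling(uint32_t vp_index,
                                              uint32_t sint)
    {
        if (!hyperv_find_vcpu(vp_index)) {
            return NULL;    /* no such VP */
        }
        return kvm_hv_sint_route_create(vp_index, sint, event_sint_ack);
    }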
On Thu, 29 Jun 2017 16:10:20 +0300
Roman Kagan <rkagan@virtuozzo.com> wrote:

> On Thu, Jun 29, 2017 at 01:53:29PM +0200, Igor Mammedov wrote:
> > On Thu, 29 Jun 2017 12:53:27 +0300
> > Roman Kagan <rkagan@virtuozzo.com> wrote:
> >
> > > On Wed, Jun 28, 2017 at 04:47:43PM +0200, Igor Mammedov wrote:
> > > > On Wed, 21 Jun 2017 19:24:08 +0300
> > > > Roman Kagan <rkagan@virtuozzo.com> wrote:
> > > >
> > > > > Hyper-V identifies vCPUs by Virtual Processor (VP) index which can be
> > > > > queried by the guest via HV_X64_MSR_VP_INDEX msr. It is defined by the
> > > > > spec as a sequential number which can't exceed the maximum number of
> > > > > vCPUs per VM.
> > > > >
> > > > > It has to be owned by QEMU in order to preserve it across migration.
> > > > >
> > > > > However, the initial implementation in KVM didn't allow to set this
> > > > > msr, and KVM used its own notion of VP index. Fortunately, the way
> > > > > vCPUs are created in QEMU/KVM makes it likely that the KVM value is
> > > > > equal to QEMU cpu_index.
> > > > >
> > > > > So choose cpu_index as the value for vp_index, and push that to KVM on
> > > > > kernels that support setting the msr. On older ones that don't, query
> > > > > the kernel value and assert that it's in sync with QEMU.
> > > > >
> > > > > Besides, since handling errors from vCPU init at hotplug time is
> > > > > impossible, disable vCPU hotplug.
> > > >
> > > > The proper place to check whether a cpu may be created is
> > > > pc_cpu_pre_plug(), where you can gracefully abort the cpu creation
> > > > process.
> > >
> > > Thanks for the suggestion, I'll rework it this way.
> > >
> > > > Also it's possible to create cold-plugged CPUs out of order using
> > > > "-device cpu-foo" on the CLI; will the hyperv kvm/guest side be
> > > > ok with it?
> > >
> > > On kernels that support setting HV_X64_MSR_VP_INDEX QEMU will
> > > synchronize all sides. On kernels that don't, if out-of-order
> > > creation results in a vp_index mismatch between the kernel and QEMU,
> > > vcpu creation will fail.
> >
> > An additional question: what would happen if a VM is started on a
> > host supporting VP index setting and then migrated to a host without
> > it?
>
> The destination QEMU will attempt to initialize vCPUs, and if that
> fails (e.g. due to a vp_index mismatch), the migration will be aborted
> and the source VM will continue running.
>
> If the destination QEMU is old, too, there's a chance that vp_index
> will change. Then we just keep our fingers crossed that the guest
> doesn't notice (this is the behavior we have now).

On the source, putting a flag in the migration stream saying that
setting HV_X64_MSR_VP_INDEX is in use should prevent migration to an old
destination, or to a new destination without kernel support.

It also might make sense to disable the feature for old machine types so
that new->old migration would work as it used to, even if the
destination kernel supports the feature.

> > > > > +X86CPU *hyperv_find_vcpu(uint32_t vp_index)
> > > > > +{
> > > > > +    return X86_CPU(qemu_get_cpu(vp_index));
> > > > > +}
> > > >
> > > > This helper isn't used in this patch; add it in the patch that
> > > > actually uses it.
> > >
> > > I thought I would put the only two functions that encapsulate the
> > > knowledge of how vp_index is related to cpu_index in a single patch.
> > >
> > > I'm now thinking of open-coding the iteration over cpus here and
> > > directly looking for the cpu whose hyperv_vp_index() matches. Then
> > > that knowledge will become encapsulated in a single place, and
> > > indeed, this helper can go into another patch where it's used.
> > >
> > > > Also, if qemu_get_cpu() were called from each CPU init, it would
> > > > incur O(N^2) complexity; could you do without it?
> > >
> > > It isn't called on hot paths (ATM it's called only when SINT routes
> > > are created, which is at most one per cpu). I don't see a problem
> > > here.
> >
> > For what/where do you need this lookup?
>
> The guest configures devices to signal their events with synthetic
> interrupts on specific cpus, identified by vp_index. When we receive
> such a request we look up the cpu and set up a SINT route to be able to
> deliver interrupts to its synic.
>
> Or did I misunderstand the question perhaps?

Since there is a 1:1 mapping between synic and vp_index, and vp_index is
a dense interval [0..maxcpus), I'd suggest maintaining an internal synic
map where vp_index is used as an index into an array to fetch the
addressed synic; look at local_apics for an example.

[...]
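A sketch of the lookup table Igor has in mind, modeled on the
local_apics array in hw/intc/apic.c; SynICState and the MAX_VP_INDEX
bound are assumed names, not existing code:

    #define MAX_VP_INDEX 288    /* assumed upper bound, e.g. max_cpus */

    static SynICState *synic_table[MAX_VP_INDEX];

    static void synic_register(X86CPU *cpu, SynICState *synic)
    {
        uint32_t vp_index = hyperv_vp_index(cpu);

        assert(vp_index < MAX_VP_INDEX);
        synic_table[vp_index] = synic;      /* O(1) registration */
    }

    static SynICState *synic_get(uint32_t vp_index)
    {
        /* O(1) lookup instead of scanning all cpus */
        return vp_index < MAX_VP_INDEX ? synic_table[vp_index] : NULL;
    }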
On Thu, Jun 29, 2017 at 04:39:00PM +0200, Igor Mammedov wrote:
> On Thu, 29 Jun 2017 16:10:20 +0300
> Roman Kagan <rkagan@virtuozzo.com> wrote:
>
> > On Thu, Jun 29, 2017 at 01:53:29PM +0200, Igor Mammedov wrote:
> > > On Thu, 29 Jun 2017 12:53:27 +0300
> > > Roman Kagan <rkagan@virtuozzo.com> wrote:
> > >
> > > > On Wed, Jun 28, 2017 at 04:47:43PM +0200, Igor Mammedov wrote:
> > > > > On Wed, 21 Jun 2017 19:24:08 +0300
> > > > > Roman Kagan <rkagan@virtuozzo.com> wrote:
> > > > >
> > > > > > Hyper-V identifies vCPUs by Virtual Processor (VP) index which can be
> > > > > > queried by the guest via HV_X64_MSR_VP_INDEX msr. It is defined by the
> > > > > > spec as a sequential number which can't exceed the maximum number of
> > > > > > vCPUs per VM.
> > > > > >
> > > > > > It has to be owned by QEMU in order to preserve it across migration.
> > > > > >
> > > > > > However, the initial implementation in KVM didn't allow to set this
> > > > > > msr, and KVM used its own notion of VP index. Fortunately, the way
> > > > > > vCPUs are created in QEMU/KVM makes it likely that the KVM value is
> > > > > > equal to QEMU cpu_index.
> > > > > >
> > > > > > So choose cpu_index as the value for vp_index, and push that to KVM on
> > > > > > kernels that support setting the msr. On older ones that don't, query
> > > > > > the kernel value and assert that it's in sync with QEMU.
> > > > > >
> > > > > > Besides, since handling errors from vCPU init at hotplug time is
> > > > > > impossible, disable vCPU hotplug.
> > > > >
> > > > > The proper place to check whether a cpu may be created is
> > > > > pc_cpu_pre_plug(), where you can gracefully abort the cpu
> > > > > creation process.
> > > >
> > > > Thanks for the suggestion, I'll rework it this way.
> > > >
> > > > > Also it's possible to create cold-plugged CPUs out of order
> > > > > using "-device cpu-foo" on the CLI; will the hyperv kvm/guest
> > > > > side be ok with it?
> > > >
> > > > On kernels that support setting HV_X64_MSR_VP_INDEX QEMU will
> > > > synchronize all sides. On kernels that don't, if out-of-order
> > > > creation results in a vp_index mismatch between the kernel and
> > > > QEMU, vcpu creation will fail.
> > >
> > > An additional question: what would happen if a VM is started on a
> > > host supporting VP index setting and then migrated to a host
> > > without it?
> >
> > The destination QEMU will attempt to initialize vCPUs, and if that
> > fails (e.g. due to a vp_index mismatch), the migration will be
> > aborted and the source VM will continue running.
> >
> > If the destination QEMU is old, too, there's a chance that vp_index
> > will change. Then we just keep our fingers crossed that the guest
> > doesn't notice (this is the behavior we have now).
>
> On the source, putting a flag in the migration stream saying that
> setting HV_X64_MSR_VP_INDEX is in use should prevent migration to an
> old destination, or to a new destination without kernel support.

These are different cases. A new destination QEMU can verify that all
vcpus have the desired vp_index even if it can't set it, so in this case
vp_index migration is even reliable. Old QEMU didn't bother, so it can
potentially confuse the guest. But we're unaware of this ever having
happened in the past, probably because the existing users of synic are
only in-kvm synic timers, which don't depend on vp_index.

> It also might make sense to disable the feature for old machine types
> so that new->old migration would work as it used to, even if the
> destination kernel supports the feature.

I'm not sure it's worth the effort, especially since other patches in
the series introduce an "in-kvm-only" flag in SynIC, which is "on" for
old machine types. So eventually migration to/from an old QEMU will only
be possible for configurations with only in-kvm synic users, where we
hope vp_index doesn't matter.

> > > > > > +X86CPU *hyperv_find_vcpu(uint32_t vp_index)
> > > > > > +{
> > > > > > +    return X86_CPU(qemu_get_cpu(vp_index));
> > > > > > +}
> > > > >
> > > > > This helper isn't used in this patch; add it in the patch that
> > > > > actually uses it.
> > > >
> > > > I thought I would put the only two functions that encapsulate the
> > > > knowledge of how vp_index is related to cpu_index in a single
> > > > patch.
> > > >
> > > > I'm now thinking of open-coding the iteration over cpus here and
> > > > directly looking for the cpu whose hyperv_vp_index() matches.
> > > > Then that knowledge will become encapsulated in a single place,
> > > > and indeed, this helper can go into another patch where it's used.
> > > >
> > > > > Also, if qemu_get_cpu() were called from each CPU init, it
> > > > > would incur O(N^2) complexity; could you do without it?
> > > >
> > > > It isn't called on hot paths (ATM it's called only when SINT
> > > > routes are created, which is at most one per cpu). I don't see a
> > > > problem here.
> > >
> > > For what/where do you need this lookup?
> >
> > The guest configures devices to signal their events with synthetic
> > interrupts on specific cpus, identified by vp_index. When we receive
> > such a request we look up the cpu and set up a SINT route to be able
> > to deliver interrupts to its synic.
> >
> > Or did I misunderstand the question perhaps?
>
> Since there is a 1:1 mapping between synic and vp_index, and vp_index
> is a dense interval [0..maxcpus), I'd suggest maintaining an internal
> synic map where vp_index is used as an index into an array to fetch
> the addressed synic; look at local_apics for an example.

Ah, I see. I wonder why qemu_get_cpu() itself isn't implemented this
way?

Roman.
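The migration-stream flag Igor proposes would typically be a VMState
subsection gated by a .needed callback: a subsection unknown to the
destination makes it reject the incoming stream. A sketch with assumed
names (msr_hv_vp_index is not an existing CPUX86State field here):

    static bool hyperv_vp_index_needed(void *opaque)
    {
        X86CPU *cpu = opaque;

        /* send the subsection only when QEMU actually owns vp_index */
        return cpu->hyperv_vpindex && is_hv_vpindex_settable;
    }

    static const VMStateDescription vmstate_hyperv_vp_index = {
        .name = "cpu/hyperv_vp_index",
        .version_id = 1,
        .minimum_version_id = 1,
        .needed = hyperv_vp_index_needed,
        .fields = (VMStateField[]) {
            VMSTATE_UINT32(env.msr_hv_vp_index, X86CPU),
            VMSTATE_END_OF_LIST()
        }
    };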
diff --git a/target/i386/hyperv.h b/target/i386/hyperv.h
index 0c3b562..82f4757 100644
--- a/target/i386/hyperv.h
+++ b/target/i386/hyperv.h
@@ -32,11 +32,14 @@ struct HvSintRoute {
 
 int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit);
 
-HvSintRoute *kvm_hv_sint_route_create(uint32_t vcpu_id, uint32_t sint,
+HvSintRoute *kvm_hv_sint_route_create(uint32_t vp_index, uint32_t sint,
                                       HvSintAckClb sint_ack_clb);
 
 void kvm_hv_sint_route_destroy(HvSintRoute *sint_route);
 
 int kvm_hv_sint_route_set_sint(HvSintRoute *sint_route);
 
+uint32_t hyperv_vp_index(X86CPU *cpu);
+X86CPU *hyperv_find_vcpu(uint32_t vp_index);
+
 #endif
diff --git a/target/i386/hyperv.c b/target/i386/hyperv.c
index 227185c..4f57447 100644
--- a/target/i386/hyperv.c
+++ b/target/i386/hyperv.c
@@ -16,6 +16,16 @@
 #include "hyperv.h"
 #include "hyperv_proto.h"
 
+uint32_t hyperv_vp_index(X86CPU *cpu)
+{
+    return CPU(cpu)->cpu_index;
+}
+
+X86CPU *hyperv_find_vcpu(uint32_t vp_index)
+{
+    return X86_CPU(qemu_get_cpu(vp_index));
+}
+
 int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit)
 {
     CPUX86State *env = &cpu->env;
@@ -72,7 +82,7 @@ static void kvm_hv_sint_ack_handler(EventNotifier *notifier)
     }
 }
 
-HvSintRoute *kvm_hv_sint_route_create(uint32_t vcpu_id, uint32_t sint,
+HvSintRoute *kvm_hv_sint_route_create(uint32_t vp_index, uint32_t sint,
                                       HvSintAckClb sint_ack_clb)
 {
     HvSintRoute *sint_route;
@@ -92,7 +102,7 @@ HvSintRoute *kvm_hv_sint_route_create(uint32_t vcpu_id, uint32_t sint,
     event_notifier_set_handler(&sint_route->sint_ack_notifier,
                                kvm_hv_sint_ack_handler);
 
-    gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vcpu_id, sint);
+    gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vp_index, sint);
     if (gsi < 0) {
         goto err_gsi;
     }
@@ -105,7 +115,7 @@ HvSintRoute *kvm_hv_sint_route_create(uint32_t vcpu_id, uint32_t sint,
     }
     sint_route->gsi = gsi;
     sint_route->sint_ack_clb = sint_ack_clb;
-    sint_route->vcpu_id = vcpu_id;
+    sint_route->vcpu_id = vp_index;
     sint_route->sint = sint;
 
     return sint_route;
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index 2795b63..9bf7f08 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -86,6 +86,7 @@ static bool has_msr_hv_hypercall;
 static bool has_msr_hv_crash;
 static bool has_msr_hv_reset;
 static bool has_msr_hv_vpindex;
+static bool is_hv_vpindex_settable;
 static bool has_msr_hv_runtime;
 static bool has_msr_hv_synic;
 static bool has_msr_hv_stimer;
@@ -665,6 +666,43 @@ static int hyperv_handle_properties(CPUState *cs)
     return 0;
 }
 
+static int hyperv_init_vcpu(X86CPU *cpu)
+{
+    if (cpu->hyperv_vpindex && !is_hv_vpindex_settable) {
+        /*
+         * the kernel doesn't support setting vp_index; assert that its value
+         * is in sync
+         */
+        int ret;
+        struct {
+            struct kvm_msrs info;
+            struct kvm_msr_entry entries[1];
+        } msr_data = {
+            .info.nmsrs = 1,
+            .entries[0].index = HV_X64_MSR_VP_INDEX,
+        };
+
+        /*
+         * handling errors from vcpu init at hotplug time is impossible, so
+         * disallow cpu hotplug
+         */
+        MACHINE_GET_CLASS(current_machine)->hot_add_cpu = NULL;
+
+        ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data);
+        if (ret < 0) {
+            return ret;
+        }
+        assert(ret == 1);
+
+        if (msr_data.entries[0].data != hyperv_vp_index(cpu)) {
+            fprintf(stderr, "kernel's vp_index != QEMU's vp_index\n");
+            return -ENXIO;
+        }
+    }
+
+    return 0;
+}
+
 static Error *invtsc_mig_blocker;
 
 #define KVM_MAX_CPUID_ENTRIES 100
@@ -1013,6 +1051,13 @@ int kvm_arch_init_vcpu(CPUState *cs)
         has_msr_tsc_aux = false;
     }
 
+    if (hyperv_enabled(cpu)) {
+        r = hyperv_init_vcpu(cpu);
+        if (r) {
+            goto fail;
+        }
+    }
+
     return 0;
 
 fail:
@@ -1204,6 +1249,8 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
     has_pit_state2 = kvm_check_extension(s, KVM_CAP_PIT_STATE2);
 #endif
 
+    is_hv_vpindex_settable = kvm_check_extension(s, KVM_CAP_HYPERV_VP_INDEX);
+
     ret = kvm_get_supported_msrs(s);
     if (ret < 0) {
         return ret;
@@ -1748,6 +1795,10 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
     if (has_msr_hv_runtime) {
         kvm_msr_entry_add(cpu, HV_X64_MSR_VP_RUNTIME, env->msr_hv_runtime);
     }
+    if (cpu->hyperv_vpindex && has_msr_hv_vpindex &&
+        is_hv_vpindex_settable) {
+        kvm_msr_entry_add(cpu, HV_X64_MSR_VP_INDEX, hyperv_vp_index(cpu));
+    }
     if (cpu->hyperv_synic) {
         int j;
Hyper-V identifies vCPUs by Virtual Processor (VP) index which can be
queried by the guest via HV_X64_MSR_VP_INDEX msr. It is defined by the
spec as a sequential number which can't exceed the maximum number of
vCPUs per VM.

It has to be owned by QEMU in order to preserve it across migration.

However, the initial implementation in KVM didn't allow to set this
msr, and KVM used its own notion of VP index. Fortunately, the way
vCPUs are created in QEMU/KVM makes it likely that the KVM value is
equal to QEMU cpu_index.

So choose cpu_index as the value for vp_index, and push that to KVM on
kernels that support setting the msr. On older ones that don't, query
the kernel value and assert that it's in sync with QEMU.

Besides, since handling errors from vCPU init at hotplug time is
impossible, disable vCPU hotplug.

This patch also introduces accessor functions to wrap the mapping
between a vCPU and its vp_index. Besides, a few variables are renamed
to avoid confusion of vp_index with vcpu_id (== apic_id).

Signed-off-by: Roman Kagan <rkagan@virtuozzo.com>
---
v1 -> v2:
 - were patches 5, 6 in v1
 - move vp_index initialization to hyperv_init_vcpu
 - check capability before trying to set the msr
 - set the msr on the usual kvm_put_msrs path
 - disable cpu hotplug if msr is not settable

 target/i386/hyperv.h |  5 ++++-
 target/i386/hyperv.c | 16 +++++++++++++---
 target/i386/kvm.c    | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 68 insertions(+), 4 deletions(-)