Message ID | 20220222154642.684285-5-vkuznets@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM: x86: hyper-v: XMM fast hypercalls fixes | expand |
On Tue, 2022-02-22 at 16:46 +0100, Vitaly Kuznetsov wrote: > It has been proven on practice that at least Windows Server 2019 tries > using HVCALL_SEND_IPI_EX in 'XMM fast' mode when it has more than 64 vCPUs > and it needs to send an IPI to a vCPU > 63. Similarly to other XMM Fast > hypercalls (HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE}{,_EX}), this > information is missing in TLFS as of 6.0b. Currently, KVM returns an error > (HV_STATUS_INVALID_HYPERCALL_INPUT) and Windows crashes. > > Note, HVCALL_SEND_IPI is a 'standard' fast hypercall (not 'XMM fast') as > all its parameters fit into RDX:R8 and this is handled by KVM correctly. > > Fixes: d8f5537a8816 ("KVM: hyper-v: Advertise support for fast XMM hypercalls") > Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com> > --- > arch/x86/kvm/hyperv.c | 52 ++++++++++++++++++++++++++++--------------- > 1 file changed, 34 insertions(+), 18 deletions(-) > > diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c > index 6dda93bf98ae..3060057bdfd4 100644 > --- a/arch/x86/kvm/hyperv.c > +++ b/arch/x86/kvm/hyperv.c > @@ -1890,6 +1890,7 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) > int sparse_banks_len; > u32 vector; > bool all_cpus; > + int i; > > if (hc->code == HVCALL_SEND_IPI) { > if (!hc->fast) { > @@ -1910,9 +1911,15 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) > > trace_kvm_hv_send_ipi(vector, sparse_banks[0]); > } else { > - if (unlikely(kvm_read_guest(kvm, hc->ingpa, &send_ipi_ex, > - sizeof(send_ipi_ex)))) > - return HV_STATUS_INVALID_HYPERCALL_INPUT; > + if (!hc->fast) { > + if (unlikely(kvm_read_guest(kvm, hc->ingpa, &send_ipi_ex, > + sizeof(send_ipi_ex)))) > + return HV_STATUS_INVALID_HYPERCALL_INPUT; > + } else { > + send_ipi_ex.vector = (u32)hc->ingpa; > + send_ipi_ex.vp_set.format = hc->outgpa; > + send_ipi_ex.vp_set.valid_bank_mask = sse128_lo(hc->xmm[0]); > + } > > trace_kvm_hv_send_ipi_ex(send_ipi_ex.vector, > send_ipi_ex.vp_set.format, 
> @@ -1920,8 +1927,7 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) > > vector = send_ipi_ex.vector; > valid_bank_mask = send_ipi_ex.vp_set.valid_bank_mask; > - sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) * > - sizeof(sparse_banks[0]); > + sparse_banks_len = bitmap_weight(&valid_bank_mask, 64); Is this change intentional? I haven't fully reviewed this, because kvm/queue seem to have a bit different version of this, and I didn't fully follow on all of this. Best regards, Maxim Levitsky > > all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL; > > @@ -1931,12 +1937,27 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) > if (!sparse_banks_len) > goto ret_success; > > - if (kvm_read_guest(kvm, > - hc->ingpa + offsetof(struct hv_send_ipi_ex, > - vp_set.bank_contents), > - sparse_banks, > - sparse_banks_len)) > - return HV_STATUS_INVALID_HYPERCALL_INPUT; > + if (!hc->fast) { > + if (kvm_read_guest(kvm, > + hc->ingpa + offsetof(struct hv_send_ipi_ex, > + vp_set.bank_contents), > + sparse_banks, > + sparse_banks_len * sizeof(sparse_banks[0]))) > + return HV_STATUS_INVALID_HYPERCALL_INPUT; > + } else { > + /* > + * The lower half of XMM0 is already consumed, each XMM holds > + * two sparse banks. 
> + */ > + if (sparse_banks_len > (2 * HV_HYPERCALL_MAX_XMM_REGISTERS - 1)) > + return HV_STATUS_INVALID_HYPERCALL_INPUT; > + for (i = 0; i < sparse_banks_len; i++) { > + if (i % 2) > + sparse_banks[i] = sse128_lo(hc->xmm[(i + 1) / 2]); > + else > + sparse_banks[i] = sse128_hi(hc->xmm[i / 2]); > + } > + } > } > > check_and_send_ipi: > @@ -2098,6 +2119,7 @@ static bool is_xmm_fast_hypercall(struct kvm_hv_hcall *hc) > case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE: > case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX: > case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX: > + case HVCALL_SEND_IPI_EX: > return true; > } > > @@ -2265,14 +2287,8 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) > ret = kvm_hv_flush_tlb(vcpu, &hc); > break; > case HVCALL_SEND_IPI: > - if (unlikely(hc.rep)) { > - ret = HV_STATUS_INVALID_HYPERCALL_INPUT; > - break; > - } > - ret = kvm_hv_send_ipi(vcpu, &hc); > - break; > case HVCALL_SEND_IPI_EX: > - if (unlikely(hc.fast || hc.rep)) { > + if (unlikely(hc.rep)) { > ret = HV_STATUS_INVALID_HYPERCALL_INPUT; > break; > }
Maxim Levitsky <mlevitsk@redhat.com> writes: > On Tue, 2022-02-22 at 16:46 +0100, Vitaly Kuznetsov wrote: >> It has been proven on practice that at least Windows Server 2019 tries >> using HVCALL_SEND_IPI_EX in 'XMM fast' mode when it has more than 64 vCPUs >> and it needs to send an IPI to a vCPU > 63. Similarly to other XMM Fast >> hypercalls (HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE}{,_EX}), this >> information is missing in TLFS as of 6.0b. Currently, KVM returns an error >> (HV_STATUS_INVALID_HYPERCALL_INPUT) and Windows crashes. >> >> Note, HVCALL_SEND_IPI is a 'standard' fast hypercall (not 'XMM fast') as >> all its parameters fit into RDX:R8 and this is handled by KVM correctly. >> >> Fixes: d8f5537a8816 ("KVM: hyper-v: Advertise support for fast XMM hypercalls") >> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com> >> --- >> arch/x86/kvm/hyperv.c | 52 ++++++++++++++++++++++++++++--------------- >> 1 file changed, 34 insertions(+), 18 deletions(-) >> >> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c >> index 6dda93bf98ae..3060057bdfd4 100644 >> --- a/arch/x86/kvm/hyperv.c >> +++ b/arch/x86/kvm/hyperv.c >> @@ -1890,6 +1890,7 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) >> int sparse_banks_len; >> u32 vector; >> bool all_cpus; >> + int i; >> >> if (hc->code == HVCALL_SEND_IPI) { >> if (!hc->fast) { >> @@ -1910,9 +1911,15 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) >> >> trace_kvm_hv_send_ipi(vector, sparse_banks[0]); >> } else { >> - if (unlikely(kvm_read_guest(kvm, hc->ingpa, &send_ipi_ex, >> - sizeof(send_ipi_ex)))) >> - return HV_STATUS_INVALID_HYPERCALL_INPUT; >> + if (!hc->fast) { >> + if (unlikely(kvm_read_guest(kvm, hc->ingpa, &send_ipi_ex, >> + sizeof(send_ipi_ex)))) >> + return HV_STATUS_INVALID_HYPERCALL_INPUT; >> + } else { >> + send_ipi_ex.vector = (u32)hc->ingpa; >> + send_ipi_ex.vp_set.format = hc->outgpa; >> + send_ipi_ex.vp_set.valid_bank_mask = 
sse128_lo(hc->xmm[0]); >> + } >> >> trace_kvm_hv_send_ipi_ex(send_ipi_ex.vector, >> send_ipi_ex.vp_set.format, >> @@ -1920,8 +1927,7 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) >> >> vector = send_ipi_ex.vector; >> valid_bank_mask = send_ipi_ex.vp_set.valid_bank_mask; >> - sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) * >> - sizeof(sparse_banks[0]); >> + sparse_banks_len = bitmap_weight(&valid_bank_mask, 64); > Is this change intentional? > Yes it is. Previously, 'sparse_banks_len' was the number of bytes to read, now it's in u64-s. (see below) > I haven't fully reviewed this, because kvm/queue seem to have a bit different > version of this, and I didn't fully follow on all of this. > >> >> all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL; >> >> @@ -1931,12 +1937,27 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) >> if (!sparse_banks_len) >> goto ret_success; >> >> - if (kvm_read_guest(kvm, >> - hc->ingpa + offsetof(struct hv_send_ipi_ex, >> - vp_set.bank_contents), >> - sparse_banks, >> - sparse_banks_len)) >> - return HV_STATUS_INVALID_HYPERCALL_INPUT; >> + if (!hc->fast) { >> + if (kvm_read_guest(kvm, >> + hc->ingpa + offsetof(struct hv_send_ipi_ex, >> + vp_set.bank_contents), >> + sparse_banks, >> + sparse_banks_len * sizeof(sparse_banks[0]))) ^^^ here ^^^ >> + return HV_STATUS_INVALID_HYPERCALL_INPUT; >> + } else { >> + /* >> + * The lower half of XMM0 is already consumed, each XMM holds >> + * two sparse banks. >> + */ >> + if (sparse_banks_len > (2 * HV_HYPERCALL_MAX_XMM_REGISTERS - 1)) >> + return HV_STATUS_INVALID_HYPERCALL_INPUT; And here. This is the reason for the change: it's more convenient to count it in 'xmm halves' than in bytes. 
>> + for (i = 0; i < sparse_banks_len; i++) { >> + if (i % 2) >> + sparse_banks[i] = sse128_lo(hc->xmm[(i + 1) / 2]); >> + else >> + sparse_banks[i] = sse128_hi(hc->xmm[i / 2]); >> + } >> + } >> } >> >> check_and_send_ipi: >> @@ -2098,6 +2119,7 @@ static bool is_xmm_fast_hypercall(struct kvm_hv_hcall *hc) >> case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE: >> case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX: >> case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX: >> + case HVCALL_SEND_IPI_EX: >> return true; >> } >> >> @@ -2265,14 +2287,8 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) >> ret = kvm_hv_flush_tlb(vcpu, &hc); >> break; >> case HVCALL_SEND_IPI: >> - if (unlikely(hc.rep)) { >> - ret = HV_STATUS_INVALID_HYPERCALL_INPUT; >> - break; >> - } >> - ret = kvm_hv_send_ipi(vcpu, &hc); >> - break; >> case HVCALL_SEND_IPI_EX: >> - if (unlikely(hc.fast || hc.rep)) { >> + if (unlikely(hc.rep)) { >> ret = HV_STATUS_INVALID_HYPERCALL_INPUT; >> break; >> } > >
On Tue, Feb 22, 2022 at 04:46:42PM +0100, Vitaly Kuznetsov wrote: > CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe. > > > > It has been proven on practice that at least Windows Server 2019 tries > using HVCALL_SEND_IPI_EX in 'XMM fast' mode when it has more than 64 vCPUs > and it needs to send an IPI to a vCPU > 63. Similarly to other XMM Fast > hypercalls (HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE}{,_EX}), this > information is missing in TLFS as of 6.0b. Currently, KVM returns an error > (HV_STATUS_INVALID_HYPERCALL_INPUT) and Windows crashes. > > Note, HVCALL_SEND_IPI is a 'standard' fast hypercall (not 'XMM fast') as > all its parameters fit into RDX:R8 and this is handled by KVM correctly. > > Fixes: d8f5537a8816 ("KVM: hyper-v: Advertise support for fast XMM hypercalls") > Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com> Reviewed-by: Siddharth Chandrasekaran <sidcha@amazon.de> Amazon Development Center Germany GmbH Krausenstr. 38 10117 Berlin Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B Sitz: Berlin Ust-ID: DE 289 237 879
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 6dda93bf98ae..3060057bdfd4 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1890,6 +1890,7 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) int sparse_banks_len; u32 vector; bool all_cpus; + int i; if (hc->code == HVCALL_SEND_IPI) { if (!hc->fast) { @@ -1910,9 +1911,15 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) trace_kvm_hv_send_ipi(vector, sparse_banks[0]); } else { - if (unlikely(kvm_read_guest(kvm, hc->ingpa, &send_ipi_ex, - sizeof(send_ipi_ex)))) - return HV_STATUS_INVALID_HYPERCALL_INPUT; + if (!hc->fast) { + if (unlikely(kvm_read_guest(kvm, hc->ingpa, &send_ipi_ex, + sizeof(send_ipi_ex)))) + return HV_STATUS_INVALID_HYPERCALL_INPUT; + } else { + send_ipi_ex.vector = (u32)hc->ingpa; + send_ipi_ex.vp_set.format = hc->outgpa; + send_ipi_ex.vp_set.valid_bank_mask = sse128_lo(hc->xmm[0]); + } trace_kvm_hv_send_ipi_ex(send_ipi_ex.vector, send_ipi_ex.vp_set.format, @@ -1920,8 +1927,7 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) vector = send_ipi_ex.vector; valid_bank_mask = send_ipi_ex.vp_set.valid_bank_mask; - sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) * - sizeof(sparse_banks[0]); + sparse_banks_len = bitmap_weight(&valid_bank_mask, 64); all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL; @@ -1931,12 +1937,27 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) if (!sparse_banks_len) goto ret_success; - if (kvm_read_guest(kvm, - hc->ingpa + offsetof(struct hv_send_ipi_ex, - vp_set.bank_contents), - sparse_banks, - sparse_banks_len)) - return HV_STATUS_INVALID_HYPERCALL_INPUT; + if (!hc->fast) { + if (kvm_read_guest(kvm, + hc->ingpa + offsetof(struct hv_send_ipi_ex, + vp_set.bank_contents), + sparse_banks, + sparse_banks_len * sizeof(sparse_banks[0]))) + return HV_STATUS_INVALID_HYPERCALL_INPUT; + } else { + /* + * The lower half of XMM0 is 
already consumed, each XMM holds + * two sparse banks. + */ + if (sparse_banks_len > (2 * HV_HYPERCALL_MAX_XMM_REGISTERS - 1)) + return HV_STATUS_INVALID_HYPERCALL_INPUT; + for (i = 0; i < sparse_banks_len; i++) { + if (i % 2) + sparse_banks[i] = sse128_lo(hc->xmm[(i + 1) / 2]); + else + sparse_banks[i] = sse128_hi(hc->xmm[i / 2]); + } + } } check_and_send_ipi: @@ -2098,6 +2119,7 @@ static bool is_xmm_fast_hypercall(struct kvm_hv_hcall *hc) case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE: case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX: case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX: + case HVCALL_SEND_IPI_EX: return true; } @@ -2265,14 +2287,8 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) ret = kvm_hv_flush_tlb(vcpu, &hc); break; case HVCALL_SEND_IPI: - if (unlikely(hc.rep)) { - ret = HV_STATUS_INVALID_HYPERCALL_INPUT; - break; - } - ret = kvm_hv_send_ipi(vcpu, &hc); - break; case HVCALL_SEND_IPI_EX: - if (unlikely(hc.fast || hc.rep)) { + if (unlikely(hc.rep)) { ret = HV_STATUS_INVALID_HYPERCALL_INPUT; break; }
It has been proven in practice that at least Windows Server 2019 tries using HVCALL_SEND_IPI_EX in 'XMM fast' mode when it has more than 64 vCPUs and it needs to send an IPI to a vCPU > 63. Similarly to other XMM Fast hypercalls (HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE}{,_EX}), this information is missing in TLFS as of 6.0b. Currently, KVM returns an error (HV_STATUS_INVALID_HYPERCALL_INPUT) and Windows crashes. Note, HVCALL_SEND_IPI is a 'standard' fast hypercall (not 'XMM fast') as all its parameters fit into RDX:R8 and this is handled by KVM correctly. Fixes: d8f5537a8816 ("KVM: hyper-v: Advertise support for fast XMM hypercalls") Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com> --- arch/x86/kvm/hyperv.c | 52 ++++++++++++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 18 deletions(-)