Message ID | da036c786700032b32e68ebece06fd1a6b6bf344.1618244920.git.sidcha@amazon.de (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Add support for XMM fast hypercalls | expand |
On Mon, Apr 12, 2021 at 07:00:16PM +0200, Siddharth Chandrasekaran wrote: > + > +static inline void kvm_hv_hypercall_read_xmm(struct kvm_hv_hcall *hc) Do you really need inline here? The compiler should be smart enough to inline this function if necessary. > +{ > + int reg; > + > + kvm_fpu_get(); > + for (reg = 0; reg < KVM_HV_HYPERCALL_MAX_XMM_REGISTERS; reg++) > + _kvm_read_sse_reg(reg, &hc->xmm[reg]); > + kvm_fpu_put(); > + hc->xmm_dirty = false; There is no code that sets xmm_dirty to true? What am I missing? I guess that's because you haven't implemented the hypercalls that need writing back to guest? Wei.
On Mon, Apr 12, 2021 at 08:13:19PM +0000, Wei Liu wrote: > On Mon, Apr 12, 2021 at 07:00:16PM +0200, Siddharth Chandrasekaran wrote: > > + > > +static inline void kvm_hv_hypercall_read_xmm(struct kvm_hv_hcall *hc) > > Do you really need inline here? The compiler should be smart enough to > inline this function if necessary. Removed. > > +{ > > + int reg; > > + > > + kvm_fpu_get(); > > + for (reg = 0; reg < KVM_HV_HYPERCALL_MAX_XMM_REGISTERS; reg++) > > + _kvm_read_sse_reg(reg, &hc->xmm[reg]); > > + kvm_fpu_put(); > > + hc->xmm_dirty = false; > > There is no code that sets xmm_dirty to true? What am I missing? I guess > that's because you haven't implemented the hypercalls that need writing > back to guest? Yes, when a hypercall wants to return data via XMM registers, it should update hc->xmm[] and set hc->xmm_dirty (I plan on using this in a future patch). The reason why I didn't defer this change to the actual patch that needs it is that it pairs nicely with the read/write xmm_reg() calls in kvm_hv_hypercall(). ~ Sid. Amazon Development Center Germany GmbH Krausenstr. 38 10117 Berlin Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B Sitz: Berlin Ust-ID: DE 289 237 879
Siddharth Chandrasekaran <sidcha@amazon.de> writes: > Hyper-V supports the use of XMM registers to perform fast hypercalls. > This allows guests to take advantage of the improved performance of the > fast hypercall interface even though a hypercall may require more than > (the current maximum of) two input registers. > > The XMM fast hypercall interface uses six additional XMM registers (XMM0 > to XMM5) to allow the guest to pass an input parameter block of up to > 112 bytes. Hyper-V can also return data back to the guest in the > remaining XMM registers that are not used by the current hypercall. > > Add framework to read/write to XMM registers in kvm_hv_hypercall() and > use the additional hypercall inputs from XMM registers in > kvm_hv_flush_tlb() when possible. > > Cc: Alexander Graf <graf@amazon.com> > Co-developed-by: Evgeny Iakovlev <eyakovl@amazon.de> > Signed-off-by: Evgeny Iakovlev <eyakovl@amazon.de> > Signed-off-by: Siddharth Chandrasekaran <sidcha@amazon.de> > --- > arch/x86/kvm/hyperv.c | 109 ++++++++++++++++++++++++++++++++++-------- > 1 file changed, 90 insertions(+), 19 deletions(-) > > diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c > index 8f6babd1ea0d..1f9959aba70d 100644 > --- a/arch/x86/kvm/hyperv.c > +++ b/arch/x86/kvm/hyperv.c > @@ -36,6 +36,7 @@ > > #include "trace.h" > #include "irq.h" > +#include "fpu.h" > > /* "Hv#1" signature */ > #define HYPERV_CPUID_SIGNATURE_EAX 0x31237648 > @@ -1623,6 +1624,8 @@ static __always_inline unsigned long *sparse_set_to_vcpu_mask( > return vcpu_bitmap; > } > > +#define KVM_HV_HYPERCALL_MAX_XMM_REGISTERS 6 Nitpick: this is not KVM-specific so could probably go to arch/x86/include/asm/hyperv-tlfs.h > + > struct kvm_hv_hcall { > u64 param; > u64 ingpa; > @@ -1632,10 +1635,14 @@ struct kvm_hv_hcall { > u16 rep_idx; > bool fast; > bool rep; > + sse128_t xmm[KVM_HV_HYPERCALL_MAX_XMM_REGISTERS]; > + bool xmm_dirty; > }; > > static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, 
bool ex) > { > + int i, j; > + gpa_t gpa; > struct kvm *kvm = vcpu->kvm; > struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); > struct hv_tlb_flush_ex flush_ex; > @@ -1649,8 +1656,15 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool > bool all_cpus; > > if (!ex) { > - if (unlikely(kvm_read_guest(kvm, hc->ingpa, &flush, sizeof(flush)))) > - return HV_STATUS_INVALID_HYPERCALL_INPUT; > + if (hc->fast) { > + flush.address_space = hc->ingpa; > + flush.flags = hc->outgpa; > + flush.processor_mask = sse128_lo(hc->xmm[0]); > + } else { > + if (unlikely(kvm_read_guest(kvm, hc->ingpa, > + &flush, sizeof(flush)))) > + return HV_STATUS_INVALID_HYPERCALL_INPUT; > + } > > trace_kvm_hv_flush_tlb(flush.processor_mask, > flush.address_space, flush.flags); > @@ -1668,9 +1682,16 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool > all_cpus = (flush.flags & HV_FLUSH_ALL_PROCESSORS) || > flush.processor_mask == 0; > } else { > - if (unlikely(kvm_read_guest(kvm, hc->ingpa, &flush_ex, > - sizeof(flush_ex)))) > - return HV_STATUS_INVALID_HYPERCALL_INPUT; > + if (hc->fast) { > + flush_ex.address_space = hc->ingpa; > + flush_ex.flags = hc->outgpa; > + memcpy(&flush_ex.hv_vp_set, > + &hc->xmm[0], sizeof(hc->xmm[0])); > + } else { > + if (unlikely(kvm_read_guest(kvm, hc->ingpa, &flush_ex, > + sizeof(flush_ex)))) > + return HV_STATUS_INVALID_HYPERCALL_INPUT; > + } > > trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask, > flush_ex.hv_vp_set.format, > @@ -1681,20 +1702,29 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool > all_cpus = flush_ex.hv_vp_set.format != > HV_GENERIC_SET_SPARSE_4K; > > - sparse_banks_len = > - bitmap_weight((unsigned long *)&valid_bank_mask, 64) * > - sizeof(sparse_banks[0]); > + sparse_banks_len = bitmap_weight((unsigned long *)&valid_bank_mask, 64); > > if (!sparse_banks_len && !all_cpus) > goto ret_success; > > - if (!all_cpus && > - kvm_read_guest(kvm, > - 
hc->ingpa + offsetof(struct hv_tlb_flush_ex, > - hv_vp_set.bank_contents), > - sparse_banks, > - sparse_banks_len)) > - return HV_STATUS_INVALID_HYPERCALL_INPUT; > + if (!all_cpus) { > + if (hc->fast) { > + if (sparse_banks_len > KVM_HV_HYPERCALL_MAX_XMM_REGISTERS - 1) > + return HV_STATUS_INVALID_HYPERCALL_INPUT; > + for (i = 0, j = 1; i < sparse_banks_len; i += 2, j++) { Nitpick: you don't really need 'j' here as 'j == i/2 + 1', right? > + sparse_banks[i + 0] = sse128_lo(hc->xmm[j]); Using ' + 0' for indentation is ... unusual :-) I'm not opposed to it here though. > + sparse_banks[i + 1] = sse128_hi(hc->xmm[j]); > + } > + } else { > + gpa = hc->ingpa; > + gpa += offsetof(struct hv_tlb_flush_ex, > + hv_vp_set.bank_contents); Nitpick: if splitting these into two lines is only done to fit into 80 chars then I'd say the requirement is no more so we can be a bit wider. gpa = hc->ingpa + offsetof(...) > + if (unlikely(kvm_read_guest(kvm, gpa, sparse_banks, > + sparse_banks_len * > + sizeof(sparse_banks[0])))) > + return HV_STATUS_INVALID_HYPERCALL_INPUT; > + } > + } > } > > cpumask_clear(&hv_vcpu->tlb_flush); > @@ -1890,6 +1920,41 @@ static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *h > return HV_STATUS_SUCCESS; > } > > +static bool is_xmm_fast_hypercall(struct kvm_hv_hcall *hc) > +{ > + switch (hc->code) { > + case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST: > + case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE: > + case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX: > + case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX: > + return true; > + } > + > + return false; > +} > + > +static inline void kvm_hv_hypercall_read_xmm(struct kvm_hv_hcall *hc) > +{ > + int reg; > + > + kvm_fpu_get(); > + for (reg = 0; reg < KVM_HV_HYPERCALL_MAX_XMM_REGISTERS; reg++) > + _kvm_read_sse_reg(reg, &hc->xmm[reg]); > + kvm_fpu_put(); > + hc->xmm_dirty = false; > +} > + > +static inline void kvm_hv_hypercall_write_xmm(struct kvm_hv_hcall *hc) > +{ > + int reg; > + > + kvm_fpu_get(); > + for (reg = 0; 
reg < KVM_HV_HYPERCALL_MAX_XMM_REGISTERS; reg++) > + _kvm_write_sse_reg(reg, &hc->xmm[reg]); > + kvm_fpu_put(); > + hc->xmm_dirty = false; > +} > + > int kvm_hv_hypercall(struct kvm_vcpu *vcpu) > { > struct kvm_hv_hcall hc; > @@ -1926,6 +1991,9 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) > hc.rep_idx = (hc.param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff; > hc.rep = !!(hc.rep_cnt || hc.rep_idx); > > + if (hc.fast && is_xmm_fast_hypercall(&hc)) > + kvm_hv_hypercall_read_xmm(&hc); > + > trace_kvm_hv_hypercall(hc.code, hc.fast, hc.rep_cnt, hc.rep_idx, > hc.ingpa, hc.outgpa); > > @@ -1961,28 +2029,28 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) > kvm_hv_hypercall_complete_userspace; > return 0; > case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST: > - if (unlikely(hc.fast || !hc.rep_cnt || hc.rep_idx)) { > + if (unlikely(!hc.rep_cnt || hc.rep_idx)) { > ret = HV_STATUS_INVALID_HYPERCALL_INPUT; > break; > } > ret = kvm_hv_flush_tlb(vcpu, &hc, false); > break; > case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE: > - if (unlikely(hc.fast || hc.rep)) { > + if (unlikely(hc.rep)) { > ret = HV_STATUS_INVALID_HYPERCALL_INPUT; > break; > } > ret = kvm_hv_flush_tlb(vcpu, &hc, false); > break; > case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX: > - if (unlikely(hc.fast || !hc.rep_cnt || hc.rep_idx)) { > + if (unlikely(!hc.rep_cnt || hc.rep_idx)) { > ret = HV_STATUS_INVALID_HYPERCALL_INPUT; > break; > } > ret = kvm_hv_flush_tlb(vcpu, &hc, true); > break; > case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX: > - if (unlikely(hc.fast || hc.rep)) { > + if (unlikely(hc.rep)) { > ret = HV_STATUS_INVALID_HYPERCALL_INPUT; > break; > } > @@ -2035,6 +2103,9 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) > break; > } > > + if (hc.xmm_dirty) > + kvm_hv_hypercall_write_xmm(&hc); > + Wei already mentioned that, but as 'xmm_dirty' is not being used in this patch I'd suggest we move it out too. > return kvm_hv_hypercall_complete(vcpu, ret); > }
On Tue, Apr 13, 2021 at 04:09:48PM +0200, Vitaly Kuznetsov wrote: > Siddharth Chandrasekaran <sidcha@amazon.de> writes: > > Hyper-V supports the use of XMM registers to perform fast hypercalls. > > This allows guests to take advantage of the improved performance of the > > fast hypercall interface even though a hypercall may require more than > > (the current maximum of) two input registers. > > > > The XMM fast hypercall interface uses six additional XMM registers (XMM0 > > to XMM5) to allow the guest to pass an input parameter block of up to > > 112 bytes. Hyper-V can also return data back to the guest in the > > remaining XMM registers that are not used by the current hypercall. > > > > Add framework to read/write to XMM registers in kvm_hv_hypercall() and > > use the additional hypercall inputs from XMM registers in > > kvm_hv_flush_tlb() when possible. > > > > Cc: Alexander Graf <graf@amazon.com> > > Co-developed-by: Evgeny Iakovlev <eyakovl@amazon.de> > > Signed-off-by: Evgeny Iakovlev <eyakovl@amazon.de> > > Signed-off-by: Siddharth Chandrasekaran <sidcha@amazon.de> > > --- > > arch/x86/kvm/hyperv.c | 109 ++++++++++++++++++++++++++++++++++-------- > > 1 file changed, 90 insertions(+), 19 deletions(-) > > > > diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c > > index 8f6babd1ea0d..1f9959aba70d 100644 > > --- a/arch/x86/kvm/hyperv.c > > +++ b/arch/x86/kvm/hyperv.c > > @@ -36,6 +36,7 @@ > > > > #include "trace.h" > > #include "irq.h" > > +#include "fpu.h" > > > > /* "Hv#1" signature */ > > #define HYPERV_CPUID_SIGNATURE_EAX 0x31237648 > > @@ -1623,6 +1624,8 @@ static __always_inline unsigned long *sparse_set_to_vcpu_mask( > > return vcpu_bitmap; > > } > > > > +#define KVM_HV_HYPERCALL_MAX_XMM_REGISTERS 6 > > Nitpick: this is not KVM-specific so could probably go to arch/x86/include/asm/hyperv-tlfs.h Ack. 
> > + > > struct kvm_hv_hcall { > > u64 param; > > u64 ingpa; > > @@ -1632,10 +1635,14 @@ struct kvm_hv_hcall { > > u16 rep_idx; > > bool fast; > > bool rep; > > + sse128_t xmm[KVM_HV_HYPERCALL_MAX_XMM_REGISTERS]; > > + bool xmm_dirty; > > }; > > > > static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool ex) > > { > > + int i, j; > > + gpa_t gpa; > > struct kvm *kvm = vcpu->kvm; > > struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); > > struct hv_tlb_flush_ex flush_ex; > > @@ -1649,8 +1656,15 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool > > bool all_cpus; > > > > if (!ex) { > > - if (unlikely(kvm_read_guest(kvm, hc->ingpa, &flush, sizeof(flush)))) > > - return HV_STATUS_INVALID_HYPERCALL_INPUT; > > + if (hc->fast) { > > + flush.address_space = hc->ingpa; > > + flush.flags = hc->outgpa; > > + flush.processor_mask = sse128_lo(hc->xmm[0]); > > + } else { > > + if (unlikely(kvm_read_guest(kvm, hc->ingpa, > > + &flush, sizeof(flush)))) > > + return HV_STATUS_INVALID_HYPERCALL_INPUT; > > + } > > > > trace_kvm_hv_flush_tlb(flush.processor_mask, > > flush.address_space, flush.flags); > > @@ -1668,9 +1682,16 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool > > all_cpus = (flush.flags & HV_FLUSH_ALL_PROCESSORS) || > > flush.processor_mask == 0; > > } else { > > - if (unlikely(kvm_read_guest(kvm, hc->ingpa, &flush_ex, > > - sizeof(flush_ex)))) > > - return HV_STATUS_INVALID_HYPERCALL_INPUT; > > + if (hc->fast) { > > + flush_ex.address_space = hc->ingpa; > > + flush_ex.flags = hc->outgpa; > > + memcpy(&flush_ex.hv_vp_set, > > + &hc->xmm[0], sizeof(hc->xmm[0])); > > + } else { > > + if (unlikely(kvm_read_guest(kvm, hc->ingpa, &flush_ex, > > + sizeof(flush_ex)))) > > + return HV_STATUS_INVALID_HYPERCALL_INPUT; > > + } > > > > trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask, > > flush_ex.hv_vp_set.format, > > @@ -1681,20 +1702,29 @@ static u64 kvm_hv_flush_tlb(struct 
kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool > > all_cpus = flush_ex.hv_vp_set.format != > > HV_GENERIC_SET_SPARSE_4K; > > > > - sparse_banks_len = > > - bitmap_weight((unsigned long *)&valid_bank_mask, 64) * > > - sizeof(sparse_banks[0]); > > + sparse_banks_len = bitmap_weight((unsigned long *)&valid_bank_mask, 64); > > > > if (!sparse_banks_len && !all_cpus) > > goto ret_success; > > > > - if (!all_cpus && > > - kvm_read_guest(kvm, > > - hc->ingpa + offsetof(struct hv_tlb_flush_ex, > > - hv_vp_set.bank_contents), > > - sparse_banks, > > - sparse_banks_len)) > > - return HV_STATUS_INVALID_HYPERCALL_INPUT; > > + if (!all_cpus) { > > + if (hc->fast) { > > + if (sparse_banks_len > KVM_HV_HYPERCALL_MAX_XMM_REGISTERS - 1) > > + return HV_STATUS_INVALID_HYPERCALL_INPUT; > > + for (i = 0, j = 1; i < sparse_banks_len; i += 2, j++) { > > Nitpick: you don't really need 'j' here as 'j == i/2 + 1', right? Yes, you are right. Changed. > > + sparse_banks[i + 0] = sse128_lo(hc->xmm[j]); > > Using ' + 0' for identation is ... unusual :-) I'm not opposed to it > here though. Old habit :-). > > + sparse_banks[i + 1] = sse128_hi(hc->xmm[j]); > > + } > > + } else { > > + gpa = hc->ingpa; > > + gpa += offsetof(struct hv_tlb_flush_ex, > > + hv_vp_set.bank_contents); > > Nitpick: if splitting these into two lines is only done to fit into 80 > chars then I'd the requirement is no more so we can be a bit wider. > > gpa = hc->ingpa + offsetof(...) Ack. 
> > + if (unlikely(kvm_read_guest(kvm, gpa, sparse_banks, > > + sparse_banks_len * > > + sizeof(sparse_banks[0])))) > > + return HV_STATUS_INVALID_HYPERCALL_INPUT; > > + } > > + } > > } > > > > cpumask_clear(&hv_vcpu->tlb_flush); > > @@ -1890,6 +1920,41 @@ static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *h > > return HV_STATUS_SUCCESS; > > } > > > > +static bool is_xmm_fast_hypercall(struct kvm_hv_hcall *hc) > > +{ > > + switch (hc->code) { > > + case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST: > > + case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE: > > + case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX: > > + case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX: > > + return true; > > + } > > + > > + return false; > > +} > > + > > +static inline void kvm_hv_hypercall_read_xmm(struct kvm_hv_hcall *hc) > > +{ > > + int reg; > > + > > + kvm_fpu_get(); > > + for (reg = 0; reg < KVM_HV_HYPERCALL_MAX_XMM_REGISTERS; reg++) > > + _kvm_read_sse_reg(reg, &hc->xmm[reg]); > > + kvm_fpu_put(); > > + hc->xmm_dirty = false; > > +} > > + > > +static inline void kvm_hv_hypercall_write_xmm(struct kvm_hv_hcall *hc) > > +{ > > + int reg; > > + > > + kvm_fpu_get(); > > + for (reg = 0; reg < KVM_HV_HYPERCALL_MAX_XMM_REGISTERS; reg++) > > + _kvm_write_sse_reg(reg, &hc->xmm[reg]); > > + kvm_fpu_put(); > > + hc->xmm_dirty = false; > > +} > > + > > int kvm_hv_hypercall(struct kvm_vcpu *vcpu) > > { > > struct kvm_hv_hcall hc; > > @@ -1926,6 +1991,9 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) > > hc.rep_idx = (hc.param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff; > > hc.rep = !!(hc.rep_cnt || hc.rep_idx); > > > > + if (hc.fast && is_xmm_fast_hypercall(&hc)) > > + kvm_hv_hypercall_read_xmm(&hc); > > + > > trace_kvm_hv_hypercall(hc.code, hc.fast, hc.rep_cnt, hc.rep_idx, > > hc.ingpa, hc.outgpa); > > > > @@ -1961,28 +2029,28 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) > > kvm_hv_hypercall_complete_userspace; > > return 0; > > case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST: > > - if (unlikely(hc.fast || 
!hc.rep_cnt || hc.rep_idx)) { > > + if (unlikely(!hc.rep_cnt || hc.rep_idx)) { > > ret = HV_STATUS_INVALID_HYPERCALL_INPUT; > > break; > > } > > ret = kvm_hv_flush_tlb(vcpu, &hc, false); > > break; > > case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE: > > - if (unlikely(hc.fast || hc.rep)) { > > + if (unlikely(hc.rep)) { > > ret = HV_STATUS_INVALID_HYPERCALL_INPUT; > > break; > > } > > ret = kvm_hv_flush_tlb(vcpu, &hc, false); > > break; > > case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX: > > - if (unlikely(hc.fast || !hc.rep_cnt || hc.rep_idx)) { > > + if (unlikely(!hc.rep_cnt || hc.rep_idx)) { > > ret = HV_STATUS_INVALID_HYPERCALL_INPUT; > > break; > > } > > ret = kvm_hv_flush_tlb(vcpu, &hc, true); > > break; > > case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX: > > - if (unlikely(hc.fast || hc.rep)) { > > + if (unlikely(hc.rep)) { > > ret = HV_STATUS_INVALID_HYPERCALL_INPUT; > > break; > > } > > @@ -2035,6 +2103,9 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) > > break; > > } > > > > + if (hc.xmm_dirty) > > + kvm_hv_hypercall_write_xmm(&hc); > > + > > Wei already mention that but as 'xmm_dirty' is not being used in this > patch I'd suggest we move it out too. Okay, I should remove HV_X64_HYPERCALL_XMM_OUTPUT_AVAILABLE for now then. ~ Sid. Amazon Development Center Germany GmbH Krausenstr. 38 10117 Berlin Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B Sitz: Berlin Ust-ID: DE 289 237 879
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 8f6babd1ea0d..1f9959aba70d 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -36,6 +36,7 @@ #include "trace.h" #include "irq.h" +#include "fpu.h" /* "Hv#1" signature */ #define HYPERV_CPUID_SIGNATURE_EAX 0x31237648 @@ -1623,6 +1624,8 @@ static __always_inline unsigned long *sparse_set_to_vcpu_mask( return vcpu_bitmap; } +#define KVM_HV_HYPERCALL_MAX_XMM_REGISTERS 6 + struct kvm_hv_hcall { u64 param; u64 ingpa; @@ -1632,10 +1635,14 @@ struct kvm_hv_hcall { u16 rep_idx; bool fast; bool rep; + sse128_t xmm[KVM_HV_HYPERCALL_MAX_XMM_REGISTERS]; + bool xmm_dirty; }; static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool ex) { + int i, j; + gpa_t gpa; struct kvm *kvm = vcpu->kvm; struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); struct hv_tlb_flush_ex flush_ex; @@ -1649,8 +1656,15 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool bool all_cpus; if (!ex) { - if (unlikely(kvm_read_guest(kvm, hc->ingpa, &flush, sizeof(flush)))) - return HV_STATUS_INVALID_HYPERCALL_INPUT; + if (hc->fast) { + flush.address_space = hc->ingpa; + flush.flags = hc->outgpa; + flush.processor_mask = sse128_lo(hc->xmm[0]); + } else { + if (unlikely(kvm_read_guest(kvm, hc->ingpa, + &flush, sizeof(flush)))) + return HV_STATUS_INVALID_HYPERCALL_INPUT; + } trace_kvm_hv_flush_tlb(flush.processor_mask, flush.address_space, flush.flags); @@ -1668,9 +1682,16 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool all_cpus = (flush.flags & HV_FLUSH_ALL_PROCESSORS) || flush.processor_mask == 0; } else { - if (unlikely(kvm_read_guest(kvm, hc->ingpa, &flush_ex, - sizeof(flush_ex)))) - return HV_STATUS_INVALID_HYPERCALL_INPUT; + if (hc->fast) { + flush_ex.address_space = hc->ingpa; + flush_ex.flags = hc->outgpa; + memcpy(&flush_ex.hv_vp_set, + &hc->xmm[0], sizeof(hc->xmm[0])); + } else { + if (unlikely(kvm_read_guest(kvm, hc->ingpa, &flush_ex, + 
sizeof(flush_ex)))) + return HV_STATUS_INVALID_HYPERCALL_INPUT; + } trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask, flush_ex.hv_vp_set.format, @@ -1681,20 +1702,29 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc, bool all_cpus = flush_ex.hv_vp_set.format != HV_GENERIC_SET_SPARSE_4K; - sparse_banks_len = - bitmap_weight((unsigned long *)&valid_bank_mask, 64) * - sizeof(sparse_banks[0]); + sparse_banks_len = bitmap_weight((unsigned long *)&valid_bank_mask, 64); if (!sparse_banks_len && !all_cpus) goto ret_success; - if (!all_cpus && - kvm_read_guest(kvm, - hc->ingpa + offsetof(struct hv_tlb_flush_ex, - hv_vp_set.bank_contents), - sparse_banks, - sparse_banks_len)) - return HV_STATUS_INVALID_HYPERCALL_INPUT; + if (!all_cpus) { + if (hc->fast) { + if (sparse_banks_len > KVM_HV_HYPERCALL_MAX_XMM_REGISTERS - 1) + return HV_STATUS_INVALID_HYPERCALL_INPUT; + for (i = 0, j = 1; i < sparse_banks_len; i += 2, j++) { + sparse_banks[i + 0] = sse128_lo(hc->xmm[j]); + sparse_banks[i + 1] = sse128_hi(hc->xmm[j]); + } + } else { + gpa = hc->ingpa; + gpa += offsetof(struct hv_tlb_flush_ex, + hv_vp_set.bank_contents); + if (unlikely(kvm_read_guest(kvm, gpa, sparse_banks, + sparse_banks_len * + sizeof(sparse_banks[0])))) + return HV_STATUS_INVALID_HYPERCALL_INPUT; + } + } } cpumask_clear(&hv_vcpu->tlb_flush); @@ -1890,6 +1920,41 @@ static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *h return HV_STATUS_SUCCESS; } +static bool is_xmm_fast_hypercall(struct kvm_hv_hcall *hc) +{ + switch (hc->code) { + case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST: + case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE: + case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX: + case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX: + return true; + } + + return false; +} + +static inline void kvm_hv_hypercall_read_xmm(struct kvm_hv_hcall *hc) +{ + int reg; + + kvm_fpu_get(); + for (reg = 0; reg < KVM_HV_HYPERCALL_MAX_XMM_REGISTERS; reg++) + _kvm_read_sse_reg(reg, &hc->xmm[reg]); 
+ kvm_fpu_put(); + hc->xmm_dirty = false; +} + +static inline void kvm_hv_hypercall_write_xmm(struct kvm_hv_hcall *hc) +{ + int reg; + + kvm_fpu_get(); + for (reg = 0; reg < KVM_HV_HYPERCALL_MAX_XMM_REGISTERS; reg++) + _kvm_write_sse_reg(reg, &hc->xmm[reg]); + kvm_fpu_put(); + hc->xmm_dirty = false; +} + int kvm_hv_hypercall(struct kvm_vcpu *vcpu) { struct kvm_hv_hcall hc; @@ -1926,6 +1991,9 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) hc.rep_idx = (hc.param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff; hc.rep = !!(hc.rep_cnt || hc.rep_idx); + if (hc.fast && is_xmm_fast_hypercall(&hc)) + kvm_hv_hypercall_read_xmm(&hc); + trace_kvm_hv_hypercall(hc.code, hc.fast, hc.rep_cnt, hc.rep_idx, hc.ingpa, hc.outgpa); @@ -1961,28 +2029,28 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) kvm_hv_hypercall_complete_userspace; return 0; case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST: - if (unlikely(hc.fast || !hc.rep_cnt || hc.rep_idx)) { + if (unlikely(!hc.rep_cnt || hc.rep_idx)) { ret = HV_STATUS_INVALID_HYPERCALL_INPUT; break; } ret = kvm_hv_flush_tlb(vcpu, &hc, false); break; case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE: - if (unlikely(hc.fast || hc.rep)) { + if (unlikely(hc.rep)) { ret = HV_STATUS_INVALID_HYPERCALL_INPUT; break; } ret = kvm_hv_flush_tlb(vcpu, &hc, false); break; case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX: - if (unlikely(hc.fast || !hc.rep_cnt || hc.rep_idx)) { + if (unlikely(!hc.rep_cnt || hc.rep_idx)) { ret = HV_STATUS_INVALID_HYPERCALL_INPUT; break; } ret = kvm_hv_flush_tlb(vcpu, &hc, true); break; case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX: - if (unlikely(hc.fast || hc.rep)) { + if (unlikely(hc.rep)) { ret = HV_STATUS_INVALID_HYPERCALL_INPUT; break; } @@ -2035,6 +2103,9 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) break; } + if (hc.xmm_dirty) + kvm_hv_hypercall_write_xmm(&hc); + return kvm_hv_hypercall_complete(vcpu, ret); }