diff mbox series

[v3,11/34] KVM: x86: hyper-v: Use preallocated buffer in 'struct kvm_vcpu_hv' instead of on-stack 'sparse_banks'

Message ID 20220414132013.1588929-12-vkuznets@redhat.com (mailing list archive)
State New, archived
Headers show
Series KVM: x86: hyper-v: Fine-grained TLB flush + L2 TLB flush feature | expand

Commit Message

Vitaly Kuznetsov April 14, 2022, 1:19 p.m. UTC
To make kvm_hv_flush_tlb() ready to handle L2 TLB flush requests, KVM needs
to allow for all 64 sparse vCPU banks regardless of KVM_MAX_VCPUs as L1
may use vCPU overcommit for L2. To avoid growing on-stack allocation, make
'sparse_banks' part of per-vCPU 'struct kvm_vcpu_hv' which is allocated
dynamically.

Note: sparse_set_to_vcpu_mask() keeps using on-stack allocation as it
won't be used to handle L2 TLB flush requests.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 3 +++
 arch/x86/kvm/hyperv.c           | 6 ++++--
 2 files changed, 7 insertions(+), 2 deletions(-)

Comments

Maxim Levitsky May 11, 2022, 11:25 a.m. UTC | #1
On Thu, 2022-04-14 at 15:19 +0200, Vitaly Kuznetsov wrote:
> To make kvm_hv_flush_tlb() ready to handle L2 TLB flush requests, KVM needs
> to allow for all 64 sparse vCPU banks regardless of KVM_MAX_VCPUs as L1
> may use vCPU overcommit for L2. To avoid growing on-stack allocation, make
> 'sparse_banks' part of per-vCPU 'struct kvm_vcpu_hv' which is allocated
> dynamically.
> 
> Note: sparse_set_to_vcpu_mask() keeps using on-stack allocation as it
> won't be used to handle L2 TLB flush requests.
> 
> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
> ---
>  arch/x86/include/asm/kvm_host.h | 3 +++
>  arch/x86/kvm/hyperv.c           | 6 ++++--
>  2 files changed, 7 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 058061621872..837c07e213de 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -619,6 +619,9 @@ struct kvm_vcpu_hv {
>  	} cpuid_cache;
>  
>  	struct kvm_vcpu_hv_tlb_flush_ring tlb_flush_ring[HV_NR_TLB_FLUSH_RINGS];
> +
> +	/* Preallocated buffer for handling hypercalls passing sparse vCPU set */
> +	u64 sparse_banks[64];
>  };
>  
>  /* Xen HVM per vcpu emulation context */
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index 1cef2b8f7001..e9793d36acca 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -1968,6 +1968,8 @@ void kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
>  
>  static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
>  {
> +	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
> +	u64 *sparse_banks = hv_vcpu->sparse_banks;
>  	struct kvm *kvm = vcpu->kvm;
>  	struct hv_tlb_flush_ex flush_ex;
>  	struct hv_tlb_flush flush;
> @@ -1982,7 +1984,6 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
>  	u64 __tlb_flush_entries[KVM_HV_TLB_FLUSH_RING_SIZE - 2];
>  	u64 *tlb_flush_entries;
>  	u64 valid_bank_mask;
> -	u64 sparse_banks[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
>  	struct kvm_vcpu *v;
>  	unsigned long i;
>  	bool all_cpus;
> @@ -2134,11 +2135,12 @@ static void kvm_hv_send_ipi_to_many(struct kvm *kvm, u32 vector,
>  
>  static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
>  {
> +	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
> +	u64 *sparse_banks = hv_vcpu->sparse_banks;
>  	struct kvm *kvm = vcpu->kvm;
>  	struct hv_send_ipi_ex send_ipi_ex;
>  	struct hv_send_ipi send_ipi;
>  	unsigned long valid_bank_mask;
> -	u64 sparse_banks[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
>  	u32 vector;
>  	bool all_cpus;
>  

Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>

Best regards,
	Maxim Levitsky
Sean Christopherson May 16, 2022, 8:05 p.m. UTC | #2
On Thu, Apr 14, 2022, Vitaly Kuznetsov wrote:
> To make kvm_hv_flush_tlb() ready to handle L2 TLB flush requests, KVM needs
> to allow for all 64 sparse vCPU banks regardless of KVM_MAX_VCPUs as L1
> may use vCPU overcommit for L2. To avoid growing on-stack allocation, make
> 'sparse_banks' part of per-vCPU 'struct kvm_vcpu_hv' which is allocated
> dynamically.
> 
> Note: sparse_set_to_vcpu_mask() keeps using on-stack allocation as it
> won't be used to handle L2 TLB flush requests.

I think it's worth using stronger language; handling TLB flushes for L2 _can't_
use sparse_set_to_vcpu_mask() because KVM has no idea how to translate an L2
vCPU index to an L1 vCPU.  I found the above mildly confusing because it didn't
call out "vp_bitmap" and so I assumed the note referred to yet another sparse_banks
"allocation".  And while vp_bitmap is related to sparse_banks, it tracks something
entirely different.

Something like?

Note: sparse_set_to_vcpu_mask() can never be used to handle L2 requests as
KVM can't translate L2 vCPU indices to L1 vCPUs, i.e. its vp_bitmap array
is still bounded by the number of L1 vCPUs and so can remain an on-stack
allocation.

> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
> ---
>  arch/x86/include/asm/kvm_host.h | 3 +++
>  arch/x86/kvm/hyperv.c           | 6 ++++--
>  2 files changed, 7 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 058061621872..837c07e213de 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -619,6 +619,9 @@ struct kvm_vcpu_hv {
>  	} cpuid_cache;
>  
>  	struct kvm_vcpu_hv_tlb_flush_ring tlb_flush_ring[HV_NR_TLB_FLUSH_RINGS];
> +
> +	/* Preallocated buffer for handling hypercalls passing sparse vCPU set */
> +	u64 sparse_banks[64];

Shouldn't this be HV_MAX_SPARSE_VCPU_BANKS?
Vitaly Kuznetsov May 17, 2022, 1:51 p.m. UTC | #3
Sean Christopherson <seanjc@google.com> writes:

> On Thu, Apr 14, 2022, Vitaly Kuznetsov wrote:
>> To make kvm_hv_flush_tlb() ready to handle L2 TLB flush requests, KVM needs
>> to allow for all 64 sparse vCPU banks regardless of KVM_MAX_VCPUs as L1
>> may use vCPU overcommit for L2. To avoid growing on-stack allocation, make
>> 'sparse_banks' part of per-vCPU 'struct kvm_vcpu_hv' which is allocated
>> dynamically.
>> 
>> Note: sparse_set_to_vcpu_mask() keeps using on-stack allocation as it
>> won't be used to handle L2 TLB flush requests.
>
> I think it's worth using stronger language; handling TLB flushes for L2 _can't_
> use sparse_set_to_vcpu_mask() because KVM has no idea how to translate an L2
> vCPU index to an L1 vCPU.  I found the above mildly confusing because it didn't
> call out "vp_bitmap" and so I assumed the note referred to yet another sparse_banks
> "allocation".  And while vp_bitmap is related to sparse_banks, it tracks something
> entirely different.
>
> Something like?
>
> Note: sparse_set_to_vcpu_mask() can never be used to handle L2 requests as
> KVM can't translate L2 vCPU indices to L1 vCPUs, i.e. its vp_bitmap array
> is still bounded by the number of L1 vCPUs and so can remain an on-stack
> allocation.

My brain is probably tainted by looking at all this for some time so I
really appreciate such improvements, thanks :)

I wouldn't, however, say "never" ('never say never' :-)): KVM could've
kept 2-level reverse mapping up-to-date:

KVM -> L2 VM list -> L2 vCPU ids -> L1 vCPUs which run them

making it possible for KVM to quickly translate between L2 VP IDs and L1
vCPUs. I don't do this in the series and just record L2 VM_ID/VP_ID for
each L1 vCPU so I have to go over them all for each request. The
optimization is, however, possible and we may get to it if really big
Windows VMs become a reality.

>
>> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
>> ---
>>  arch/x86/include/asm/kvm_host.h | 3 +++
>>  arch/x86/kvm/hyperv.c           | 6 ++++--
>>  2 files changed, 7 insertions(+), 2 deletions(-)
>> 
>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>> index 058061621872..837c07e213de 100644
>> --- a/arch/x86/include/asm/kvm_host.h
>> +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -619,6 +619,9 @@ struct kvm_vcpu_hv {
>>  	} cpuid_cache;
>>  
>>  	struct kvm_vcpu_hv_tlb_flush_ring tlb_flush_ring[HV_NR_TLB_FLUSH_RINGS];
>> +
>> +	/* Preallocated buffer for handling hypercalls passing sparse vCPU set */
>> +	u64 sparse_banks[64];
>
> Shouldn't this be HV_MAX_SPARSE_VCPU_BANKS?
>

It certainly should, thanks!
Sean Christopherson May 17, 2022, 2:04 p.m. UTC | #4
On Tue, May 17, 2022, Vitaly Kuznetsov wrote:
> Sean Christopherson <seanjc@google.com> writes:
> 
> > On Thu, Apr 14, 2022, Vitaly Kuznetsov wrote:
> >> To make kvm_hv_flush_tlb() ready to handle L2 TLB flush requests, KVM needs
> >> to allow for all 64 sparse vCPU banks regardless of KVM_MAX_VCPUs as L1
> >> may use vCPU overcommit for L2. To avoid growing on-stack allocation, make
> >> 'sparse_banks' part of per-vCPU 'struct kvm_vcpu_hv' which is allocated
> >> dynamically.
> >> 
> >> Note: sparse_set_to_vcpu_mask() keeps using on-stack allocation as it
> >> won't be used to handle L2 TLB flush requests.
> >
> > I think it's worth using stronger language; handling TLB flushes for L2 _can't_
> > use sparse_set_to_vcpu_mask() because KVM has no idea how to translate an L2
> > vCPU index to an L1 vCPU.  I found the above mildly confusing because it didn't
> > call out "vp_bitmap" and so I assumed the note referred to yet another sparse_banks
> > "allocation".  And while vp_bitmap is related to sparse_banks, it tracks something
> > entirely different.
> >
> > Something like?
> >
> > Note: sparse_set_to_vcpu_mask() can never be used to handle L2 requests as
> > KVM can't translate L2 vCPU indices to L1 vCPUs, i.e. its vp_bitmap array
> > is still bounded by the number of L1 vCPUs and so can remain an on-stack
> > allocation.
> 
> My brain is probably tainted by looking at all this for some time so I
> really appreciate such improvements, thanks :)
> 
> I wouldn't, however, say "never" ('never say never' :-)): KVM could've
> kept 2-level reverse mapping up-to-date:
> 
> KVM -> L2 VM list -> L2 vCPU ids -> L1 vCPUs which run them
> 
> making it possible for KVM to quickly translate between L2 VP IDs and L1
> vCPUs. I don't do this in the series and just record L2 VM_ID/VP_ID for
> each L1 vCPU so I have to go over them all for each request. The
> optimization is, however, possible and we may get to it if really big
> Windows VMs become a reality.

Out of curiosity, is L1 "required" to provides the L2 => L1 translation/map?
Vitaly Kuznetsov May 17, 2022, 2:19 p.m. UTC | #5
Sean Christopherson <seanjc@google.com> writes:

> On Tue, May 17, 2022, Vitaly Kuznetsov wrote:
>> Sean Christopherson <seanjc@google.com> writes:
>> 
>> > On Thu, Apr 14, 2022, Vitaly Kuznetsov wrote:
>> >> To make kvm_hv_flush_tlb() ready to handle L2 TLB flush requests, KVM needs
>> >> to allow for all 64 sparse vCPU banks regardless of KVM_MAX_VCPUs as L1
>> >> may use vCPU overcommit for L2. To avoid growing on-stack allocation, make
>> >> 'sparse_banks' part of per-vCPU 'struct kvm_vcpu_hv' which is allocated
>> >> dynamically.
>> >> 
>> >> Note: sparse_set_to_vcpu_mask() keeps using on-stack allocation as it
>> >> won't be used to handle L2 TLB flush requests.
>> >
>> > I think it's worth using stronger language; handling TLB flushes for L2 _can't_
>> > use sparse_set_to_vcpu_mask() because KVM has no idea how to translate an L2
>> > vCPU index to an L1 vCPU.  I found the above mildly confusing because it didn't
>> > call out "vp_bitmap" and so I assumed the note referred to yet another sparse_banks
>> > "allocation".  And while vp_bitmap is related to sparse_banks, it tracks something
>> > entirely different.
>> >
>> > Something like?
>> >
>> > Note: sparse_set_to_vcpu_mask() can never be used to handle L2 requests as
>> > KVM can't translate L2 vCPU indices to L1 vCPUs, i.e. its vp_bitmap array
>> > is still bounded by the number of L1 vCPUs and so can remain an on-stack
>> > allocation.
>> 
>> My brain is probably tainted by looking at all this for some time so I
>> really appreciate such improvements, thanks :)
>> 
>> I wouldn't, however, say "never" ('never say never' :-)): KVM could've
>> kept 2-level reverse mapping up-to-date:
>> 
>> KVM -> L2 VM list -> L2 vCPU ids -> L1 vCPUs which run them
>> 
>> making it possible for KVM to quickly translate between L2 VP IDs and L1
>> vCPUs. I don't do this in the series and just record L2 VM_ID/VP_ID for
>> each L1 vCPU so I have to go over them all for each request. The
>> optimization is, however, possible and we may get to it if really big
>> Windows VMs become a reality.
>
> Out of curiosity, is L1 "required" to provides the L2 => L1 translation/map?
>

To make this "Direct Virtual Flush" feature work? Yes, it is:

...
"
Before enabling it, the L1 hypervisor must configure the following
additional fields of the enlightened VMCS:
- VpId: ID of the virtual processor that the enlightened VMCS controls.
- VmId: ID of the virtual machine that the enlightened VMCS belongs to.
- PartitionAssistPage: Guest physical address of the partition assist
page.
"
diff mbox series

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 058061621872..837c07e213de 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -619,6 +619,9 @@  struct kvm_vcpu_hv {
 	} cpuid_cache;
 
 	struct kvm_vcpu_hv_tlb_flush_ring tlb_flush_ring[HV_NR_TLB_FLUSH_RINGS];
+
+	/* Preallocated buffer for handling hypercalls passing sparse vCPU set */
+	u64 sparse_banks[64];
 };
 
 /* Xen HVM per vcpu emulation context */
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 1cef2b8f7001..e9793d36acca 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1968,6 +1968,8 @@  void kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
 
 static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
 {
+	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
+	u64 *sparse_banks = hv_vcpu->sparse_banks;
 	struct kvm *kvm = vcpu->kvm;
 	struct hv_tlb_flush_ex flush_ex;
 	struct hv_tlb_flush flush;
@@ -1982,7 +1984,6 @@  static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
 	u64 __tlb_flush_entries[KVM_HV_TLB_FLUSH_RING_SIZE - 2];
 	u64 *tlb_flush_entries;
 	u64 valid_bank_mask;
-	u64 sparse_banks[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
 	struct kvm_vcpu *v;
 	unsigned long i;
 	bool all_cpus;
@@ -2134,11 +2135,12 @@  static void kvm_hv_send_ipi_to_many(struct kvm *kvm, u32 vector,
 
 static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
 {
+	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
+	u64 *sparse_banks = hv_vcpu->sparse_banks;
 	struct kvm *kvm = vcpu->kvm;
 	struct hv_send_ipi_ex send_ipi_ex;
 	struct hv_send_ipi send_ipi;
 	unsigned long valid_bank_mask;
-	u64 sparse_banks[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
 	u32 vector;
 	bool all_cpus;