
[3/5] KVM: x86: hyperv: simplistic HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE} implementation

Message ID: 20180402161059.8488-4-vkuznets@redhat.com
State: New, archived

Commit Message

Vitaly Kuznetsov April 2, 2018, 4:10 p.m. UTC
Implement HvFlushVirtualAddress{List,Space} hypercalls in a simplistic way:
do full TLB flush with KVM_REQ_TLB_FLUSH and rely on kvm_vcpu_kick()
kicking only vCPUs which are currently IN_GUEST_MODE.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
---
 arch/x86/kvm/hyperv.c | 54 ++++++++++++++++++++++++++++++++++++++++++++-------
 arch/x86/kvm/trace.h  | 24 +++++++++++++++++++++++
 2 files changed, 71 insertions(+), 7 deletions(-)

Comments

Radim Krčmář April 3, 2018, 7:15 p.m. UTC | #1
2018-04-02 18:10+0200, Vitaly Kuznetsov:
> Implement HvFlushVirtualAddress{List,Space} hypercalls in a simplistic way:
> do full TLB flush with KVM_REQ_TLB_FLUSH and rely on kvm_vcpu_kick()
> kicking only vCPUs which are currently IN_GUEST_MODE.
> 
> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
> ---
>  arch/x86/kvm/hyperv.c | 54 ++++++++++++++++++++++++++++++++++++++++++++-------
>  arch/x86/kvm/trace.h  | 24 +++++++++++++++++++++++
>  2 files changed, 71 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
> index 3cb3bb68db7e..aa866994366d 100644
> --- a/arch/x86/kvm/hyperv.c
> +++ b/arch/x86/kvm/hyperv.c
> @@ -1242,6 +1242,49 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
>  		return kvm_hv_get_msr(vcpu, msr, pdata);
>  }
>  
> +static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
> +			    u16 rep_cnt)
> +{
> +	struct kvm *kvm = current_vcpu->kvm;
> +	struct hv_tlb_flush flush;
> +	struct kvm_vcpu *vcpu;
> +	int i;
> +
> +	if (unlikely(kvm_read_guest(kvm, ingpa, &flush, sizeof(flush))))
> +		return HV_STATUS_INVALID_HYPERCALL_INPUT;
> +
> +	trace_kvm_hv_flush_tlb(flush.processor_mask, flush.address_space,
> +			       flush.flags);
> +
> +	kvm_for_each_vcpu(i, vcpu, kvm) {
> +		struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
> +
> +		if (!(flush.flags & HV_FLUSH_ALL_PROCESSORS) &&
> +		    !(flush.processor_mask & BIT_ULL(hv->vp_index)))
> +			continue;
> +
> +		/*
> +		 * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we
> +		 * can't analyze it here, flush TLB regardless of the specified
> +		 * address space.
> +		 */
> +		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
> +
> +		/*
> +		 * It is very unlikely but possible that we're doing an extra
> +		 * kick here (e.g. if the vCPU has just entered the guest and
> +		 * has its TLB flushed).
> +		 */
> +		if (vcpu != current_vcpu)
> +			kvm_vcpu_kick(vcpu);

The spec says that

 "This call guarantees that by the time control returns back to the
  caller, the observable effects of all flushes on the specified virtual
  processors have occurred."

Other KVM code doesn't assume that kvm_vcpu_kick() and a delay provide
that guarantee; kvm_make_all_cpus_request() waits for the target CPU to
exit before saying that the TLB has been flushed.

I am leaning towards the safer variant here as well.  (Anyway, it's a
good time to figure out if we really need it.)
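
For reference, the relevant part of kvm_make_all_cpus_request() goes
roughly like this (a condensed paraphrase of virt/kvm/kvm_main.c, not
the verbatim source):

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvm_make_request(req, vcpu);

		/* set the request bit before sampling vcpu->mode */
		smp_mb__after_atomic();

		if (vcpu->cpu != -1 && vcpu->cpu != me &&
		    kvm_vcpu_exiting_guest_mode(vcpu) != OUTSIDE_GUEST_MODE)
			cpumask_set_cpu(vcpu->cpu, cpus);
	}

	/* wait == 1: do not return until every IPI has been serviced */
	smp_call_function_many(cpus, ack_flush, NULL, 1);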

> +	}
> +
> +	/* We always do full TLB flush, set rep_done = rep_cnt. */
> +	return (u64)HV_STATUS_SUCCESS |
> +		((u64)rep_cnt << HV_HYPERCALL_REP_START_OFFSET) |

Why at bits 48-59?  I don't see this field in the spec.

> +		((u64)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
> +}
> +
>  bool kvm_hv_hypercall_enabled(struct kvm *kvm)
>  {
>  	return READ_ONCE(kvm->arch.hyperv.hv_hypercall) & HV_X64_MSR_HYPERCALL_ENABLE;
> @@ -1345,12 +1388,6 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
>  
>  	trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
>  
> -	/* Hypercall continuation is not supported yet */
> -	if (rep_cnt || rep_idx) {
> -		ret = HV_STATUS_INVALID_HYPERCALL_CODE;

Hm, we should have returned HV_STATUS_INVALID_HYPERCALL_INPUT in any
case.  I think it would be good to still fail in case of non-rep
hypercalls,

thanks.
Vitaly Kuznetsov April 4, 2018, 9:27 a.m. UTC | #2
Radim Krčmář <rkrcmar@redhat.com> writes:

> 2018-04-02 18:10+0200, Vitaly Kuznetsov:
>> Implement HvFlushVirtualAddress{List,Space} hypercalls in a simplistic way:
>> do full TLB flush with KVM_REQ_TLB_FLUSH and rely on kvm_vcpu_kick()
>> kicking only vCPUs which are currently IN_GUEST_MODE.
>> 
>> Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
>> ---
>>  arch/x86/kvm/hyperv.c | 54 ++++++++++++++++++++++++++++++++++++++++++++-------
>>  arch/x86/kvm/trace.h  | 24 +++++++++++++++++++++++
>>  2 files changed, 71 insertions(+), 7 deletions(-)
>> 
>> diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
>> index 3cb3bb68db7e..aa866994366d 100644
>> --- a/arch/x86/kvm/hyperv.c
>> +++ b/arch/x86/kvm/hyperv.c
>> @@ -1242,6 +1242,49 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
>>  		return kvm_hv_get_msr(vcpu, msr, pdata);
>>  }
>>  
>> +static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
>> +			    u16 rep_cnt)
>> +{
>> +	struct kvm *kvm = current_vcpu->kvm;
>> +	struct hv_tlb_flush flush;
>> +	struct kvm_vcpu *vcpu;
>> +	int i;
>> +
>> +	if (unlikely(kvm_read_guest(kvm, ingpa, &flush, sizeof(flush))))
>> +		return HV_STATUS_INVALID_HYPERCALL_INPUT;
>> +
>> +	trace_kvm_hv_flush_tlb(flush.processor_mask, flush.address_space,
>> +			       flush.flags);
>> +
>> +	kvm_for_each_vcpu(i, vcpu, kvm) {
>> +		struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
>> +
>> +		if (!(flush.flags & HV_FLUSH_ALL_PROCESSORS) &&
>> +		    !(flush.processor_mask & BIT_ULL(hv->vp_index)))
>> +			continue;
>> +
>> +		/*
>> +		 * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we
>> +		 * can't analyze it here, flush TLB regardless of the specified
>> +		 * address space.
>> +		 */
>> +		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
>> +
>> +		/*
>> +		 * It is very unlikely but possible that we're doing an extra
>> +		 * kick here (e.g. if the vCPU has just entered the guest and
>> +		 * has its TLB flushed).
>> +		 */
>> +		if (vcpu != current_vcpu)
>> +			kvm_vcpu_kick(vcpu);
>
> The spec says that
>
>  "This call guarantees that by the time control returns back to the
>   caller, the observable effects of all flushes on the specified virtual
>   processors have occurred."
>
> Other KVM code doesn't assume that kvm_vcpu_kick() and a delay provide
> that guarantee; kvm_make_all_cpus_request() waits for the target CPU to
> exit before saying that the TLB has been flushed.
>
> I am leaning towards the safer variant here as well.  (Anyway, it's a
> good time to figure out if we really need it.)

Ha, it depends on how we define "observable effects" :-)

I think kvm_vcpu_kick() is enough, as the corresponding vCPU can't
actually observe the old mapping after being kicked (even if we haven't
flushed yet, the vCPU isn't running). Or do you see any possible problem
with such a definition?


>
>> +	}
>> +
>> +	/* We always do full TLB flush, set rep_done = rep_cnt. */
>> +	return (u64)HV_STATUS_SUCCESS |
>> +		((u64)rep_cnt << HV_HYPERCALL_REP_START_OFFSET) |
>
> Why at bits 48-59?  I don't see this field in the spec.
>

True, it is only for 'input'. Will drop.
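
The return value then reduces to the status plus the reps-completed
field, i.e. something like:

	/* the rep start index exists only in the hypercall *input* value */
	return (u64)HV_STATUS_SUCCESS |
		((u64)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);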


>> +		((u64)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
>> +}
>> +
>>  bool kvm_hv_hypercall_enabled(struct kvm *kvm)
>>  {
>>  	return READ_ONCE(kvm->arch.hyperv.hv_hypercall) & HV_X64_MSR_HYPERCALL_ENABLE;
>> @@ -1345,12 +1388,6 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
>>  
>>  	trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
>>  
>> -	/* Hypercall continuation is not supported yet */
>> -	if (rep_cnt || rep_idx) {
>> -		ret = HV_STATUS_INVALID_HYPERCALL_CODE;
>
> Hm, we should have returned HV_STATUS_INVALID_HYPERCALL_INPUT in any
> case.  I think it would be good to still fail in case of non-rep
> hypercalls,

Sure. I skimmed through the spec and didn't find any explicit statement
that setting the 'rep' bits on non-rep hypercalls is forbidden, but a
guest doing that is definitely buggy.
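
One way to keep failing them could be a per-code check instead of the
blanket one, e.g. (a sketch only, not what this patch does):

	bool rep = !!(rep_cnt || rep_idx);

	switch (code) {
	case HVCALL_NOTIFY_LONG_SPIN_WAIT:
		/* non-rep hypercall: rep bits set by the guest are a bug */
		if (unlikely(rep)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		kvm_vcpu_on_spin(vcpu, true);
		break;
	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
		/* non-rep variant */
		if (unlikely(rep)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt);
		break;
	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
		/* rep variant: a zero rep count is invalid */
		if (unlikely(!rep)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt);
		break;
	/* other hypercalls omitted from this sketch */
	default:
		ret = HV_STATUS_INVALID_HYPERCALL_CODE;
		break;
	}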

Thanks for the review!
Vitaly Kuznetsov April 4, 2018, 9:41 a.m. UTC | #3
Vitaly Kuznetsov <vkuznets@redhat.com> writes:

> Radim Krčmář <rkrcmar@redhat.com> writes:
>
>> 2018-04-02 18:10+0200, Vitaly Kuznetsov:
>>> +		if (vcpu != current_vcpu)
>>> +			kvm_vcpu_kick(vcpu);
>>
>> The spec says that
>>
>>  "This call guarantees that by the time control returns back to the
>>   caller, the observable effects of all flushes on the specified virtual
>>   processors have occurred."
>>
>> Other KVM code doesn't assume that kvm_vcpu_kick() and a delay provide
>> that guarantee; kvm_make_all_cpus_request() waits for the target CPU to
>> exit before saying that the TLB has been flushed.
>>
>> I am leaning towards the safer variant here as well.  (Anyway, it's a
>> good time to figure out if we really need it.)
>
> Ha, it depends on how we define "observable effects" :-)
>
> I think kvm_vcpu_kick() is enough, as the corresponding vCPU can't
> actually observe the old mapping after being kicked (even if we haven't
> flushed yet, the vCPU isn't running). Or do you see any possible problem
> with such a definition?
>

Oh, now I see it myself -- native_smp_send_reschedule() only does
apic->send_IPI(), so this is indeed unsafe. We need something like
kvm_make_all_cpus_request() with a mask (and, to make it fast, we'll
probably have to pre-allocate the cpumasks).
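
Something along these lines, perhaps (a rough sketch with a made-up
name; the real thing will be in v2):

	/*
	 * Sketch: like kvm_make_all_cpus_request(), but only for the vCPUs
	 * selected by @vcpu_bitmap.  @tmp is a pre-allocated cpumask so the
	 * hypercall path doesn't have to allocate one.
	 */
	static bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req,
						unsigned long *vcpu_bitmap,
						cpumask_var_t tmp)
	{
		struct kvm_vcpu *vcpu;
		int i, me = get_cpu();
		bool called;

		cpumask_clear(tmp);

		kvm_for_each_vcpu(i, vcpu, kvm) {
			if (!test_bit(i, vcpu_bitmap))
				continue;

			kvm_make_request(req, vcpu);

			/* set the request bit before sampling vcpu->mode */
			smp_mb__after_atomic();

			if (vcpu->cpu != -1 && vcpu->cpu != me &&
			    kvm_vcpu_exiting_guest_mode(vcpu) != OUTSIDE_GUEST_MODE)
				__cpumask_set_cpu(vcpu->cpu, tmp);
		}

		/* wait == true: do not return until the IPIs were serviced */
		called = !cpumask_empty(tmp);
		if (called)
			smp_call_function_many(tmp, ack_flush, NULL, true);

		put_cpu();
		return called;
	}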

Will do in v2, thanks!

Patch

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 3cb3bb68db7e..aa866994366d 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1242,6 +1242,49 @@  int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 		return kvm_hv_get_msr(vcpu, msr, pdata);
 }
 
+static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
+			    u16 rep_cnt)
+{
+	struct kvm *kvm = current_vcpu->kvm;
+	struct hv_tlb_flush flush;
+	struct kvm_vcpu *vcpu;
+	int i;
+
+	if (unlikely(kvm_read_guest(kvm, ingpa, &flush, sizeof(flush))))
+		return HV_STATUS_INVALID_HYPERCALL_INPUT;
+
+	trace_kvm_hv_flush_tlb(flush.processor_mask, flush.address_space,
+			       flush.flags);
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
+
+		if (!(flush.flags & HV_FLUSH_ALL_PROCESSORS) &&
+		    !(flush.processor_mask & BIT_ULL(hv->vp_index)))
+			continue;
+
+		/*
+		 * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we
+		 * can't analyze it here, flush TLB regardless of the specified
+		 * address space.
+		 */
+		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+
+		/*
+		 * It is very unlikely but possible that we're doing an extra
+		 * kick here (e.g. if the vCPU has just entered the guest and
+		 * has its TLB flushed).
+		 */
+		if (vcpu != current_vcpu)
+			kvm_vcpu_kick(vcpu);
+	}
+
+	/* We always do full TLB flush, set rep_done = rep_cnt. */
+	return (u64)HV_STATUS_SUCCESS |
+		((u64)rep_cnt << HV_HYPERCALL_REP_START_OFFSET) |
+		((u64)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
+}
+
 bool kvm_hv_hypercall_enabled(struct kvm *kvm)
 {
 	return READ_ONCE(kvm->arch.hyperv.hv_hypercall) & HV_X64_MSR_HYPERCALL_ENABLE;
@@ -1345,12 +1388,6 @@  int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 
 	trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
 
-	/* Hypercall continuation is not supported yet */
-	if (rep_cnt || rep_idx) {
-		ret = HV_STATUS_INVALID_HYPERCALL_CODE;
-		goto set_result;
-	}
-
 	switch (code) {
 	case HVCALL_NOTIFY_LONG_SPIN_WAIT:
 		kvm_vcpu_on_spin(vcpu, true);
@@ -1374,12 +1411,15 @@  int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 		vcpu->arch.complete_userspace_io =
 				kvm_hv_hypercall_complete_userspace;
 		return 0;
+	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
+	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
+		ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt);
+		break;
 	default:
 		ret = HV_STATUS_INVALID_HYPERCALL_CODE;
 		break;
 	}
 
-set_result:
 	kvm_hv_hypercall_set_result(vcpu, ret);
 	return 1;
 }
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 9807c314c478..47a4fd758743 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -1367,6 +1367,30 @@  TRACE_EVENT(kvm_hv_timer_state,
 			__entry->vcpu_id,
 			__entry->hv_timer_in_use)
 );
+
+/*
+ * Tracepoint for kvm_hv_flush_tlb.
+ */
+TRACE_EVENT(kvm_hv_flush_tlb,
+	TP_PROTO(u64 processor_mask, u64 address_space, u64 flags),
+	TP_ARGS(processor_mask, address_space, flags),
+
+	TP_STRUCT__entry(
+		__field(u64, processor_mask)
+		__field(u64, address_space)
+		__field(u64, flags)
+	),
+
+	TP_fast_assign(
+		__entry->processor_mask = processor_mask;
+		__entry->address_space = address_space;
+		__entry->flags = flags;
+	),
+
+	TP_printk("processor_mask 0x%llx address_space 0x%llx flags 0x%llx",
+		  __entry->processor_mask, __entry->address_space,
+		  __entry->flags)
+);
 #endif /* _TRACE_KVM_H */
 
 #undef TRACE_INCLUDE_PATH