
[v3,2/5] KVM: nVMX: Rework event injection and recovery

Message ID 67b8ce93ddd1185808b4961d0084ab9c7ca0c43d.1364150685.git.jan.kiszka@web.de (mailing list archive)
State New, archived

Commit Message

Jan Kiszka March 24, 2013, 6:44 p.m. UTC
From: Jan Kiszka <jan.kiszka@siemens.com>

The basic idea is to always transfer the pending event injection on
vmexit into the architectural state of the VCPU and then drop it from
there if it turns out that we left L2 to enter L1, i.e. if we enter
prepare_vmcs12.

vmcs12_save_pending_event takes care of transferring pending L0 events
into the queue of L1. That is mandatory, as L1 may decide to switch the
guest state completely, either invalidating the pending events or
preserving them for later injection (including on a different node,
once we support migration).

This concept is based on the rule that a pending vmlaunch/vmresume is
never canceled. Otherwise, we would risk losing injected events or
leaking them into the wrong queues. Encode this rule via a WARN_ON_ONCE
at the entry of nested_vmx_vmexit.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
 arch/x86/kvm/vmx.c |   90 +++++++++++++++++++++++++++++++++------------------
 1 files changed, 58 insertions(+), 32 deletions(-)
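
To make the flow concrete, here is a small self-contained model of the
bookkeeping this patch introduces: on every L2 exit the pending event is
funneled into the vcpu's architectural queues, and if the exit is then
reflected to L1, the queued event is re-encoded into vmcs12's
IDT-vectoring format and the queues are cleared. The constants below
mirror the kernel's VMX encodings; the struct, function, and main() are
illustrative stand-ins, not kernel code, and the exception path is
simplified to hard exceptions without error codes.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define VECTORING_INFO_VALID_MASK	(1u << 31)
#define INTR_TYPE_EXT_INTR		(0u << 8)
#define INTR_TYPE_NMI_INTR		(2u << 8)
#define INTR_TYPE_HARD_EXCEPTION	(3u << 8)
#define NMI_VECTOR			2

struct arch_state {			/* stands in for vcpu->arch */
	bool exception_pending;
	unsigned int exception_nr;
	bool nmi_pending;
	bool interrupt_pending;
	unsigned int interrupt_nr;
};

/*
 * Architectural queues -> idt_vectoring_info_field encoding, followed
 * by clearing the queues so nothing leaks into L1. This mirrors what
 * vmcs12_save_pending_event plus the kvm_clear_*_queue calls do in
 * prepare_vmcs12.
 */
static uint32_t save_and_clear_pending_event(struct arch_state *a)
{
	uint32_t info = 0;

	if (a->exception_pending)
		info = a->exception_nr | INTR_TYPE_HARD_EXCEPTION |
		       VECTORING_INFO_VALID_MASK;
	else if (a->nmi_pending)
		info = NMI_VECTOR | INTR_TYPE_NMI_INTR |
		       VECTORING_INFO_VALID_MASK;
	else if (a->interrupt_pending)
		info = a->interrupt_nr | INTR_TYPE_EXT_INTR |
		       VECTORING_INFO_VALID_MASK;

	a->exception_pending = false;
	a->nmi_pending = false;
	a->interrupt_pending = false;

	return info;
}

int main(void)
{
	/* An NMI was pending when L2 exited towards L1: */
	struct arch_state a = { .nmi_pending = true };

	printf("idt_vectoring_info_field = 0x%08x\n",
	       (unsigned int)save_and_clear_pending_event(&a));
	return 0;
}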

Comments

Gleb Natapov April 10, 2013, 1:42 p.m. UTC | #1
On Sun, Mar 24, 2013 at 07:44:45PM +0100, Jan Kiszka wrote:
> [...]
> @@ -7416,9 +7428,20 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>  	vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
>  	vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
>  
> -	/* clear vm-entry fields which are to be cleared on exit */
>  	if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
> -		vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK;
Why have you dropped this? Where is it cleared now?

> +		/*
> +		 * Transfer the event that L0 or L1 may have wanted to
> +		 * inject into L2 to IDT_VECTORING_INFO_FIELD.
> +		 */
> +		vmcs12_save_pending_event(vcpu, vmcs12);
> +
> +	/*
> +	 * Drop what we picked up for L2 via vmx_complete_interrupts. It is
> +	 * preserved above and would only end up incorrectly in L1.
> +	 */
> +	vcpu->arch.nmi_injected = false;
> +	kvm_clear_exception_queue(vcpu);
> +	kvm_clear_interrupt_queue(vcpu);
>  }
> [...]

--
			Gleb.
Jan Kiszka April 10, 2013, 1:49 p.m. UTC | #2
On 2013-04-10 15:42, Gleb Natapov wrote:
> On Sun, Mar 24, 2013 at 07:44:45PM +0100, Jan Kiszka wrote:
>> [...]
>> @@ -7416,9 +7428,20 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>>  	vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
>>  	vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
>>  
>> -	/* clear vm-entry fields which are to be cleared on exit */
>>  	if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
>> -		vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK;
> Why have you dropped this? Where is it cleared now?

Hmm, looks like I misread this as something like "vm_exit_intr_info".
Will restore the line and just improve the comment.

Jan
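
The restored hunk Jan describes would presumably end up looking roughly
like this in prepare_vmcs12 (a sketch inferred from the thread, not the
committed code):

	if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) {
		/*
		 * vm_entry_intr_info_field is cleared on exit; emulate
		 * this here instead of reading the real value.
		 */
		vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK;
		/*
		 * Transfer the event that L0 or L1 may have wanted to
		 * inject into L2 to IDT_VECTORING_INFO_FIELD.
		 */
		vmcs12_save_pending_event(vcpu, vmcs12);
	}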
Gleb Natapov April 11, 2013, 11:22 a.m. UTC | #3
On Sun, Mar 24, 2013 at 07:44:45PM +0100, Jan Kiszka wrote:
> [...]
> +	} else if (vcpu->arch.nmi_pending) {
> +		vmcs12->idt_vectoring_info_field =
> +			INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR;
> +	} else if (vcpu->arch.interrupt.pending) {
> +		nr = vcpu->arch.interrupt.nr;
> +		idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
> +
> +		if (vcpu->arch.interrupt.soft) {
> +			idt_vectoring |= INTR_TYPE_SOFT_INTR;
> +			vmcs12->vm_entry_instruction_len =
> +				vcpu->arch.event_exit_inst_len;
> +		} else
> +			idt_vectoring |= INTR_TYPE_EXT_INTR;
> +
> +		vmcs12->idt_vectoring_info_field = idt_vectoring;
> +	}
else
	vmcs12->idt_vectoring_info_field = 0;

Also you can drop
vmcs12->idt_vectoring_info_field = to_vmx(vcpu)->idt_vectoring_info;
from prepare_vmcs12().

> +}
> [...]

--
			Gleb.
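
Folded into the tail of vmcs12_save_pending_event, Gleb's suggestion
would look roughly like this (an illustrative sketch of the review
comment, not committed code):

	} else if (vcpu->arch.interrupt.pending) {
		/* ... interrupt encoding as in the patch ... */
		vmcs12->idt_vectoring_info_field = idt_vectoring;
	} else {
		/*
		 * Nothing is pending: clear the field so that stale
		 * vectoring info from an earlier exit is not handed to
		 * L1. With this in place, the unconditional assignment
		 * from vmx->idt_vectoring_info in prepare_vmcs12
		 * becomes droppable, as noted above.
		 */
		vmcs12->idt_vectoring_info_field = 0;
	}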

Patch

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 8827b3b..9d9ff74 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6493,8 +6493,6 @@  static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
 
 static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 {
-	if (is_guest_mode(&vmx->vcpu))
-		return;
 	__vmx_complete_interrupts(&vmx->vcpu, vmx->idt_vectoring_info,
 				  VM_EXIT_INSTRUCTION_LEN,
 				  IDT_VECTORING_ERROR_CODE);
@@ -6502,8 +6500,6 @@  static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 
 static void vmx_cancel_injection(struct kvm_vcpu *vcpu)
 {
-	if (is_guest_mode(vcpu))
-		return;
 	__vmx_complete_interrupts(vcpu,
 				  vmcs_read32(VM_ENTRY_INTR_INFO_FIELD),
 				  VM_ENTRY_INSTRUCTION_LEN,
@@ -6535,21 +6531,6 @@  static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned long debugctlmsr;
 
-	if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending) {
-		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-		if (vmcs12->idt_vectoring_info_field &
-				VECTORING_INFO_VALID_MASK) {
-			vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
-				vmcs12->idt_vectoring_info_field);
-			vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
-				vmcs12->vm_exit_instruction_len);
-			if (vmcs12->idt_vectoring_info_field &
-					VECTORING_INFO_DELIVER_CODE_MASK)
-				vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
-					vmcs12->idt_vectoring_error_code);
-		}
-	}
-
 	/* Record the guest's net vcpu time for enforced NMI injections. */
 	if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
 		vmx->entry_time = ktime_get();
@@ -6708,17 +6689,6 @@  static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 
 	vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
 
-	if (is_guest_mode(vcpu)) {
-		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-		vmcs12->idt_vectoring_info_field = vmx->idt_vectoring_info;
-		if (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) {
-			vmcs12->idt_vectoring_error_code =
-				vmcs_read32(IDT_VECTORING_ERROR_CODE);
-			vmcs12->vm_exit_instruction_len =
-				vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
-		}
-	}
-
 	vmx->loaded_vmcs->launched = 1;
 
 	vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
@@ -7325,6 +7295,48 @@  vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 			vcpu->arch.cr4_guest_owned_bits));
 }
 
+static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
+				       struct vmcs12 *vmcs12)
+{
+	u32 idt_vectoring;
+	unsigned int nr;
+
+	if (vcpu->arch.exception.pending) {
+		nr = vcpu->arch.exception.nr;
+		idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
+
+		if (kvm_exception_is_soft(nr)) {
+			vmcs12->vm_exit_instruction_len =
+				vcpu->arch.event_exit_inst_len;
+			idt_vectoring |= INTR_TYPE_SOFT_EXCEPTION;
+		} else
+			idt_vectoring |= INTR_TYPE_HARD_EXCEPTION;
+
+		if (vcpu->arch.exception.has_error_code) {
+			idt_vectoring |= VECTORING_INFO_DELIVER_CODE_MASK;
+			vmcs12->idt_vectoring_error_code =
+				vcpu->arch.exception.error_code;
+		}
+
+		vmcs12->idt_vectoring_info_field = idt_vectoring;
+	} else if (vcpu->arch.nmi_pending) {
+		vmcs12->idt_vectoring_info_field =
+			INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR;
+	} else if (vcpu->arch.interrupt.pending) {
+		nr = vcpu->arch.interrupt.nr;
+		idt_vectoring = nr | VECTORING_INFO_VALID_MASK;
+
+		if (vcpu->arch.interrupt.soft) {
+			idt_vectoring |= INTR_TYPE_SOFT_INTR;
+			vmcs12->vm_entry_instruction_len =
+				vcpu->arch.event_exit_inst_len;
+		} else
+			idt_vectoring |= INTR_TYPE_EXT_INTR;
+
+		vmcs12->idt_vectoring_info_field = idt_vectoring;
+	}
+}
+
 /*
  * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits
  * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12),
@@ -7416,9 +7428,20 @@  static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
 	vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
 
-	/* clear vm-entry fields which are to be cleared on exit */
 	if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
-		vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK;
+		/*
+		 * Transfer the event that L0 or L1 may have wanted to
+		 * inject into L2 to IDT_VECTORING_INFO_FIELD.
+		 */
+		vmcs12_save_pending_event(vcpu, vmcs12);
+
+	/*
+	 * Drop what we picked up for L2 via vmx_complete_interrupts. It is
+	 * preserved above and would only end up incorrectly in L1.
+	 */
+	vcpu->arch.nmi_injected = false;
+	kvm_clear_exception_queue(vcpu);
+	kvm_clear_interrupt_queue(vcpu);
 }
 
 /*
@@ -7518,6 +7541,9 @@  static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
 	int cpu;
 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 
+	/* trying to cancel vmlaunch/vmresume is a bug */
+	WARN_ON_ONCE(vmx->nested.nested_run_pending);
+
 	leave_guest_mode(vcpu);
 	prepare_vmcs12(vcpu, vmcs12);