diff mbox

[5/9,VMX] Do not re-execute INTn instruction.

Message ID 1241511275-2261-5-git-send-email-gleb@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Gleb Natapov May 5, 2009, 8:14 a.m. UTC
Re-inject event instead. This is what Intel suggest. Also use correct
instruction length when re-injecting soft fault/interrupt.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |    5 ++++-
 arch/x86/kvm/svm.c              |    6 +++---
 arch/x86/kvm/vmx.c              |   29 ++++++++++++++++++++++-------
 arch/x86/kvm/x86.c              |   13 ++++++++-----
 arch/x86/kvm/x86.h              |    9 ++++++++-
 5 files changed, 45 insertions(+), 17 deletions(-)

Comments

Sheng Yang May 6, 2009, 6:57 a.m. UTC | #1
On Tuesday 05 May 2009 16:14:31 Gleb Natapov wrote:
> Re-inject event instead. This is what Intel suggest. Also use correct
> instruction length when re-injecting soft fault/interrupt.

Thanks for fixing this!

> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> ---
>  arch/x86/include/asm/kvm_host.h |    5 ++++-
>  arch/x86/kvm/svm.c              |    6 +++---
>  arch/x86/kvm/vmx.c              |   29 ++++++++++++++++++++++-------
>  arch/x86/kvm/x86.c              |   13 ++++++++-----
>  arch/x86/kvm/x86.h              |    9 ++++++++-
>  5 files changed, 45 insertions(+), 17 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h
> b/arch/x86/include/asm/kvm_host.h index cc892f5..fea0429 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -319,6 +319,8 @@ struct kvm_vcpu_arch {
>  	struct kvm_pio_request pio;
>  	void *pio_data;
>
> +	u8 event_exit_inst_len;
> +

Can we simply read the field when we need, instead of a new field?

>  	struct kvm_queued_exception {
>  		bool pending;
>  		bool has_error_code;
> @@ -328,6 +330,7 @@ struct kvm_vcpu_arch {
>
>  	struct kvm_queued_interrupt {
>  		bool pending;
> +		bool soft;
>  		u8 nr;
>  	} interrupt;
>
> @@ -510,7 +513,7 @@ struct kvm_x86_ops {
>  	void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
>  	void (*patch_hypercall)(struct kvm_vcpu *vcpu,
>  				unsigned char *hypercall_addr);
> -	void (*set_irq)(struct kvm_vcpu *vcpu, int vec);
> +	void (*set_irq)(struct kvm_vcpu *vcpu, int vec, bool soft);

Seems all current interrupt info are go with struct kvm_vcpu now, how about 
only take vcpu as parameter? Or using another struct kvm_queued_interrupt 
pointer in addition to vcpu?

>  	void (*set_nmi)(struct kvm_vcpu *vcpu);
>  	void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
>  				bool has_error_code, u32 error_code);
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 14cdfce..d5173a2 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -2284,7 +2284,7 @@ static void svm_queue_irq(struct kvm_vcpu *vcpu,
> unsigned nr) SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
>  }
>
> -static void svm_set_irq(struct kvm_vcpu *vcpu, int irq)
> +static void svm_set_irq(struct kvm_vcpu *vcpu, int irq, bool soft)
>  {
>  	struct vcpu_svm *svm = to_svm(vcpu);
>
> @@ -2392,7 +2392,7 @@ static void svm_complete_interrupts(struct vcpu_svm
> *svm) case SVM_EXITINTINFO_TYPE_EXEPT:
>  		/* In case of software exception do not reinject an exception
>  		   vector, but re-execute and instruction instead */
> -		if (vector == BP_VECTOR || vector == OF_VECTOR)
> +		if (kvm_exception_is_soft(vector))
>  			break;
>  		if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
>  			u32 err = svm->vmcb->control.exit_int_info_err;
> @@ -2402,7 +2402,7 @@ static void svm_complete_interrupts(struct vcpu_svm
> *svm) kvm_queue_exception(&svm->vcpu, vector);
>  		break;
>  	case SVM_EXITINTINFO_TYPE_INTR:
> -		kvm_queue_interrupt(&svm->vcpu, vector);
> +		kvm_queue_interrupt(&svm->vcpu, vector, false);
>  		break;
>  	default:
>  		break;
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index a9b30e6..092a3ee 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -779,8 +779,9 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu,
> unsigned nr, return;
>  	}
>
> -	if (nr == BP_VECTOR || nr == OF_VECTOR) {
> -		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
> +	if (kvm_exception_is_soft(nr)) {

I noticed that DB_VECTOR was added as a soft one, but don't see the related 
chapter in the spec?
Gleb Natapov May 6, 2009, 9:27 a.m. UTC | #2
On Wed, May 06, 2009 at 02:57:10PM +0800, Sheng Yang wrote:
> On Tuesday 05 May 2009 16:14:31 Gleb Natapov wrote:
> > Re-inject event instead. This is what Intel suggest. Also use correct
> > instruction length when re-injecting soft fault/interrupt.
> 
> Thanks for fixing this!
> 
> > Signed-off-by: Gleb Natapov <gleb@redhat.com>
> > ---
> >  arch/x86/include/asm/kvm_host.h |    5 ++++-
> >  arch/x86/kvm/svm.c              |    6 +++---
> >  arch/x86/kvm/vmx.c              |   29 ++++++++++++++++++++++-------
> >  arch/x86/kvm/x86.c              |   13 ++++++++-----
> >  arch/x86/kvm/x86.h              |    9 ++++++++-
> >  5 files changed, 45 insertions(+), 17 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/kvm_host.h
> > b/arch/x86/include/asm/kvm_host.h index cc892f5..fea0429 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -319,6 +319,8 @@ struct kvm_vcpu_arch {
> >  	struct kvm_pio_request pio;
> >  	void *pio_data;
> >
> > +	u8 event_exit_inst_len;
> > +
> 
> Can we simply read the field when we need, instead of a new field?
> 
Usually relying on vm exit information to be valid before vm entry
is wrong because migration can happen in a meantime. In this particular
case it is not so obvious since we don't want to migrate pending soft
interrupt, but re-execute instruction instead (we could migrate it
theoretically and may be we should, but when migrating from AMD to
Intel we don't have this info anyway). Another case where instruction
length as read from vmx may be outdated at interrupt injection time is
if exception happened during interrupt delivery and exception should be
re-injected first.

> >  	struct kvm_queued_exception {
> >  		bool pending;
> >  		bool has_error_code;
> > @@ -328,6 +330,7 @@ struct kvm_vcpu_arch {
> >
> >  	struct kvm_queued_interrupt {
> >  		bool pending;
> > +		bool soft;
> >  		u8 nr;
> >  	} interrupt;
> >
> > @@ -510,7 +513,7 @@ struct kvm_x86_ops {
> >  	void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
> >  	void (*patch_hypercall)(struct kvm_vcpu *vcpu,
> >  				unsigned char *hypercall_addr);
> > -	void (*set_irq)(struct kvm_vcpu *vcpu, int vec);
> > +	void (*set_irq)(struct kvm_vcpu *vcpu, int vec, bool soft);
> 
> Seems all current interrupt info are go with struct kvm_vcpu now, how about 
> only take vcpu as parameter? Or using another struct kvm_queued_interrupt 
> pointer in addition to vcpu?
> 
Don't have any particular feelings one way or the other. Lets make it
kvm_vcpu only.

> >  	void (*set_nmi)(struct kvm_vcpu *vcpu);
> >  	void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
> >  				bool has_error_code, u32 error_code);
> > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> > index 14cdfce..d5173a2 100644
> > --- a/arch/x86/kvm/svm.c
> > +++ b/arch/x86/kvm/svm.c
> > @@ -2284,7 +2284,7 @@ static void svm_queue_irq(struct kvm_vcpu *vcpu,
> > unsigned nr) SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
> >  }
> >
> > -static void svm_set_irq(struct kvm_vcpu *vcpu, int irq)
> > +static void svm_set_irq(struct kvm_vcpu *vcpu, int irq, bool soft)
> >  {
> >  	struct vcpu_svm *svm = to_svm(vcpu);
> >
> > @@ -2392,7 +2392,7 @@ static void svm_complete_interrupts(struct vcpu_svm
> > *svm) case SVM_EXITINTINFO_TYPE_EXEPT:
> >  		/* In case of software exception do not reinject an exception
> >  		   vector, but re-execute and instruction instead */
> > -		if (vector == BP_VECTOR || vector == OF_VECTOR)
> > +		if (kvm_exception_is_soft(vector))
> >  			break;
> >  		if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
> >  			u32 err = svm->vmcb->control.exit_int_info_err;
> > @@ -2402,7 +2402,7 @@ static void svm_complete_interrupts(struct vcpu_svm
> > *svm) kvm_queue_exception(&svm->vcpu, vector);
> >  		break;
> >  	case SVM_EXITINTINFO_TYPE_INTR:
> > -		kvm_queue_interrupt(&svm->vcpu, vector);
> > +		kvm_queue_interrupt(&svm->vcpu, vector, false);
> >  		break;
> >  	default:
> >  		break;
> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> > index a9b30e6..092a3ee 100644
> > --- a/arch/x86/kvm/vmx.c
> > +++ b/arch/x86/kvm/vmx.c
> > @@ -779,8 +779,9 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu,
> > unsigned nr, return;
> >  	}
> >
> > -	if (nr == BP_VECTOR || nr == OF_VECTOR) {
> > -		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
> > +	if (kvm_exception_is_soft(nr)) {
> 
> I noticed that DB_VECTOR was added as a soft one, but don't see the related 
> chapter in the spec?
> 
I was confused by AMD spec that defines int1 instruction (ICEBP), but now
I see that this instruction is not intercepted as #DB exception. Will fix.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Avi Kivity May 6, 2009, 9:30 a.m. UTC | #3
Gleb Natapov wrote:
>>> +	u8 event_exit_inst_len;
>>> +
>>>       
>> Can we simply read the field when we need, instead of a new field?
>>
>>     
> Usually relying on vm exit information to be valid before vm entry
> is wrong because migration can happen in a meantime. In this particular
> case it is not so obvious since we don't want to migrate pending soft
> interrupt, but re-execute instruction instead (we could migrate it
> theoretically and may be we should, but when migrating from AMD to
> Intel we don't have this info anyway). Another case where instruction
> length as read from vmx may be outdated at interrupt injection time is
> if exception happened during interrupt delivery and exception should be
> re-injected first.
>   

Note that in some cases we do keep things in vmcs/vmcb fields -- the 
registers, segments, etc.  This is because we have per-vendor accessors 
for them, so we maintain a "virtual data structure" that common code can 
access.

We could do something similar with the interrupt queue - keep part of it 
in the vmcs/vmcb and use accessors to modify it.  But I don't think it's 
worthwhile; for vmx we have to read and write it anyway (since, unlike 
the registers, the exit and entry fields are different) and for svm it's 
in memory anyway so reading and writing it back is very cheap.
Gregory Haskins May 6, 2009, 10:59 a.m. UTC | #4
Hi Gleb,

Gleb Natapov wrote:
> Re-inject event instead. This is what Intel suggest. Also use correct
> instruction length when re-injecting soft fault/interrupt.
>
> Signed-off-by: Gleb Natapov <gleb@redhat.com>
> ---
>  arch/x86/include/asm/kvm_host.h |    5 ++++-
>  arch/x86/kvm/svm.c              |    6 +++---
>  arch/x86/kvm/vmx.c              |   29 ++++++++++++++++++++++-------
>  arch/x86/kvm/x86.c              |   13 ++++++++-----
>  arch/x86/kvm/x86.h              |    9 ++++++++-
>  5 files changed, 45 insertions(+), 17 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index cc892f5..fea0429 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -319,6 +319,8 @@ struct kvm_vcpu_arch {
>  	struct kvm_pio_request pio;
>  	void *pio_data;
>  
> +	u8 event_exit_inst_len;
> +
>  	struct kvm_queued_exception {
>  		bool pending;
>  		bool has_error_code;
> @@ -328,6 +330,7 @@ struct kvm_vcpu_arch {
>  
>  	struct kvm_queued_interrupt {
>  		bool pending;
> +		bool soft;
>  		u8 nr;
>  	} interrupt;
>  
> @@ -510,7 +513,7 @@ struct kvm_x86_ops {
>  	void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
>  	void (*patch_hypercall)(struct kvm_vcpu *vcpu,
>  				unsigned char *hypercall_addr);
> -	void (*set_irq)(struct kvm_vcpu *vcpu, int vec);
> +	void (*set_irq)(struct kvm_vcpu *vcpu, int vec, bool soft);
>  	void (*set_nmi)(struct kvm_vcpu *vcpu);
>  	void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
>  				bool has_error_code, u32 error_code);
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 14cdfce..d5173a2 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -2284,7 +2284,7 @@ static void svm_queue_irq(struct kvm_vcpu *vcpu, unsigned nr)
>  		SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
>  }
>  
> -static void svm_set_irq(struct kvm_vcpu *vcpu, int irq)
> +static void svm_set_irq(struct kvm_vcpu *vcpu, int irq, bool soft)
>  {
>  	struct vcpu_svm *svm = to_svm(vcpu);
>  
> @@ -2392,7 +2392,7 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
>  	case SVM_EXITINTINFO_TYPE_EXEPT:
>  		/* In case of software exception do not reinject an exception
>  		   vector, but re-execute and instruction instead */
> -		if (vector == BP_VECTOR || vector == OF_VECTOR)
> +		if (kvm_exception_is_soft(vector))
>  			break;
>  		if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
>  			u32 err = svm->vmcb->control.exit_int_info_err;
> @@ -2402,7 +2402,7 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
>  			kvm_queue_exception(&svm->vcpu, vector);
>  		break;
>  	case SVM_EXITINTINFO_TYPE_INTR:
> -		kvm_queue_interrupt(&svm->vcpu, vector);
> +		kvm_queue_interrupt(&svm->vcpu, vector, false);
>  		break;
>  	default:
>  		break;
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index a9b30e6..092a3ee 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -779,8 +779,9 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
>  		return;
>  	}
>  
> -	if (nr == BP_VECTOR || nr == OF_VECTOR) {
> -		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
> +	if (kvm_exception_is_soft(nr)) {
> +		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
> +			     vmx->vcpu.arch.event_exit_inst_len);
>  		intr_info |= INTR_TYPE_SOFT_EXCEPTION;
>  	} else
>  		intr_info |= INTR_TYPE_HARD_EXCEPTION;
> @@ -2429,9 +2430,10 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
>  	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
>  }
>  
> -static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
> +static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq, bool soft)
>  {
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +	uint32_t intr;
>  
>  	KVMTRACE_1D(INJ_VIRQ, vcpu, (u32)irq, handler);
>  
> @@ -2446,8 +2448,14 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
>  		kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
>  		return;
>  	}
> -	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
> -			irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
> +	intr = irq | INTR_INFO_VALID_MASK;
> +	if (soft) {
> +		intr |= INTR_TYPE_SOFT_INTR;
> +		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
> +			     vmx->vcpu.arch.event_exit_inst_len);
> +	} else
> +		intr |= INTR_TYPE_EXT_INTR;
> +	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
>  }
>  
>  static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
> @@ -3008,6 +3016,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
>  					      GUEST_INTR_STATE_NMI);
>  			break;
>  		case INTR_TYPE_EXT_INTR:
> +		case INTR_TYPE_SOFT_INTR:
>  			kvm_clear_interrupt_queue(vcpu);
>  			break;
>  		case INTR_TYPE_HARD_EXCEPTION:
> @@ -3279,16 +3288,22 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
>  		vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
>  				GUEST_INTR_STATE_NMI);
>  		break;
> -	case INTR_TYPE_HARD_EXCEPTION:
>  	case INTR_TYPE_SOFT_EXCEPTION:
> +		vmx->vcpu.arch.event_exit_inst_len =
> +			vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
>   

Minor nit: Would suggest a "/* fall through */" comment here to denote
the break was intentionally omitted (assuming it was, but I think so IIUC).

> +	case INTR_TYPE_HARD_EXCEPTION:
>  		if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
>  			u32 err = vmcs_read32(IDT_VECTORING_ERROR_CODE);
>  			kvm_queue_exception_e(&vmx->vcpu, vector, err);
>  		} else
>  			kvm_queue_exception(&vmx->vcpu, vector);
>  		break;
> +	case INTR_TYPE_SOFT_INTR:
> +		vmx->vcpu.arch.event_exit_inst_len =
> +			vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
>  	case INTR_TYPE_EXT_INTR:
> -		kvm_queue_interrupt(&vmx->vcpu, vector);
> +		kvm_queue_interrupt(&vmx->vcpu, vector,
> +			type == INTR_TYPE_SOFT_INTR);
>  		break;
>  	default:
>  		break;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 4596927..023842b 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1424,7 +1424,7 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
>  		return -ENXIO;
>  	vcpu_load(vcpu);
>  
> -	kvm_queue_interrupt(vcpu, irq->irq);
> +	kvm_queue_interrupt(vcpu, irq->irq, false);
>  
>  	vcpu_put(vcpu);
>  
> @@ -3136,7 +3136,8 @@ static void inject_irq(struct kvm_vcpu *vcpu)
>  	}
>  
>  	if (vcpu->arch.interrupt.pending) {
> -		kvm_x86_ops->set_irq(vcpu, vcpu->arch.interrupt.nr);
> +		kvm_x86_ops->set_irq(vcpu, vcpu->arch.interrupt.nr,
> +				     vcpu->arch.interrupt.soft);
>  		return;
>  	}
>  
> @@ -3149,8 +3150,10 @@ static void inject_irq(struct kvm_vcpu *vcpu)
>  		}
>  	} else if (kvm_cpu_has_interrupt(vcpu)) {
>  		if (kvm_x86_ops->interrupt_allowed(vcpu)) {
> -			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
> -			kvm_x86_ops->set_irq(vcpu, vcpu->arch.interrupt.nr);
> +			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
> +					    false);
> +			kvm_x86_ops->set_irq(vcpu, vcpu->arch.interrupt.nr,
> +					     false);
>  		}
>  	}
>  }
> @@ -4077,7 +4080,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
>  	pending_vec = find_first_bit(
>  		(const unsigned long *)sregs->interrupt_bitmap, max_bits);
>  	if (pending_vec < max_bits) {
> -		kvm_queue_interrupt(vcpu, pending_vec);
> +		kvm_queue_interrupt(vcpu, pending_vec, false);
>  		pr_debug("Set back pending irq %d\n", pending_vec);
>  		if (irqchip_in_kernel(vcpu->kvm))
>  			kvm_pic_clear_isr_ack(vcpu->kvm);
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index c1f1a8c..2817c86 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -8,9 +8,11 @@ static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
>  	vcpu->arch.exception.pending = false;
>  }
>  
> -static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector)
> +static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector,
> +	bool soft)
>  {
>  	vcpu->arch.interrupt.pending = true;
> +	vcpu->arch.interrupt.soft = soft;
>  	vcpu->arch.interrupt.nr = vector;
>  }
>  
> @@ -24,4 +26,9 @@ static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu)
>  	return vcpu->arch.exception.pending || vcpu->arch.interrupt.pending ||
>  		vcpu->arch.nmi_injected;
>  }
> +
> +static inline bool kvm_exception_is_soft(unsigned int nr)
> +{
> +	return (nr == BP_VECTOR) || (nr == OF_VECTOR) || (nr == DB_VECTOR);
> +}
>  #endif
>
diff mbox

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index cc892f5..fea0429 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -319,6 +319,8 @@  struct kvm_vcpu_arch {
 	struct kvm_pio_request pio;
 	void *pio_data;
 
+	u8 event_exit_inst_len;
+
 	struct kvm_queued_exception {
 		bool pending;
 		bool has_error_code;
@@ -328,6 +330,7 @@  struct kvm_vcpu_arch {
 
 	struct kvm_queued_interrupt {
 		bool pending;
+		bool soft;
 		u8 nr;
 	} interrupt;
 
@@ -510,7 +513,7 @@  struct kvm_x86_ops {
 	void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
 	void (*patch_hypercall)(struct kvm_vcpu *vcpu,
 				unsigned char *hypercall_addr);
-	void (*set_irq)(struct kvm_vcpu *vcpu, int vec);
+	void (*set_irq)(struct kvm_vcpu *vcpu, int vec, bool soft);
 	void (*set_nmi)(struct kvm_vcpu *vcpu);
 	void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
 				bool has_error_code, u32 error_code);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 14cdfce..d5173a2 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2284,7 +2284,7 @@  static void svm_queue_irq(struct kvm_vcpu *vcpu, unsigned nr)
 		SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
 }
 
-static void svm_set_irq(struct kvm_vcpu *vcpu, int irq)
+static void svm_set_irq(struct kvm_vcpu *vcpu, int irq, bool soft)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
@@ -2392,7 +2392,7 @@  static void svm_complete_interrupts(struct vcpu_svm *svm)
 	case SVM_EXITINTINFO_TYPE_EXEPT:
 		/* In case of software exception do not reinject an exception
 		   vector, but re-execute and instruction instead */
-		if (vector == BP_VECTOR || vector == OF_VECTOR)
+		if (kvm_exception_is_soft(vector))
 			break;
 		if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
 			u32 err = svm->vmcb->control.exit_int_info_err;
@@ -2402,7 +2402,7 @@  static void svm_complete_interrupts(struct vcpu_svm *svm)
 			kvm_queue_exception(&svm->vcpu, vector);
 		break;
 	case SVM_EXITINTINFO_TYPE_INTR:
-		kvm_queue_interrupt(&svm->vcpu, vector);
+		kvm_queue_interrupt(&svm->vcpu, vector, false);
 		break;
 	default:
 		break;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a9b30e6..092a3ee 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -779,8 +779,9 @@  static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
 		return;
 	}
 
-	if (nr == BP_VECTOR || nr == OF_VECTOR) {
-		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
+	if (kvm_exception_is_soft(nr)) {
+		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
+			     vmx->vcpu.arch.event_exit_inst_len);
 		intr_info |= INTR_TYPE_SOFT_EXCEPTION;
 	} else
 		intr_info |= INTR_TYPE_HARD_EXCEPTION;
@@ -2429,9 +2430,10 @@  static void enable_nmi_window(struct kvm_vcpu *vcpu)
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
 }
 
-static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
+static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq, bool soft)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	uint32_t intr;
 
 	KVMTRACE_1D(INJ_VIRQ, vcpu, (u32)irq, handler);
 
@@ -2446,8 +2448,14 @@  static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
 		kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1);
 		return;
 	}
-	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
-			irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
+	intr = irq | INTR_INFO_VALID_MASK;
+	if (soft) {
+		intr |= INTR_TYPE_SOFT_INTR;
+		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
+			     vmx->vcpu.arch.event_exit_inst_len);
+	} else
+		intr |= INTR_TYPE_EXT_INTR;
+	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
 }
 
 static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
@@ -3008,6 +3016,7 @@  static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 					      GUEST_INTR_STATE_NMI);
 			break;
 		case INTR_TYPE_EXT_INTR:
+		case INTR_TYPE_SOFT_INTR:
 			kvm_clear_interrupt_queue(vcpu);
 			break;
 		case INTR_TYPE_HARD_EXCEPTION:
@@ -3279,16 +3288,22 @@  static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 		vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
 				GUEST_INTR_STATE_NMI);
 		break;
-	case INTR_TYPE_HARD_EXCEPTION:
 	case INTR_TYPE_SOFT_EXCEPTION:
+		vmx->vcpu.arch.event_exit_inst_len =
+			vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
+	case INTR_TYPE_HARD_EXCEPTION:
 		if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) {
 			u32 err = vmcs_read32(IDT_VECTORING_ERROR_CODE);
 			kvm_queue_exception_e(&vmx->vcpu, vector, err);
 		} else
 			kvm_queue_exception(&vmx->vcpu, vector);
 		break;
+	case INTR_TYPE_SOFT_INTR:
+		vmx->vcpu.arch.event_exit_inst_len =
+			vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
 	case INTR_TYPE_EXT_INTR:
-		kvm_queue_interrupt(&vmx->vcpu, vector);
+		kvm_queue_interrupt(&vmx->vcpu, vector,
+			type == INTR_TYPE_SOFT_INTR);
 		break;
 	default:
 		break;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4596927..023842b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1424,7 +1424,7 @@  static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
 		return -ENXIO;
 	vcpu_load(vcpu);
 
-	kvm_queue_interrupt(vcpu, irq->irq);
+	kvm_queue_interrupt(vcpu, irq->irq, false);
 
 	vcpu_put(vcpu);
 
@@ -3136,7 +3136,8 @@  static void inject_irq(struct kvm_vcpu *vcpu)
 	}
 
 	if (vcpu->arch.interrupt.pending) {
-		kvm_x86_ops->set_irq(vcpu, vcpu->arch.interrupt.nr);
+		kvm_x86_ops->set_irq(vcpu, vcpu->arch.interrupt.nr,
+				     vcpu->arch.interrupt.soft);
 		return;
 	}
 
@@ -3149,8 +3150,10 @@  static void inject_irq(struct kvm_vcpu *vcpu)
 		}
 	} else if (kvm_cpu_has_interrupt(vcpu)) {
 		if (kvm_x86_ops->interrupt_allowed(vcpu)) {
-			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu));
-			kvm_x86_ops->set_irq(vcpu, vcpu->arch.interrupt.nr);
+			kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu),
+					    false);
+			kvm_x86_ops->set_irq(vcpu, vcpu->arch.interrupt.nr,
+					     false);
 		}
 	}
 }
@@ -4077,7 +4080,7 @@  int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	pending_vec = find_first_bit(
 		(const unsigned long *)sregs->interrupt_bitmap, max_bits);
 	if (pending_vec < max_bits) {
-		kvm_queue_interrupt(vcpu, pending_vec);
+		kvm_queue_interrupt(vcpu, pending_vec, false);
 		pr_debug("Set back pending irq %d\n", pending_vec);
 		if (irqchip_in_kernel(vcpu->kvm))
 			kvm_pic_clear_isr_ack(vcpu->kvm);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index c1f1a8c..2817c86 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -8,9 +8,11 @@  static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
 	vcpu->arch.exception.pending = false;
 }
 
-static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector)
+static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector,
+	bool soft)
 {
 	vcpu->arch.interrupt.pending = true;
+	vcpu->arch.interrupt.soft = soft;
 	vcpu->arch.interrupt.nr = vector;
 }
 
@@ -24,4 +26,9 @@  static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu)
 	return vcpu->arch.exception.pending || vcpu->arch.interrupt.pending ||
 		vcpu->arch.nmi_injected;
 }
+
+static inline bool kvm_exception_is_soft(unsigned int nr)
+{
+	return (nr == BP_VECTOR) || (nr == OF_VECTOR) || (nr == DB_VECTOR);
+}
 #endif