
[PART1,RFC,v3,07/12] svm: Add interrupt injection via AVIC

Message ID 1458281388-14452-8-git-send-email-Suravee.Suthikulpanit@amd.com (mailing list archive)
State New, archived

Commit Message

Suthikulpanit, Suravee March 18, 2016, 6:09 a.m. UTC
This patch introduces a new mechanism for injecting interrupts using AVIC.
Since VINTR is not supported when AVIC is enabled, interrupts must be
injected via the APIC backing page instead.

This patch also adds support for the AVIC doorbell, which is used by
KVM to signal a running vcpu to check its IRR for injected interrupts.

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
---
 arch/x86/kvm/svm.c | 46 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 44 insertions(+), 2 deletions(-)
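
In short, the delivery path added by this patch posts the vector straight
into the guest's IRR in the APIC backing page, then either rings the AVIC
doorbell MSR (if the target vcpu is currently running) or wakes the vcpu so
it picks the interrupt up on its next entry. An annotated condensation of
the svm_deliver_avic_intr() hunk below:

	static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
	{
		struct vcpu_svm *svm = to_svm(vcpu);

		/* Post the vector directly into the IRR of the APIC backing page. */
		kvm_lapic_set_vector(vec, svm->vcpu.arch.apic->regs + APIC_IRR);
		/* Make the IRR update visible before checking IsRunning. */
		smp_mb__after_atomic();

		if (avic_vcpu_is_running(vcpu))
			/* Target vcpu is running in guest mode: ring its doorbell
			 * so the hardware evaluates the new IRR bit immediately. */
			wrmsrl(SVM_AVIC_DOORBELL,
			       __default_cpu_present_to_apicid(vcpu->cpu));
		else
			/* Target vcpu is not running: wake it up; the interrupt
			 * is evaluated on the next VMRUN. */
			kvm_vcpu_wake_up(vcpu);
	}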

Comments

Paolo Bonzini March 18, 2016, 10:22 a.m. UTC | #1
On 18/03/2016 07:09, Suravee Suthikulpanit wrote:
> This patch introduces a new mechanism for injecting interrupts using AVIC.
> Since VINTR is not supported when AVIC is enabled, interrupts must be
> injected via the APIC backing page instead.
> 
> This patch also adds support for the AVIC doorbell, which is used by
> KVM to signal a running vcpu to check its IRR for injected interrupts.
> 
> Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>

Looks good, but I think it breaks nested virtualization.  See below.

> ---
>  arch/x86/kvm/svm.c | 46 ++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 44 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 621c948..8e31ad3 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -71,6 +71,8 @@ MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
>  #define SVM_FEATURE_DECODE_ASSIST  (1 <<  7)
>  #define SVM_FEATURE_PAUSE_FILTER   (1 << 10)
>  
> +#define SVM_AVIC_DOORBELL	0xc001011b
> +
>  #define NESTED_EXIT_HOST	0	/* Exit handled on host level */
>  #define NESTED_EXIT_DONE	1	/* Exit caused nested vmexit  */
>  #define NESTED_EXIT_CONTINUE	2	/* Further checks needed      */
> @@ -217,6 +219,8 @@ static bool npt_enabled = true;
>  static bool npt_enabled;
>  #endif
>  
> +static struct kvm_x86_ops svm_x86_ops;
> +
>  /* allow nested paging (virtualized MMU) for all guests */
>  static int npt = true;
>  module_param(npt, int, S_IRUGO);
> @@ -291,6 +295,18 @@ static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
>  	return container_of(vcpu, struct vcpu_svm, vcpu);
>  }
>  
> +static inline bool avic_vcpu_is_running(struct kvm_vcpu *vcpu)
> +{
> +	struct vcpu_svm *svm = to_svm(vcpu);
> +	u64 *entry;
> +
> +	entry = svm->avic_phy_apic_id_cache;
> +	if (!entry)
> +		return false;
> +
> +	return (READ_ONCE(*entry) & AVIC_PHY_APIC_ID__IS_RUN_MSK);

AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK

> +}
> +
>  static void recalc_intercepts(struct vcpu_svm *svm)
>  {
>  	struct vmcb_control_area *c, *h;
> @@ -964,6 +980,8 @@ static __init int svm_hardware_setup(void)
>  
>  	if (avic) {
>  		printk(KERN_INFO "kvm: AVIC enabled\n");
> +	} else {
> +		svm_x86_ops.deliver_posted_interrupt = NULL;
>  	}
>  
>  	return 0;
> @@ -2877,8 +2895,10 @@ static int clgi_interception(struct vcpu_svm *svm)
>  	disable_gif(svm);
>  
>  	/* After a CLGI no interrupts should come */
> -	svm_clear_vintr(svm);
> -	svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
> +	if (!svm_vcpu_avic_enabled(svm)) {
> +		svm_clear_vintr(svm);
> +		svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
> +	}

This is for nested virtualization.  Unless you support nested AVIC, the
L2 guest should run without AVIC (i.e. IsRunning should be false) and
use the old VINTR mechanism.
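
A conceptual sketch of that fallback (the helper name is hypothetical and
not part of this patch; it only illustrates clearing IsRunning so that
doorbells are not sent and delivery falls back to kvm_vcpu_wake_up() plus
VINTR):

	static void avic_set_is_running(struct kvm_vcpu *vcpu, bool is_run)
	{
		struct vcpu_svm *svm = to_svm(vcpu);
		u64 *entry = svm->avic_phy_apic_id_cache;

		if (!entry)
			return;

		/* While an L2 guest is active, IsRunning stays clear, so
		 * avic_vcpu_is_running() returns false and interrupts are
		 * delivered through the legacy wake-up/VINTR path. */
		if (is_run)
			WRITE_ONCE(*entry, READ_ONCE(*entry) | AVIC_PHY_APIC_ID__IS_RUN_MSK);
		else
			WRITE_ONCE(*entry, READ_ONCE(*entry) & ~AVIC_PHY_APIC_ID__IS_RUN_MSK);
	}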

>  	mark_dirty(svm->vmcb, VMCB_INTR);
>  
> @@ -3453,6 +3473,9 @@ static int msr_interception(struct vcpu_svm *svm)
>  
>  static int interrupt_window_interception(struct vcpu_svm *svm)
>  {
> +	if (svm_vcpu_avic_enabled(svm))
> +		BUG();

Please WARN and return, but I suspect this may also need some changes
for nested virtualization.
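
One way the suggested change might look, written as a delta against the hunk
above (illustrative only, not the final patch):

	-	if (svm_vcpu_avic_enabled(svm))
	-		BUG();
	+	/* AVIC does not use the V_IRQ interrupt window, so reaching this
	+	 * handler with AVIC enabled indicates a bug; warn and keep the
	+	 * host alive instead of crashing it. */
	+	if (WARN_ON_ONCE(svm_vcpu_avic_enabled(svm)))
	+		return 1;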

>  	kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
>  	svm_clear_vintr(svm);
>  	svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
> @@ -3767,6 +3790,7 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
>  {
>  	struct vmcb_control_area *control;
>  
> +	/* The following fields are ignored when AVIC is enabled */
>  	control = &svm->vmcb->control;
>  	control->int_vector = irq;
>  	control->int_ctl &= ~V_INTR_PRIO_MASK;
> @@ -3844,6 +3868,20 @@ static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu)
>  	return;
>  }
>  
> +static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
> +{
> +	struct vcpu_svm *svm = to_svm(vcpu);
> +
> +	kvm_lapic_set_vector(vec, svm->vcpu.arch.apic->regs + APIC_IRR);
> +	smp_mb__after_atomic();
> +
> +	if (avic_vcpu_is_running(vcpu))
> +		wrmsrl(SVM_AVIC_DOORBELL,
> +		       __default_cpu_present_to_apicid(vcpu->cpu));
> +	else
> +		kvm_vcpu_wake_up(vcpu);
> +}
> +
>  static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
>  {
>  	struct vcpu_svm *svm = to_svm(vcpu);
> @@ -3904,6 +3942,9 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
>  	 * get that intercept, this function will be called again though and
>  	 * we'll get the vintr intercept.
>  	 */
> +	if (svm_vcpu_avic_enabled(svm))
> +		return;

Same here.
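
For enable_irq_window() the same idea might look like this (illustrative;
whether is_guest_mode() is the right predicate for "an L2 guest is active"
is an assumption, not something settled in this thread):

	-	if (svm_vcpu_avic_enabled(svm))
	-		return;
	+	/* AVIC handles injection for L1, but an L2 guest still relies on
	+	 * the VINTR window, so only skip it when no nested guest runs. */
	+	if (svm_vcpu_avic_enabled(svm) && !is_guest_mode(vcpu))
	+		return;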

>  	if (gif_set(svm) && nested_svm_intr(svm)) {
>  		svm_set_vintr(svm);
>  		svm_inject_irq(svm, 0x0);
> @@ -4638,6 +4679,7 @@ static struct kvm_x86_ops svm_x86_ops = {
>  	.sched_in = svm_sched_in,
>  
>  	.pmu_ops = &amd_pmu_ops,
> +	.deliver_posted_interrupt = svm_deliver_avic_intr,
>  };
>  
>  static int __init svm_init(void)
> 
Suthikulpanit, Suravee April 5, 2016, 1:26 p.m. UTC | #2
Hi Paolo,

On 3/18/16 17:22, Paolo Bonzini wrote:
>
>
> On 18/03/2016 07:09, Suravee Suthikulpanit wrote:
>> This patch introduces a new mechanism for injecting interrupts using AVIC.
>> Since VINTR is not supported when AVIC is enabled, interrupts must be
>> injected via the APIC backing page instead.
>>
>> This patch also adds support for the AVIC doorbell, which is used by
>> KVM to signal a running vcpu to check its IRR for injected interrupts.
>>
>> Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
>
> Looks good, but I think it breaks nested virtualization.  See below.
>
>> [...]
>> @@ -2877,8 +2895,10 @@ static int clgi_interception(struct vcpu_svm *svm)
>>   	disable_gif(svm);
>>
>>   	/* After a CLGI no interrupts should come */
>> -	svm_clear_vintr(svm);
>> -	svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
>> +	if (!svm_vcpu_avic_enabled(svm)) {
>> +		svm_clear_vintr(svm);
>> +		svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
>> +	}
>
> This is for nested virtualization.  Unless you support nested AVIC, the
> L2 guest should run without AVIC (i.e. IsRunning should be false) and
> use the old VINTR mechanism.

I see. I am not planning to support nested AVIC at the L2 level for
the moment. If that is all right, I would like to get the basic AVIC and
IOMMU support in first (unless you have a different opinion).

In that case, I think I should also make sure not to expose the AVIC
CPUID bit to the guest VM.
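
For what it is worth, a hypothetical sketch of such masking, assuming the
SVM feature leaf is CPUID 0x8000000A and that AVIC is reported in EDX bit 13
(the helper name is made up for illustration):

	/* Strip the AVIC feature bit from the SVM feature leaf before it is
	 * reported to the L1 guest, so L1 never tries to use AVIC itself. */
	static void svm_mask_avic_cpuid(struct kvm_cpuid_entry2 *entry)
	{
		if (entry->function == 0x8000000A)
			entry->edx &= ~(1U << 13);
	}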

>> [...]
>> @@ -3904,6 +3942,9 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
>>   	 * get that intercept, this function will be called again though and
>>   	 * we'll get the vintr intercept.
>>   	 */
>> +	if (svm_vcpu_avic_enabled(svm))
>> +		return;
>
> Same here.

If I change the code so that we do not expose the AVIC CPUID bit to the
L1 guest, then the L1 KVM driver should not set up AVIC for the L2 vcpus,
and in that case svm_vcpu_avic_enabled(svm) should return false. I have
not tested with a nested VM yet; I will give that a try.

Thanks,
Suravee
Suthikulpanit, Suravee April 5, 2016, 3:56 p.m. UTC | #3
Hi Paolo,

On 4/5/16 20:26, Suravee Suthikulpanit wrote:
>>> @@ -2877,8 +2895,10 @@ static int clgi_interception(struct vcpu_svm
>>> *svm)
>>>       disable_gif(svm);
>>>
>>>       /* After a CLGI no interrupts should come */
>>> -    svm_clear_vintr(svm);
>>> -    svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
>>> +    if (!svm_vcpu_avic_enabled(svm)) {
>>> +        svm_clear_vintr(svm);
>>> +        svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
>>> +    }
>>
>> This is for nested virtualization.  Unless you support nested AVIC, the
>> L2 guest should run without AVIC (i.e. IsRunning should be false) and
>> use the old VINTR mechanism.
>
> I see. I am not planning to support nested AVIC at the L2 level for
> the moment. If that is all right, I would like to get the basic AVIC and
> IOMMU support in first (unless you have a different opinion).
>
> In that case, I think I should also make sure not to expose the AVIC
> CPUID bit to the guest VM.

Actually, the code should already not be exposing the AVIC CPUID bit to the
L1 guest.

Thanks,
Suravee

Patch

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 621c948..8e31ad3 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -71,6 +71,8 @@  MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
 #define SVM_FEATURE_DECODE_ASSIST  (1 <<  7)
 #define SVM_FEATURE_PAUSE_FILTER   (1 << 10)
 
+#define SVM_AVIC_DOORBELL	0xc001011b
+
 #define NESTED_EXIT_HOST	0	/* Exit handled on host level */
 #define NESTED_EXIT_DONE	1	/* Exit caused nested vmexit  */
 #define NESTED_EXIT_CONTINUE	2	/* Further checks needed      */
@@ -217,6 +219,8 @@  static bool npt_enabled = true;
 static bool npt_enabled;
 #endif
 
+static struct kvm_x86_ops svm_x86_ops;
+
 /* allow nested paging (virtualized MMU) for all guests */
 static int npt = true;
 module_param(npt, int, S_IRUGO);
@@ -291,6 +295,18 @@  static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
 	return container_of(vcpu, struct vcpu_svm, vcpu);
 }
 
+static inline bool avic_vcpu_is_running(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+	u64 *entry;
+
+	entry = svm->avic_phy_apic_id_cache;
+	if (!entry)
+		return false;
+
+	return (READ_ONCE(*entry) & AVIC_PHY_APIC_ID__IS_RUN_MSK);
+}
+
 static void recalc_intercepts(struct vcpu_svm *svm)
 {
 	struct vmcb_control_area *c, *h;
@@ -964,6 +980,8 @@  static __init int svm_hardware_setup(void)
 
 	if (avic) {
 		printk(KERN_INFO "kvm: AVIC enabled\n");
+	} else {
+		svm_x86_ops.deliver_posted_interrupt = NULL;
 	}
 
 	return 0;
@@ -2877,8 +2895,10 @@  static int clgi_interception(struct vcpu_svm *svm)
 	disable_gif(svm);
 
 	/* After a CLGI no interrupts should come */
-	svm_clear_vintr(svm);
-	svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
+	if (!svm_vcpu_avic_enabled(svm)) {
+		svm_clear_vintr(svm);
+		svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
+	}
 
 	mark_dirty(svm->vmcb, VMCB_INTR);
 
@@ -3453,6 +3473,9 @@  static int msr_interception(struct vcpu_svm *svm)
 
 static int interrupt_window_interception(struct vcpu_svm *svm)
 {
+	if (svm_vcpu_avic_enabled(svm))
+		BUG();
+
 	kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
 	svm_clear_vintr(svm);
 	svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
@@ -3767,6 +3790,7 @@  static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
 {
 	struct vmcb_control_area *control;
 
+	/* The following fields are ignored when AVIC is enabled */
 	control = &svm->vmcb->control;
 	control->int_vector = irq;
 	control->int_ctl &= ~V_INTR_PRIO_MASK;
@@ -3844,6 +3868,20 @@  static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu)
 	return;
 }
 
+static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	kvm_lapic_set_vector(vec, svm->vcpu.arch.apic->regs + APIC_IRR);
+	smp_mb__after_atomic();
+
+	if (avic_vcpu_is_running(vcpu))
+		wrmsrl(SVM_AVIC_DOORBELL,
+		       __default_cpu_present_to_apicid(vcpu->cpu));
+	else
+		kvm_vcpu_wake_up(vcpu);
+}
+
 static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -3904,6 +3942,9 @@  static void enable_irq_window(struct kvm_vcpu *vcpu)
 	 * get that intercept, this function will be called again though and
 	 * we'll get the vintr intercept.
 	 */
+	if (svm_vcpu_avic_enabled(svm))
+		return;
+
 	if (gif_set(svm) && nested_svm_intr(svm)) {
 		svm_set_vintr(svm);
 		svm_inject_irq(svm, 0x0);
@@ -4638,6 +4679,7 @@  static struct kvm_x86_ops svm_x86_ops = {
 	.sched_in = svm_sched_in,
 
 	.pmu_ops = &amd_pmu_ops,
+	.deliver_posted_interrupt = svm_deliver_avic_intr,
 };
 
 static int __init svm_init(void)