diff mbox

KVM: x86: implement IOAPIC_REG_EOI for directed EOI support

Message ID 20170411141115.4314-1-lprosek@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Ladi Prosek April 11, 2017, 2:11 p.m. UTC
If the guest takes advantage of the directed EOI feature by setting
APIC_SPIV_DIRECTED_EOI, it is expected to signal EOI by writing to
the EOI register of the respective IOAPIC.

From Intel's x2APIC Specification:
"following the EOI to the local x2APIC unit for a level triggered
interrupt, perform a directed EOI to the IOxAPIC generating the
interrupt by writing to its EOI register."

Commit fc61b800f950 ("KVM: Add Directed EOI support to APIC emulation")
inhibited EOI on LAPIC EOI register write but didn't add the IOAPIC
part. IOAPIC_REG_EOI writes were handled only on IA64 and the code
was later removed with the rest of IA64 support.

The bug has gone undetected for a long time because Linux writes to
IOAPIC_REG_EOI only if the IOAPIC version is >=0x20. Windows doesn't
seem to perform such a check.

This commit re-adds IOAPIC_REG_EOI and implements it in terms of
__kvm_ioapic_update_eoi.

Fixes: fc61b800f950 ("KVM: Add Directed EOI support to APIC emulation")
Signed-off-by: Ladi Prosek <lprosek@redhat.com>
---
 arch/x86/kvm/ioapic.c | 46 ++++++++++++++++++++++++++++------------------
 arch/x86/kvm/ioapic.h |  1 +
 2 files changed, 29 insertions(+), 18 deletions(-)

Comments

Peter Xu April 12, 2017, 6:40 a.m. UTC | #1
On Tue, Apr 11, 2017 at 04:11:15PM +0200, Ladi Prosek wrote:
> If the guest takes advantage of the directed EOI feature by setting
> APIC_SPIV_DIRECTED_EOI, it is expected to signal EOI by writing to
> the EOI register of the respective IOAPIC.
> 
> From Intel's x2APIC Specification:
> "following the EOI to the local x2APIC unit for a level triggered
> interrupt, perform a directed EOI to the IOxAPIC generating the
> interrupt by writing to its EOI register."
> 
> Commit fc61b800f950 ("KVM: Add Directed EOI support to APIC emulation")
> inhibited EOI on LAPIC EOI register write but didn't add the IOAPIC
> part. IOAPIC_REG_EOI writes were handled only on IA64 and the code
> was later removed with the rest of IA64 support.
> 
> The bug has gone undetected for a long time because Linux writes to
> IOAPIC_REG_EOI only if the IOAPIC version is >=0x20. Windows doesn't
> seem to perform such a check.

Hi, Ladi,

Not sure I'm understanding it correctly... I see "direct EOI" is a
feature for IOAPIC version 0x20, while "suppress EOI-broadcast" is
another feature for APIC. They are not the same feature, so it may not
be required to have them all together. IIUC current x86 kvm is just
the case - it supports EOI broadcast suppression on APIC, but it does
not support direct EOI on kernel IOAPIC.

I think the problem is why the guest setup APIC_SPIV_DIRECTED_EOI even
if IOAPIC does not support direct EOI (the guest can know it by
probing IOAPIC version). Please correct if I'm wrong.

> 
> This commit re-adds IOAPIC_REG_EOI and implements it in terms of
> __kvm_ioapic_update_eoi.
> 
> Fixes: fc61b800f950 ("KVM: Add Directed EOI support to APIC emulation")
> Signed-off-by: Ladi Prosek <lprosek@redhat.com>
> ---
>  arch/x86/kvm/ioapic.c | 46 ++++++++++++++++++++++++++++------------------
>  arch/x86/kvm/ioapic.h |  1 +
>  2 files changed, 29 insertions(+), 18 deletions(-)
> 
> diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
> index 289270a..8df1c6c 100644
> --- a/arch/x86/kvm/ioapic.c
> +++ b/arch/x86/kvm/ioapic.c
> @@ -415,14 +415,15 @@ static void kvm_ioapic_eoi_inject_work(struct work_struct *work)
>  #define IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT 10000
>  
>  static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
> -			struct kvm_ioapic *ioapic, int vector, int trigger_mode)
> +			struct kvm_ioapic *ioapic, int vector, int trigger_mode,
> +			bool directed)
>  {
>  	struct dest_map *dest_map = &ioapic->rtc_status.dest_map;
>  	struct kvm_lapic *apic = vcpu->arch.apic;
>  	int i;
>  
>  	/* RTC special handling */
> -	if (test_bit(vcpu->vcpu_id, dest_map->map) &&
> +	if (!directed && test_bit(vcpu->vcpu_id, dest_map->map) &&
>  	    vector == dest_map->vectors[vcpu->vcpu_id])
>  		rtc_irq_eoi(ioapic, vcpu);
>  
> @@ -432,21 +433,23 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
>  		if (ent->fields.vector != vector)
>  			continue;
>  
> -		/*
> -		 * We are dropping lock while calling ack notifiers because ack
> -		 * notifier callbacks for assigned devices call into IOAPIC
> -		 * recursively. Since remote_irr is cleared only after call
> -		 * to notifiers if the same vector will be delivered while lock
> -		 * is dropped it will be put into irr and will be delivered
> -		 * after ack notifier returns.
> -		 */
> -		spin_unlock(&ioapic->lock);
> -		kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i);
> -		spin_lock(&ioapic->lock);
> -
> -		if (trigger_mode != IOAPIC_LEVEL_TRIG ||
> -		    kvm_lapic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)
> -			continue;
> +		if (!directed) {

Could I ask why we need to skip this if the EOI is sent via direct EOI
register of IOAPIC?

Thanks,

> +			/*
> +			 * We are dropping lock while calling ack notifiers because ack
> +			 * notifier callbacks for assigned devices call into IOAPIC
> +			 * recursively. Since remote_irr is cleared only after call
> +			 * to notifiers if the same vector will be delivered while lock
> +			 * is dropped it will be put into irr and will be delivered
> +			 * after ack notifier returns.
> +			 */
> +			spin_unlock(&ioapic->lock);
> +			kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i);
> +			spin_lock(&ioapic->lock);
> +
> +			if (trigger_mode != IOAPIC_LEVEL_TRIG ||
> +			    kvm_lapic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)
> +				continue;
> +		}
>  
>  		ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
>  		ent->fields.remote_irr = 0;
> @@ -478,7 +481,7 @@ void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode)
>  	struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
>  
>  	spin_lock(&ioapic->lock);
> -	__kvm_ioapic_update_eoi(vcpu, ioapic, vector, trigger_mode);
> +	__kvm_ioapic_update_eoi(vcpu, ioapic, vector, trigger_mode, false);
>  	spin_unlock(&ioapic->lock);
>  }
>  
> @@ -540,6 +543,7 @@ static int ioapic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
>  				 gpa_t addr, int len, const void *val)
>  {
>  	struct kvm_ioapic *ioapic = to_ioapic(this);
> +	struct kvm_lapic *apic = vcpu->arch.apic;
>  	u32 data;
>  	if (!ioapic_in_range(ioapic, addr))
>  		return -EOPNOTSUPP;
> @@ -575,6 +579,12 @@ static int ioapic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
>  		ioapic_write_indirect(ioapic, data);
>  		break;
>  
> +	case IOAPIC_REG_EOI:
> +		if (kvm_lapic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)
> +			__kvm_ioapic_update_eoi(vcpu, ioapic, data,
> +						IOAPIC_LEVEL_TRIG, true);
> +		break;
> +
>  	default:
>  		break;
>  	}
> diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
> index 1cc6e54..251b61b 100644
> --- a/arch/x86/kvm/ioapic.h
> +++ b/arch/x86/kvm/ioapic.h
> @@ -20,6 +20,7 @@ struct kvm_vcpu;
>  /* Direct registers. */
>  #define IOAPIC_REG_SELECT  0x00
>  #define IOAPIC_REG_WINDOW  0x10
> +#define IOAPIC_REG_EOI     0x40
>  
>  /* Indirect registers. */
>  #define IOAPIC_REG_APIC_ID 0x00	/* x86 IOAPIC only */
> -- 
> 2.9.3
>
Ladi Prosek April 12, 2017, 7:36 a.m. UTC | #2
On Wed, Apr 12, 2017 at 8:40 AM, Peter Xu <peterx@redhat.com> wrote:
> On Tue, Apr 11, 2017 at 04:11:15PM +0200, Ladi Prosek wrote:
>> If the guest takes advantage of the directed EOI feature by setting
>> APIC_SPIV_DIRECTED_EOI, it is expected to signal EOI by writing to
>> the EOI register of the respective IOAPIC.
>>
>> From Intel's x2APIC Specification:
>> "following the EOI to the local x2APIC unit for a level triggered
>> interrupt, perform a directed EOI to the IOxAPIC generating the
>> interrupt by writing to its EOI register."
>>
>> Commit fc61b800f950 ("KVM: Add Directed EOI support to APIC emulation")
>> inhibited EOI on LAPIC EOI register write but didn't add the IOAPIC
>> part. IOAPIC_REG_EOI writes were handled only on IA64 and the code
>> was later removed with the rest of IA64 support.
>>
>> The bug has gone undetected for a long time because Linux writes to
>> IOAPIC_REG_EOI only if the IOAPIC version is >=0x20. Windows doesn't
>> seem to perform such a check.
>
> Hi, Ladi,

Hi Peter,

> Not sure I'm understanding it correctly... I see "direct EOI" is a
> feature for IOAPIC version 0x20, while "suppress EOI-broadcast" is
> another feature for APIC. They are not the same feature, so it may not
> be required to have them all together. IIUC current x86 kvm is just
> the case - it supports EOI broadcast suppression on APIC, but it does
> not support direct EOI on kernel IOAPIC.

Thanks, that makes perfect sense and explains why Linux behaves the
way it does (__eoi_ioapic_pin in arch/x86/kernel/apic/io_apic.c).

This document makes it look like suppress EOI-broadcast always implies
directed EOI, though:

http://www.intel.com/content/dam/doc/specification-update/64-architecture-x2apic-specification.pdf

NB "The support for Directed EOI capability can be detected by means
of bit 24 in the Local APIC Version Register. "

There is no mention of APIC version or any other detection mechanism
for directed EOI. Maybe the chip being x2APIC implies version >= 0x20
but I don't see that in the document either.

I suspect that Microsoft implemented EOI by following this same spec.
Level-triggered interrupts don't work right on Windows Server 2016
with Hyper-V enabled without this patch.

> I think the problem is why the guest setup APIC_SPIV_DIRECTED_EOI even
> if IOAPIC does not support direct EOI (the guest can know it by
> probing IOAPIC version). Please correct if I'm wrong.

Yes, I think that the guest is to blame here. We might add that to the
commit message.

>>
>> This commit re-adds IOAPIC_REG_EOI and implements it in terms of
>> __kvm_ioapic_update_eoi.
>>
>> Fixes: fc61b800f950 ("KVM: Add Directed EOI support to APIC emulation")
>> Signed-off-by: Ladi Prosek <lprosek@redhat.com>
>> ---
>>  arch/x86/kvm/ioapic.c | 46 ++++++++++++++++++++++++++++------------------
>>  arch/x86/kvm/ioapic.h |  1 +
>>  2 files changed, 29 insertions(+), 18 deletions(-)
>>
>> diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
>> index 289270a..8df1c6c 100644
>> --- a/arch/x86/kvm/ioapic.c
>> +++ b/arch/x86/kvm/ioapic.c
>> @@ -415,14 +415,15 @@ static void kvm_ioapic_eoi_inject_work(struct work_struct *work)
>>  #define IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT 10000
>>
>>  static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
>> -                     struct kvm_ioapic *ioapic, int vector, int trigger_mode)
>> +                     struct kvm_ioapic *ioapic, int vector, int trigger_mode,
>> +                     bool directed)
>>  {
>>       struct dest_map *dest_map = &ioapic->rtc_status.dest_map;
>>       struct kvm_lapic *apic = vcpu->arch.apic;
>>       int i;
>>
>>       /* RTC special handling */
>> -     if (test_bit(vcpu->vcpu_id, dest_map->map) &&
>> +     if (!directed && test_bit(vcpu->vcpu_id, dest_map->map) &&
>>           vector == dest_map->vectors[vcpu->vcpu_id])
>>               rtc_irq_eoi(ioapic, vcpu);
>>
>> @@ -432,21 +433,23 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
>>               if (ent->fields.vector != vector)
>>                       continue;
>>
>> -             /*
>> -              * We are dropping lock while calling ack notifiers because ack
>> -              * notifier callbacks for assigned devices call into IOAPIC
>> -              * recursively. Since remote_irr is cleared only after call
>> -              * to notifiers if the same vector will be delivered while lock
>> -              * is dropped it will be put into irr and will be delivered
>> -              * after ack notifier returns.
>> -              */
>> -             spin_unlock(&ioapic->lock);
>> -             kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i);
>> -             spin_lock(&ioapic->lock);
>> -
>> -             if (trigger_mode != IOAPIC_LEVEL_TRIG ||
>> -                 kvm_lapic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)
>> -                     continue;
>> +             if (!directed) {
>
> Could I ask why we need to skip this if the EOI is sent via direct EOI
> register of IOAPIC?

Because it's already been done as part of the local EOI. With directed
EOI we hit this function twice, first time when doing the local EOI
and then the newly added code path for IOAPIC EOI with directed=true.

I, again, followed the above mentioned document which explicitly
dictates the sequence. And I mechanically split the function to the
"local part' - what it had been doing up to the continue statement -
and the "directed part" - what it had been skipping. I'll admit that
my familiarity with this code is limited and there may be a better way
to do this.

Thanks!
Ladi

> Thanks,
>
>> +                     /*
>> +                      * We are dropping lock while calling ack notifiers because ack
>> +                      * notifier callbacks for assigned devices call into IOAPIC
>> +                      * recursively. Since remote_irr is cleared only after call
>> +                      * to notifiers if the same vector will be delivered while lock
>> +                      * is dropped it will be put into irr and will be delivered
>> +                      * after ack notifier returns.
>> +                      */
>> +                     spin_unlock(&ioapic->lock);
>> +                     kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i);
>> +                     spin_lock(&ioapic->lock);
>> +
>> +                     if (trigger_mode != IOAPIC_LEVEL_TRIG ||
>> +                         kvm_lapic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)
>> +                             continue;
>> +             }
>>
>>               ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
>>               ent->fields.remote_irr = 0;
>> @@ -478,7 +481,7 @@ void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode)
>>       struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
>>
>>       spin_lock(&ioapic->lock);
>> -     __kvm_ioapic_update_eoi(vcpu, ioapic, vector, trigger_mode);
>> +     __kvm_ioapic_update_eoi(vcpu, ioapic, vector, trigger_mode, false);
>>       spin_unlock(&ioapic->lock);
>>  }
>>
>> @@ -540,6 +543,7 @@ static int ioapic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
>>                                gpa_t addr, int len, const void *val)
>>  {
>>       struct kvm_ioapic *ioapic = to_ioapic(this);
>> +     struct kvm_lapic *apic = vcpu->arch.apic;
>>       u32 data;
>>       if (!ioapic_in_range(ioapic, addr))
>>               return -EOPNOTSUPP;
>> @@ -575,6 +579,12 @@ static int ioapic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
>>               ioapic_write_indirect(ioapic, data);
>>               break;
>>
>> +     case IOAPIC_REG_EOI:
>> +             if (kvm_lapic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)
>> +                     __kvm_ioapic_update_eoi(vcpu, ioapic, data,
>> +                                             IOAPIC_LEVEL_TRIG, true);
>> +             break;
>> +
>>       default:
>>               break;
>>       }
>> diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
>> index 1cc6e54..251b61b 100644
>> --- a/arch/x86/kvm/ioapic.h
>> +++ b/arch/x86/kvm/ioapic.h
>> @@ -20,6 +20,7 @@ struct kvm_vcpu;
>>  /* Direct registers. */
>>  #define IOAPIC_REG_SELECT  0x00
>>  #define IOAPIC_REG_WINDOW  0x10
>> +#define IOAPIC_REG_EOI     0x40
>>
>>  /* Indirect registers. */
>>  #define IOAPIC_REG_APIC_ID 0x00      /* x86 IOAPIC only */
>> --
>> 2.9.3
>>
>
> --
> Peter Xu
diff mbox

Patch

diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 289270a..8df1c6c 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -415,14 +415,15 @@  static void kvm_ioapic_eoi_inject_work(struct work_struct *work)
 #define IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT 10000
 
 static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
-			struct kvm_ioapic *ioapic, int vector, int trigger_mode)
+			struct kvm_ioapic *ioapic, int vector, int trigger_mode,
+			bool directed)
 {
 	struct dest_map *dest_map = &ioapic->rtc_status.dest_map;
 	struct kvm_lapic *apic = vcpu->arch.apic;
 	int i;
 
 	/* RTC special handling */
-	if (test_bit(vcpu->vcpu_id, dest_map->map) &&
+	if (!directed && test_bit(vcpu->vcpu_id, dest_map->map) &&
 	    vector == dest_map->vectors[vcpu->vcpu_id])
 		rtc_irq_eoi(ioapic, vcpu);
 
@@ -432,21 +433,23 @@  static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
 		if (ent->fields.vector != vector)
 			continue;
 
-		/*
-		 * We are dropping lock while calling ack notifiers because ack
-		 * notifier callbacks for assigned devices call into IOAPIC
-		 * recursively. Since remote_irr is cleared only after call
-		 * to notifiers if the same vector will be delivered while lock
-		 * is dropped it will be put into irr and will be delivered
-		 * after ack notifier returns.
-		 */
-		spin_unlock(&ioapic->lock);
-		kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i);
-		spin_lock(&ioapic->lock);
-
-		if (trigger_mode != IOAPIC_LEVEL_TRIG ||
-		    kvm_lapic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)
-			continue;
+		if (!directed) {
+			/*
+			 * We are dropping lock while calling ack notifiers because ack
+			 * notifier callbacks for assigned devices call into IOAPIC
+			 * recursively. Since remote_irr is cleared only after call
+			 * to notifiers if the same vector will be delivered while lock
+			 * is dropped it will be put into irr and will be delivered
+			 * after ack notifier returns.
+			 */
+			spin_unlock(&ioapic->lock);
+			kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i);
+			spin_lock(&ioapic->lock);
+
+			if (trigger_mode != IOAPIC_LEVEL_TRIG ||
+			    kvm_lapic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)
+				continue;
+		}
 
 		ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
 		ent->fields.remote_irr = 0;
@@ -478,7 +481,7 @@  void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode)
 	struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
 
 	spin_lock(&ioapic->lock);
-	__kvm_ioapic_update_eoi(vcpu, ioapic, vector, trigger_mode);
+	__kvm_ioapic_update_eoi(vcpu, ioapic, vector, trigger_mode, false);
 	spin_unlock(&ioapic->lock);
 }
 
@@ -540,6 +543,7 @@  static int ioapic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
 				 gpa_t addr, int len, const void *val)
 {
 	struct kvm_ioapic *ioapic = to_ioapic(this);
+	struct kvm_lapic *apic = vcpu->arch.apic;
 	u32 data;
 	if (!ioapic_in_range(ioapic, addr))
 		return -EOPNOTSUPP;
@@ -575,6 +579,12 @@  static int ioapic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
 		ioapic_write_indirect(ioapic, data);
 		break;
 
+	case IOAPIC_REG_EOI:
+		if (kvm_lapic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)
+			__kvm_ioapic_update_eoi(vcpu, ioapic, data,
+						IOAPIC_LEVEL_TRIG, true);
+		break;
+
 	default:
 		break;
 	}
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index 1cc6e54..251b61b 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -20,6 +20,7 @@  struct kvm_vcpu;
 /* Direct registers. */
 #define IOAPIC_REG_SELECT  0x00
 #define IOAPIC_REG_WINDOW  0x10
+#define IOAPIC_REG_EOI     0x40
 
 /* Indirect registers. */
 #define IOAPIC_REG_APIC_ID 0x00	/* x86 IOAPIC only */