diff mbox

[v8,03/13] KVM: Define a new interface kvm_intr_is_single_vcpu()

Message ID 1442393409-2623-4-git-send-email-feng.wu@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Wu, Feng Sept. 16, 2015, 8:49 a.m. UTC
This patch defines a new interface kvm_intr_is_single_vcpu(),
which can returns whether the interrupt is for single-CPU or not.

It is used by VT-d PI, since now we only support single-CPU
interrupts, For lowest-priority interrupts, if user configures
it via /proc/irq or uses irqbalance to make it single-CPU, we
can use PI to deliver the interrupts to it. Full functionality
of lowest-priority support will be added later.

Signed-off-by: Feng Wu <feng.wu@intel.com>
---
v8:
- Some optimizations in kvm_intr_is_single_vcpu().
- Expose kvm_intr_is_single_vcpu() so we can use it in vmx code.
- Add kvm_intr_is_single_vcpu_fast() as the fast path to find
  the target vCPU for the single-destination interrupt

 arch/x86/include/asm/kvm_host.h |  3 ++
 arch/x86/kvm/irq_comm.c         | 94 +++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/lapic.c            |  5 +--
 arch/x86/kvm/lapic.h            |  2 +
 4 files changed, 101 insertions(+), 3 deletions(-)

Comments

Paolo Bonzini Sept. 16, 2015, 9:23 a.m. UTC | #1
On 16/09/2015 10:49, Feng Wu wrote:
> This patch defines a new interface kvm_intr_is_single_vcpu(),
> which can returns whether the interrupt is for single-CPU or not.
> 
> It is used by VT-d PI, since now we only support single-CPU
> interrupts, For lowest-priority interrupts, if user configures
> it via /proc/irq or uses irqbalance to make it single-CPU, we
> can use PI to deliver the interrupts to it. Full functionality
> of lowest-priority support will be added later.
> 
> Signed-off-by: Feng Wu <feng.wu@intel.com>
> ---
> v8:
> - Some optimizations in kvm_intr_is_single_vcpu().
> - Expose kvm_intr_is_single_vcpu() so we can use it in vmx code.
> - Add kvm_intr_is_single_vcpu_fast() as the fast path to find
>   the target vCPU for the single-destination interrupt
> 
>  arch/x86/include/asm/kvm_host.h |  3 ++
>  arch/x86/kvm/irq_comm.c         | 94 +++++++++++++++++++++++++++++++++++++++++
>  arch/x86/kvm/lapic.c            |  5 +--
>  arch/x86/kvm/lapic.h            |  2 +
>  4 files changed, 101 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 49ec903..af11bca 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1204,4 +1204,7 @@ int __x86_set_memory_region(struct kvm *kvm,
>  int x86_set_memory_region(struct kvm *kvm,
>  			  const struct kvm_userspace_memory_region *mem);
>  
> +bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
> +			     struct kvm_vcpu **dest_vcpu);
> +
>  #endif /* _ASM_X86_KVM_HOST_H */
> diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
> index 9efff9e..97ba1d6 100644
> --- a/arch/x86/kvm/irq_comm.c
> +++ b/arch/x86/kvm/irq_comm.c
> @@ -297,6 +297,100 @@ out:
>  	return r;
>  }
>  
> +static bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm,
> +					 struct kvm_lapic_irq *irq,
> +					 struct kvm_vcpu **dest_vcpu)

Please put this in lapic.c, similar to kvm_irq_delivery_to_apic_fast, so
that you do not have to export other functions.

> +{
> +	struct kvm_apic_map *map;
> +	bool ret = false;
> +	struct kvm_lapic *dst = NULL;
> +
> +	if (irq->shorthand)
> +		return false;
> +
> +	rcu_read_lock();
> +	map = rcu_dereference(kvm->arch.apic_map);
> +
> +	if (!map)
> +		goto out;
> +
> +	if (irq->dest_mode == APIC_DEST_PHYSICAL) {
> +		if (irq->dest_id == 0xFF)
> +			goto out;
> +
> +		if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) {

Warning here is wrong, the guest can trigger it.

> +			WARN_ON_ONCE(1);
> +			goto out;
> +		}
> +
> +		dst = map->phys_map[irq->dest_id];
> +		if (dst && kvm_apic_present(dst->vcpu))
> +			*dest_vcpu = dst->vcpu;
> +		else
> +			goto out;
> +	} else {
> +		u16 cid;
> +		unsigned long bitmap = 1;
> +		int i, r = 0;
> +
> +		if (!kvm_apic_logical_map_valid(map)) {
> +			WARN_ON_ONCE(1);

Same here.

> +			goto out;
> +		}
> +
> +		apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap);
> +
> +		if (cid >= ARRAY_SIZE(map->logical_map)) {
> +			WARN_ON_ONCE(1);

Same here.

Otherwise looks good.

Paolo

> +			goto out;
> +		}
> +
> +		for_each_set_bit(i, &bitmap, 16) {
> +			dst = map->logical_map[cid][i];
> +			if (++r == 2)
> +				goto out;
> +		}
> +
> +		if (dst && kvm_apic_present(dst->vcpu))
> +			*dest_vcpu = dst->vcpu;
> +		else
> +			goto out;
> +	}
> +
> +	ret = true;
> +out:
> +	rcu_read_unlock();
> +	return ret;
> +}
> +
> +
> +bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
> +			     struct kvm_vcpu **dest_vcpu)
> +{
> +	int i, r = 0;
> +	struct kvm_vcpu *vcpu;
> +
> +	if (kvm_intr_is_single_vcpu_fast(kvm, irq, dest_vcpu))
> +		return true;
> +
> +	kvm_for_each_vcpu(i, vcpu, kvm) {
> +		if (!kvm_apic_present(vcpu))
> +			continue;
> +
> +		if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand,
> +					irq->dest_id, irq->dest_mode))
> +			continue;
> +
> +		if (++r == 2)
> +			return false;
> +
> +		*dest_vcpu = vcpu;
> +	}
> +
> +	return r == 1;
> +}
> +EXPORT_SYMBOL_GPL(kvm_intr_is_single_vcpu);
> +
>  #define IOAPIC_ROUTING_ENTRY(irq) \
>  	{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP,	\
>  	  .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } }
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 2a5ca97..9848cd50 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -136,13 +136,12 @@ static inline int kvm_apic_id(struct kvm_lapic *apic)
>  /* The logical map is definitely wrong if we have multiple
>   * modes at the same time.  (Physical map is always right.)
>   */
> -static inline bool kvm_apic_logical_map_valid(struct kvm_apic_map *map)
> +bool kvm_apic_logical_map_valid(struct kvm_apic_map *map)
>  {
>  	return !(map->mode & (map->mode - 1));
>  }
>  
> -static inline void
> -apic_logical_id(struct kvm_apic_map *map, u32 dest_id, u16 *cid, u16 *lid)
> +void apic_logical_id(struct kvm_apic_map *map, u32 dest_id, u16 *cid, u16 *lid)
>  {
>  	unsigned lid_bits;
>  
> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> index 7195274..6798b87 100644
> --- a/arch/x86/kvm/lapic.h
> +++ b/arch/x86/kvm/lapic.h
> @@ -169,4 +169,6 @@ bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
>  
>  void wait_lapic_expire(struct kvm_vcpu *vcpu);
>  
> +void apic_logical_id(struct kvm_apic_map *map, u32 dest_id, u16 *cid, u16 *lid);
> +bool kvm_apic_logical_map_valid(struct kvm_apic_map *map);
>  #endif
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wu, Feng Sept. 17, 2015, 3:17 a.m. UTC | #2
> -----Original Message-----
> From: Paolo Bonzini [mailto:pbonzini@redhat.com]
> Sent: Wednesday, September 16, 2015 5:23 PM
> To: Wu, Feng; alex.williamson@redhat.com; joro@8bytes.org;
> mtosatti@redhat.com
> Cc: eric.auger@linaro.org; kvm@vger.kernel.org;
> iommu@lists.linux-foundation.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH v8 03/13] KVM: Define a new interface
> kvm_intr_is_single_vcpu()
> 
> 
> 
> On 16/09/2015 10:49, Feng Wu wrote:
> > This patch defines a new interface kvm_intr_is_single_vcpu(),
> > which can returns whether the interrupt is for single-CPU or not.
> >
> > It is used by VT-d PI, since now we only support single-CPU
> > interrupts, For lowest-priority interrupts, if user configures
> > it via /proc/irq or uses irqbalance to make it single-CPU, we
> > can use PI to deliver the interrupts to it. Full functionality
> > of lowest-priority support will be added later.
> >
> > Signed-off-by: Feng Wu <feng.wu@intel.com>
> > ---
> > v8:
> > - Some optimizations in kvm_intr_is_single_vcpu().
> > - Expose kvm_intr_is_single_vcpu() so we can use it in vmx code.
> > - Add kvm_intr_is_single_vcpu_fast() as the fast path to find
> >   the target vCPU for the single-destination interrupt
> >
> >  arch/x86/include/asm/kvm_host.h |  3 ++
> >  arch/x86/kvm/irq_comm.c         | 94
> +++++++++++++++++++++++++++++++++++++++++
> >  arch/x86/kvm/lapic.c            |  5 +--
> >  arch/x86/kvm/lapic.h            |  2 +
> >  4 files changed, 101 insertions(+), 3 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/kvm_host.h
> b/arch/x86/include/asm/kvm_host.h
> > index 49ec903..af11bca 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -1204,4 +1204,7 @@ int __x86_set_memory_region(struct kvm *kvm,
> >  int x86_set_memory_region(struct kvm *kvm,
> >  			  const struct kvm_userspace_memory_region *mem);
> >
> > +bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
> > +			     struct kvm_vcpu **dest_vcpu);
> > +
> >  #endif /* _ASM_X86_KVM_HOST_H */
> > diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
> > index 9efff9e..97ba1d6 100644
> > --- a/arch/x86/kvm/irq_comm.c
> > +++ b/arch/x86/kvm/irq_comm.c
> > @@ -297,6 +297,100 @@ out:
> >  	return r;
> >  }
> >
> > +static bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm,
> > +					 struct kvm_lapic_irq *irq,
> > +					 struct kvm_vcpu **dest_vcpu)
> 
> Please put this in lapic.c, similar to kvm_irq_delivery_to_apic_fast, so
> that you do not have to export other functions.
> 
> > +{
> > +	struct kvm_apic_map *map;
> > +	bool ret = false;
> > +	struct kvm_lapic *dst = NULL;
> > +
> > +	if (irq->shorthand)
> > +		return false;
> > +
> > +	rcu_read_lock();
> > +	map = rcu_dereference(kvm->arch.apic_map);
> > +
> > +	if (!map)
> > +		goto out;
> > +
> > +	if (irq->dest_mode == APIC_DEST_PHYSICAL) {
> > +		if (irq->dest_id == 0xFF)
> > +			goto out;
> > +
> > +		if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) {
> 
> Warning here is wrong, the guest can trigger it.

Could you please share more information about how the guest
triggers these conditions (including the following two), Thanks
a lot!

Thanks,
Feng

> 
> > +			WARN_ON_ONCE(1);
> > +			goto out;
> > +		}
> > +
> > +		dst = map->phys_map[irq->dest_id];
> > +		if (dst && kvm_apic_present(dst->vcpu))
> > +			*dest_vcpu = dst->vcpu;
> > +		else
> > +			goto out;
> > +	} else {
> > +		u16 cid;
> > +		unsigned long bitmap = 1;
> > +		int i, r = 0;
> > +
> > +		if (!kvm_apic_logical_map_valid(map)) {
> > +			WARN_ON_ONCE(1);
> 
> Same here.
> 
> > +			goto out;
> > +		}
> > +
> > +		apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap);
> > +
> > +		if (cid >= ARRAY_SIZE(map->logical_map)) {
> > +			WARN_ON_ONCE(1);
> 
> Same here.
> 
> Otherwise looks good.
> 
> Paolo
> 
> > +			goto out;
> > +		}
> > +
> > +		for_each_set_bit(i, &bitmap, 16) {
> > +			dst = map->logical_map[cid][i];
> > +			if (++r == 2)
> > +				goto out;
> > +		}
> > +
> > +		if (dst && kvm_apic_present(dst->vcpu))
> > +			*dest_vcpu = dst->vcpu;
> > +		else
> > +			goto out;
> > +	}
> > +
> > +	ret = true;
> > +out:
> > +	rcu_read_unlock();
> > +	return ret;
> > +}
> > +
> > +
> > +bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
> > +			     struct kvm_vcpu **dest_vcpu)
> > +{
> > +	int i, r = 0;
> > +	struct kvm_vcpu *vcpu;
> > +
> > +	if (kvm_intr_is_single_vcpu_fast(kvm, irq, dest_vcpu))
> > +		return true;
> > +
> > +	kvm_for_each_vcpu(i, vcpu, kvm) {
> > +		if (!kvm_apic_present(vcpu))
> > +			continue;
> > +
> > +		if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand,
> > +					irq->dest_id, irq->dest_mode))
> > +			continue;
> > +
> > +		if (++r == 2)
> > +			return false;
> > +
> > +		*dest_vcpu = vcpu;
> > +	}
> > +
> > +	return r == 1;
> > +}
> > +EXPORT_SYMBOL_GPL(kvm_intr_is_single_vcpu);
> > +
> >  #define IOAPIC_ROUTING_ENTRY(irq) \
> >  	{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP,	\
> >  	  .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } }
> > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> > index 2a5ca97..9848cd50 100644
> > --- a/arch/x86/kvm/lapic.c
> > +++ b/arch/x86/kvm/lapic.c
> > @@ -136,13 +136,12 @@ static inline int kvm_apic_id(struct kvm_lapic
> *apic)
> >  /* The logical map is definitely wrong if we have multiple
> >   * modes at the same time.  (Physical map is always right.)
> >   */
> > -static inline bool kvm_apic_logical_map_valid(struct kvm_apic_map *map)
> > +bool kvm_apic_logical_map_valid(struct kvm_apic_map *map)
> >  {
> >  	return !(map->mode & (map->mode - 1));
> >  }
> >
> > -static inline void
> > -apic_logical_id(struct kvm_apic_map *map, u32 dest_id, u16 *cid, u16 *lid)
> > +void apic_logical_id(struct kvm_apic_map *map, u32 dest_id, u16 *cid, u16
> *lid)
> >  {
> >  	unsigned lid_bits;
> >
> > diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> > index 7195274..6798b87 100644
> > --- a/arch/x86/kvm/lapic.h
> > +++ b/arch/x86/kvm/lapic.h
> > @@ -169,4 +169,6 @@ bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu,
> int vector);
> >
> >  void wait_lapic_expire(struct kvm_vcpu *vcpu);
> >
> > +void apic_logical_id(struct kvm_apic_map *map, u32 dest_id, u16 *cid, u16
> *lid);
> > +bool kvm_apic_logical_map_valid(struct kvm_apic_map *map);
> >  #endif
> >
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini Sept. 17, 2015, 9:42 a.m. UTC | #3
On 17/09/2015 05:17, Wu, Feng wrote:
>>> > > +	if (irq->dest_mode == APIC_DEST_PHYSICAL) {
>>> > > +		if (irq->dest_id == 0xFF)
>>> > > +			goto out;
>>> > > +
>>> > > +		if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) {
>> > 
>> > Warning here is wrong, the guest can trigger it.
> Could you please share more information about how the guest
> triggers these conditions (including the following two), Thanks
> a lot!

irq->dest_id is a 16-bit value, so it can be > 255.

> +		if (!kvm_apic_logical_map_valid(map)) {
> +			WARN_ON_ONCE(1);

Here, the guest can trigger it by setting a few APICs in flat mode and
others in cluster mode, for example.

> +		if (cid >= ARRAY_SIZE(map->logical_map)) {
> +			WARN_ON_ONCE(1);

In x2apic mode irq->dest_id could have bits 12..15 set.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wu, Feng Sept. 17, 2015, 1:36 p.m. UTC | #4
> -----Original Message-----
> From: Paolo Bonzini [mailto:pbonzini@redhat.com]
> Sent: Thursday, September 17, 2015 5:42 PM
> To: Wu, Feng; alex.williamson@redhat.com; joro@8bytes.org;
> mtosatti@redhat.com
> Cc: eric.auger@linaro.org; kvm@vger.kernel.org;
> iommu@lists.linux-foundation.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH v8 03/13] KVM: Define a new interface
> kvm_intr_is_single_vcpu()
> 
> 
> 
> On 17/09/2015 05:17, Wu, Feng wrote:
> >>> > > +	if (irq->dest_mode == APIC_DEST_PHYSICAL) {
> >>> > > +		if (irq->dest_id == 0xFF)
> >>> > > +			goto out;
> >>> > > +
> >>> > > +		if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) {
> >> >
> >> > Warning here is wrong, the guest can trigger it.
> > Could you please share more information about how the guest
> > triggers these conditions (including the following two), Thanks
> > a lot!
> 
> irq->dest_id is a 16-bit value, so it can be > 255.

Yes, irq->dest_id is defined as u32, but by looking the current KVM
code, seems desst_id is used as an u8 variable, even in x2apic mode
the dest_id will not beyond 255 (except for broadcast dest in in x2apic
mode). Correct me if I am wrong. Thanks a lot!

> 
> > +		if (!kvm_apic_logical_map_valid(map)) {
> > +			WARN_ON_ONCE(1);
> 
> Here, the guest can trigger it by setting a few APICs in flat mode and
> others in cluster mode, for example.

Oh, right, the logical map works only when the destination mode of all
the vCPUs are the same.

> 
> > +		if (cid >= ARRAY_SIZE(map->logical_map)) {
> > +			WARN_ON_ONCE(1);
> 
> In x2apic mode irq->dest_id could have bits 12..15 set.

cid is gotten from bit 16 ..31 of the ldr (in apic_logical_id()), and
in x2apic mode, ldr is constructed in kvm_apic_set_x2apic_id() as
below:

u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));

So in fact, cid is (id >> 4), I cannot think of why cid can beyond 15.
Do I miss something here? Thanks!

Thanks,
Feng

> 
> Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini Sept. 17, 2015, 2:24 p.m. UTC | #5
>> On 17/09/2015 05:17, Wu, Feng wrote:
>>>>>>> +	if (irq->dest_mode == APIC_DEST_PHYSICAL) {
>>>>>>> +		if (irq->dest_id == 0xFF)
>>>>>>> +			goto out;
>>>>>>> +
>>>>>>> +		if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) {
>>>>>
>>>>> Warning here is wrong, the guest can trigger it.
>>> Could you please share more information about how the guest
>>> triggers these conditions (including the following two), Thanks
>>> a lot!
>>
>> irq->dest_id is a 16-bit value, so it can be > 255.
> 
> Yes, irq->dest_id is defined as u32, but by looking the current KVM
> code, seems desst_id is used as an u8 variable, even in x2apic mode
> the dest_id will not beyond 255 (except for broadcast dest in in x2apic
> mode). Correct me if I am wrong. Thanks a lot!

Actually you're right, the MSI destination is only 8 bits.  I was
confused because of

#define	 MSI_ADDR_DEST_ID_MASK		0x00ffff0

in arch/x86/include/asm/msidef.h.  But there may be a bug, see below...

>>> +		if (cid >= ARRAY_SIZE(map->logical_map)) {
>>> +			WARN_ON_ONCE(1);
>>
>> In x2apic mode irq->dest_id could have bits 12..15 set.
> 
> cid is gotten from bit 16 ..31 of the ldr (in apic_logical_id()), and
> in x2apic mode, ldr is constructed in kvm_apic_set_x2apic_id() as
> below:
> 
> u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
> 
> So in fact, cid is (id >> 4), I cannot think of why cid can beyond 15.

I think kvm_apic_match_logical_addr for MSI and IOAPIC interrupts is
buggy in x2apic mode.

It does:

        if (apic_x2apic_mode(apic))
                return ((logical_id >> 16) == (mda >> 16))
                       && (logical_id & mda & 0xffff) != 0;

But mda is only 8-bits for MSI and IOAPIC interrupts.

Radim, should kvm_apic_mda also handle the !ipi && x2apic_mda && dest_id
!= APIC_BROADCAST case?  It never triggers with Linux because it uses
only the physical mode (that's not super-easy to see;
ioapic_set_affinity looks for the RTEs in irq_data->chip_data and that
is allocated with kzalloc).

> Do I miss something here? Thanks!

No, you were right.

But still I think the WARNs are unnecessary; it is conceivable that some
future chipset adds support for more than 8-bits in the dest_id.

Paolo

> Thanks,
> Feng
> 
>>
>> Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Radim Krčmář Sept. 17, 2015, 3:58 p.m. UTC | #6
2015-09-17 16:24+0200, Paolo Bonzini:
> I think kvm_apic_match_logical_addr for MSI and IOAPIC interrupts is
> buggy in x2apic mode.
> 
> It does:
> 
>         if (apic_x2apic_mode(apic))
>                 return ((logical_id >> 16) == (mda >> 16))
>                        && (logical_id & mda & 0xffff) != 0;
> 
> But mda is only 8-bits for MSI and IOAPIC interrupts.
> 
> Radim, should kvm_apic_mda also handle the !ipi && x2apic_mda && dest_id
> != APIC_BROADCAST case?  It never triggers with Linux because it uses
> only the physical mode (that's not super-easy to see;
> ioapic_set_affinity looks for the RTEs in irq_data->chip_data and that
> is allocated with kzalloc).

KVM handles that case, it's just convoluted.
(I wish we scrapped the IR-less x2APIC mode.)

For interrupts from MSI and IOxAPIC:
- Flat logical interrupts are delivered as if we had natural
  (CPU0<->bit0, CPU1<->bit1, ...) flat logical xAPIC for first 8 VCPUs.
- Cluster logical doesn't work much, it's interpreted like flat logical.
  I didn't care about xAPIC cluster because Linux, the sole user of our
  paravirtualized x2APIC, doesn't configure it.

I'll paste kvm_apic_mda() source for better explanation:

  static u32 kvm_apic_mda(unsigned int dest_id, struct kvm_lapic *source,
                                                struct kvm_lapic *target)
  {
  	bool ipi = source != NULL;
  	bool x2apic_mda = apic_x2apic_mode(ipi ? source : target);
  
  	if (!ipi && dest_id == APIC_BROADCAST && x2apic_mda)
  		return X2APIC_BROADCAST;
  
  	return x2apic_mda ? dest_id : SET_APIC_DEST_FIELD(dest_id);
  }

MSI/IOxAPIC interrupt means that source is NULL and if the target is in
x2APIC mode, the original 'dest_id' is returned as mda => a flat logical
xAPIC to 0x0f will get interpreted as (cluster) logical x2APIC 0xf in
kvm_apic_match_logical_addr().
xAPIC address are only 8 bit long so they always get delivered to x2APIC
cluster 0, where first 16 bits work like xAPIC flat logical mode.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini Sept. 17, 2015, 4 p.m. UTC | #7
On 17/09/2015 17:58, Radim Kr?má? wrote:
> For interrupts from MSI and IOxAPIC:
> - Flat logical interrupts are delivered as if we had natural
>   (CPU0<->bit0, CPU1<->bit1, ...) flat logical xAPIC for first 8 VCPUs.
> - Cluster logical doesn't work much, it's interpreted like flat logical.
>   I didn't care about xAPIC cluster because Linux, the sole user of our
>   paravirtualized x2APIC, doesn't configure it.
> 
> I'll paste kvm_apic_mda() source for better explanation:
> 
>   static u32 kvm_apic_mda(unsigned int dest_id, struct kvm_lapic *source,
>                                                 struct kvm_lapic *target)
>   {
>   	bool ipi = source != NULL;
>   	bool x2apic_mda = apic_x2apic_mode(ipi ? source : target);
>   
>   	if (!ipi && dest_id == APIC_BROADCAST && x2apic_mda)
>   		return X2APIC_BROADCAST;
>   
>   	return x2apic_mda ? dest_id : SET_APIC_DEST_FIELD(dest_id);
>   }
> 
> MSI/IOxAPIC interrupt means that source is NULL and if the target is in
> x2APIC mode, the original 'dest_id' is returned as mda => a flat logical
> xAPIC to 0x0f will get interpreted as (cluster) logical x2APIC 0xf in
> kvm_apic_match_logical_addr().
> xAPIC address are only 8 bit long so they always get delivered to x2APIC
> cluster 0, where first 16 bits work like xAPIC flat logical mode.

Ok, I was wondering whether this was the correct interpretation.  Thanks!

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wu, Feng Sept. 17, 2015, 11:15 p.m. UTC | #8
> -----Original Message-----
> From: Paolo Bonzini [mailto:pbonzini@redhat.com]
> Sent: Thursday, September 17, 2015 10:25 PM
> To: Wu, Feng; alex.williamson@redhat.com; joro@8bytes.org;
> mtosatti@redhat.com
> Cc: eric.auger@linaro.org; kvm@vger.kernel.org;
> iommu@lists.linux-foundation.org; linux-kernel@vger.kernel.org; Radim
> Kr?má?
> Subject: Re: [PATCH v8 03/13] KVM: Define a new interface
> kvm_intr_is_single_vcpu()
> 
> >> On 17/09/2015 05:17, Wu, Feng wrote:
> >>>>>>> +	if (irq->dest_mode == APIC_DEST_PHYSICAL) {
> >>>>>>> +		if (irq->dest_id == 0xFF)
> >>>>>>> +			goto out;
> >>>>>>> +
> >>>>>>> +		if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) {
> >>>>>
> >>>>> Warning here is wrong, the guest can trigger it.
> >>> Could you please share more information about how the guest
> >>> triggers these conditions (including the following two), Thanks
> >>> a lot!
> >>
> >> irq->dest_id is a 16-bit value, so it can be > 255.
> >
> > Yes, irq->dest_id is defined as u32, but by looking the current KVM
> > code, seems desst_id is used as an u8 variable, even in x2apic mode
> > the dest_id will not beyond 255 (except for broadcast dest in in x2apic
> > mode). Correct me if I am wrong. Thanks a lot!
> 
> Actually you're right, the MSI destination is only 8 bits.  I was
> confused because of
> 
> #define	 MSI_ADDR_DEST_ID_MASK		0x00ffff0
> 
> in arch/x86/include/asm/msidef.h.  But there may be a bug, see below...
> 
> >>> +		if (cid >= ARRAY_SIZE(map->logical_map)) {
> >>> +			WARN_ON_ONCE(1);
> >>
> >> In x2apic mode irq->dest_id could have bits 12..15 set.
> >
> > cid is gotten from bit 16 ..31 of the ldr (in apic_logical_id()), and
> > in x2apic mode, ldr is constructed in kvm_apic_set_x2apic_id() as
> > below:
> >
> > u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
> >
> > So in fact, cid is (id >> 4), I cannot think of why cid can beyond 15.
> 
> I think kvm_apic_match_logical_addr for MSI and IOAPIC interrupts is
> buggy in x2apic mode.
> 
> It does:
> 
>         if (apic_x2apic_mode(apic))
>                 return ((logical_id >> 16) == (mda >> 16))
>                        && (logical_id & mda & 0xffff) != 0;
> 
> But mda is only 8-bits for MSI and IOAPIC interrupts.
> 
> Radim, should kvm_apic_mda also handle the !ipi && x2apic_mda && dest_id
> != APIC_BROADCAST case?  It never triggers with Linux because it uses
> only the physical mode (that's not super-easy to see;
> ioapic_set_affinity looks for the RTEs in irq_data->chip_data and that
> is allocated with kzalloc).
> 
> > Do I miss something here? Thanks!
> 
> No, you were right.
> 
> But still I think the WARNs are unnecessary; it is conceivable that some
> future chipset adds support for more than 8-bits in the dest_id.

No problem, I agree with it. Here I just want clarify some questions, thanks
for the elaboration!

Thanks,
Feng

> 
> Paolo
> 
> > Thanks,
> > Feng
> >
> >>
> >> Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wu, Feng Sept. 17, 2015, 11:18 p.m. UTC | #9
DQoNCj4gLS0tLS1PcmlnaW5hbCBNZXNzYWdlLS0tLS0NCj4gRnJvbTogUGFvbG8gQm9uemluaSBb
bWFpbHRvOnBib256aW5pQHJlZGhhdC5jb21dDQo+IFNlbnQ6IEZyaWRheSwgU2VwdGVtYmVyIDE4
LCAyMDE1IDEyOjAwIEFNDQo+IFRvOiBSYWRpbSBLcsSNbcOhxZkNCj4gQ2M6IFd1LCBGZW5nOyBh
bGV4LndpbGxpYW1zb25AcmVkaGF0LmNvbTsgam9yb0A4Ynl0ZXMub3JnOw0KPiBtdG9zYXR0aUBy
ZWRoYXQuY29tOyBlcmljLmF1Z2VyQGxpbmFyby5vcmc7IGt2bUB2Z2VyLmtlcm5lbC5vcmc7DQo+
IGlvbW11QGxpc3RzLmxpbnV4LWZvdW5kYXRpb24ub3JnOyBsaW51eC1rZXJuZWxAdmdlci5rZXJu
ZWwub3JnDQo+IFN1YmplY3Q6IFJlOiBbUEFUQ0ggdjggMDMvMTNdIEtWTTogRGVmaW5lIGEgbmV3
IGludGVyZmFjZQ0KPiBrdm1faW50cl9pc19zaW5nbGVfdmNwdSgpDQo+IA0KPiANCj4gDQo+IE9u
IDE3LzA5LzIwMTUgMTc6NTgsIFJhZGltIEtyxI1tw6HFmSB3cm90ZToNCj4gPiBGb3IgaW50ZXJy
dXB0cyBmcm9tIE1TSSBhbmQgSU94QVBJQzoNCj4gPiAtIEZsYXQgbG9naWNhbCBpbnRlcnJ1cHRz
IGFyZSBkZWxpdmVyZWQgYXMgaWYgd2UgaGFkIG5hdHVyYWwNCj4gPiAgIChDUFUwPC0+Yml0MCwg
Q1BVMTwtPmJpdDEsIC4uLikgZmxhdCBsb2dpY2FsIHhBUElDIGZvciBmaXJzdCA4IFZDUFVzLg0K
PiA+IC0gQ2x1c3RlciBsb2dpY2FsIGRvZXNuJ3Qgd29yayBtdWNoLCBpdCdzIGludGVycHJldGVk
IGxpa2UgZmxhdCBsb2dpY2FsLg0KPiA+ICAgSSBkaWRuJ3QgY2FyZSBhYm91dCB4QVBJQyBjbHVz
dGVyIGJlY2F1c2UgTGludXgsIHRoZSBzb2xlIHVzZXIgb2Ygb3VyDQo+ID4gICBwYXJhdmlydHVh
bGl6ZWQgeDJBUElDLCBkb2Vzbid0IGNvbmZpZ3VyZSBpdC4NCj4gPg0KPiA+IEknbGwgcGFzdGUg
a3ZtX2FwaWNfbWRhKCkgc291cmNlIGZvciBiZXR0ZXIgZXhwbGFuYXRpb246DQo+ID4NCj4gPiAg
IHN0YXRpYyB1MzIga3ZtX2FwaWNfbWRhKHVuc2lnbmVkIGludCBkZXN0X2lkLCBzdHJ1Y3Qga3Zt
X2xhcGljICpzb3VyY2UsDQo+ID4gICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg
ICAgICAgICAgICAgc3RydWN0IGt2bV9sYXBpYw0KPiAqdGFyZ2V0KQ0KPiA+ICAgew0KPiA+ICAg
CWJvb2wgaXBpID0gc291cmNlICE9IE5VTEw7DQo+ID4gICAJYm9vbCB4MmFwaWNfbWRhID0gYXBp
Y194MmFwaWNfbW9kZShpcGkgPyBzb3VyY2UgOiB0YXJnZXQpOw0KPiA+DQo+ID4gICAJaWYgKCFp
cGkgJiYgZGVzdF9pZCA9PSBBUElDX0JST0FEQ0FTVCAmJiB4MmFwaWNfbWRhKQ0KPiA+ICAgCQly
ZXR1cm4gWDJBUElDX0JST0FEQ0FTVDsNCj4gPg0KPiA+ICAgCXJldHVybiB4MmFwaWNfbWRhID8g
ZGVzdF9pZCA6IFNFVF9BUElDX0RFU1RfRklFTEQoZGVzdF9pZCk7DQo+ID4gICB9DQo+ID4NCj4g
PiBNU0kvSU94QVBJQyBpbnRlcnJ1cHQgbWVhbnMgdGhhdCBzb3VyY2UgaXMgTlVMTCBhbmQgaWYg
dGhlIHRhcmdldCBpcyBpbg0KPiA+IHgyQVBJQyBtb2RlLCB0aGUgb3JpZ2luYWwgJ2Rlc3RfaWQn
IGlzIHJldHVybmVkIGFzIG1kYSA9PiBhIGZsYXQgbG9naWNhbA0KPiA+IHhBUElDIHRvIDB4MGYg
d2lsbCBnZXQgaW50ZXJwcmV0ZWQgYXMgKGNsdXN0ZXIpIGxvZ2ljYWwgeDJBUElDIDB4ZiBpbg0K
PiA+IGt2bV9hcGljX21hdGNoX2xvZ2ljYWxfYWRkcigpLg0KPiA+IHhBUElDIGFkZHJlc3MgYXJl
IG9ubHkgOCBiaXQgbG9uZyBzbyB0aGV5IGFsd2F5cyBnZXQgZGVsaXZlcmVkIHRvIHgyQVBJQw0K
PiA+IGNsdXN0ZXIgMCwgd2hlcmUgZmlyc3QgMTYgYml0cyB3b3JrIGxpa2UgeEFQSUMgZmxhdCBs
b2dpY2FsIG1vZGUuDQo+IA0KPiBPaywgSSB3YXMgd29uZGVyaW5nIHdoZXRoZXIgdGhpcyB3YXMg
dGhlIGNvcnJlY3QgaW50ZXJwcmV0YXRpb24uICBUaGFua3MhDQoNClBhb2xvLCBJIGRvbid0IHRo
aW5rIFJhZGltIGNsYXJpZnkgeW91ciBjb25jZXJuLCByaWdodD8gU2luY2UgbWRhIGlzIDgtYml0
LCBpdA0KaXMgd3Jvbmcgd2l0aCBtZGEgPj4gMTYsIHRoaXMgaXMgeW91ciBjb25jZXJuLCByaWdo
dD8NCg0KVGhhbmtzLA0KRmVuZw0KDQo+IA0KPiBQYW9sbw0K
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Radim Krčmář Sept. 18, 2015, 4:16 p.m. UTC | #10
2015-09-17 23:18+0000, Wu, Feng:
>> From: Paolo Bonzini [mailto:pbonzini@redhat.com]
>> On 17/09/2015 17:58, Radim Kr?má? wrote:
>>> xAPIC address are only 8 bit long so they always get delivered to x2APIC
>>> cluster 0, where first 16 bits work like xAPIC flat logical mode.
>> 
>> Ok, I was wondering whether this was the correct interpretation.  Thanks!
> 
> Paolo, I don't think Radim clarify your concern, right? Since mda is 8-bit, it
> is wrong with mda >> 16, this is your concern, right?

In case it was:  mda is u32 so the bitshift is defined by C.
(xAPIC destinations in KVM's x2APIC mode are stored in lowest 8 bits of
 mda, hence the cluster is always 0.)

Or am I still missing the point?
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini Sept. 18, 2015, 4:17 p.m. UTC | #11
On 18/09/2015 18:16, Radim Kr?má? wrote:
>>> >> Ok, I was wondering whether this was the correct interpretation.  Thanks!
>> > 
>> > Paolo, I don't think Radim clarify your concern, right? Since mda is 8-bit, it
>> > is wrong with mda >> 16, this is your concern, right?
> In case it was:  mda is u32 so the bitshift is defined by C.
> (xAPIC destinations in KVM's x2APIC mode are stored in lowest 8 bits of
>  mda, hence the cluster is always 0.)
> 
> Or am I still missing the point?

Yes, remembering that the cluster is always 0 solved my doubt.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 49ec903..af11bca 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1204,4 +1204,7 @@  int __x86_set_memory_region(struct kvm *kvm,
 int x86_set_memory_region(struct kvm *kvm,
 			  const struct kvm_userspace_memory_region *mem);
 
+bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
+			     struct kvm_vcpu **dest_vcpu);
+
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 9efff9e..97ba1d6 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -297,6 +297,100 @@  out:
 	return r;
 }
 
+static bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm,
+					 struct kvm_lapic_irq *irq,
+					 struct kvm_vcpu **dest_vcpu)
+{
+	struct kvm_apic_map *map;
+	bool ret = false;
+	struct kvm_lapic *dst = NULL;
+
+	if (irq->shorthand)
+		return false;
+
+	rcu_read_lock();
+	map = rcu_dereference(kvm->arch.apic_map);
+
+	if (!map)
+		goto out;
+
+	if (irq->dest_mode == APIC_DEST_PHYSICAL) {
+		if (irq->dest_id == 0xFF)
+			goto out;
+
+		if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) {
+			WARN_ON_ONCE(1);
+			goto out;
+		}
+
+		dst = map->phys_map[irq->dest_id];
+		if (dst && kvm_apic_present(dst->vcpu))
+			*dest_vcpu = dst->vcpu;
+		else
+			goto out;
+	} else {
+		u16 cid;
+		unsigned long bitmap = 1;
+		int i, r = 0;
+
+		if (!kvm_apic_logical_map_valid(map)) {
+			WARN_ON_ONCE(1);
+			goto out;
+		}
+
+		apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap);
+
+		if (cid >= ARRAY_SIZE(map->logical_map)) {
+			WARN_ON_ONCE(1);
+			goto out;
+		}
+
+		for_each_set_bit(i, &bitmap, 16) {
+			dst = map->logical_map[cid][i];
+			if (++r == 2)
+				goto out;
+		}
+
+		if (dst && kvm_apic_present(dst->vcpu))
+			*dest_vcpu = dst->vcpu;
+		else
+			goto out;
+	}
+
+	ret = true;
+out:
+	rcu_read_unlock();
+	return ret;
+}
+
+
+bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
+			     struct kvm_vcpu **dest_vcpu)
+{
+	int i, r = 0;
+	struct kvm_vcpu *vcpu;
+
+	if (kvm_intr_is_single_vcpu_fast(kvm, irq, dest_vcpu))
+		return true;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (!kvm_apic_present(vcpu))
+			continue;
+
+		if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand,
+					irq->dest_id, irq->dest_mode))
+			continue;
+
+		if (++r == 2)
+			return false;
+
+		*dest_vcpu = vcpu;
+	}
+
+	return r == 1;
+}
+EXPORT_SYMBOL_GPL(kvm_intr_is_single_vcpu);
+
 #define IOAPIC_ROUTING_ENTRY(irq) \
 	{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP,	\
 	  .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } }
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 2a5ca97..9848cd50 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -136,13 +136,12 @@  static inline int kvm_apic_id(struct kvm_lapic *apic)
 /* The logical map is definitely wrong if we have multiple
  * modes at the same time.  (Physical map is always right.)
  */
-static inline bool kvm_apic_logical_map_valid(struct kvm_apic_map *map)
+bool kvm_apic_logical_map_valid(struct kvm_apic_map *map)
 {
 	return !(map->mode & (map->mode - 1));
 }
 
-static inline void
-apic_logical_id(struct kvm_apic_map *map, u32 dest_id, u16 *cid, u16 *lid)
+void apic_logical_id(struct kvm_apic_map *map, u32 dest_id, u16 *cid, u16 *lid)
 {
 	unsigned lid_bits;
 
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 7195274..6798b87 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -169,4 +169,6 @@  bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
 
 void wait_lapic_expire(struct kvm_vcpu *vcpu);
 
+void apic_logical_id(struct kvm_apic_map *map, u32 dest_id, u16 *cid, u16 *lid);
+bool kvm_apic_logical_map_valid(struct kvm_apic_map *map);
 #endif