diff mbox

[v3,2/4] KVM: x86: Use vector-hashing to deliver lowest-priority interrupts

Message ID 1453254177-103002-3-git-send-email-feng.wu@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Wu, Feng Jan. 20, 2016, 1:42 a.m. UTC
Use vector-hashing to deliver lowest-priority interrupts, As an
example, modern Intel CPUs in server platform use this method to
handle lowest-priority interrupts.

Signed-off-by: Feng Wu <feng.wu@intel.com>
---
v3:
- Fix a bug for sparse topologies, in that case, vcpu_id is not equal
to the return value got by kvm_get_vcpu().
- Remove unnecessary check in fast irq delivery patch.
- print a error message only once for each guest when we find hardware
  disabled LAPIC during interrupt injection.

 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/irq_comm.c         | 27 +++++++++++++++++----
 arch/x86/kvm/lapic.c            | 52 ++++++++++++++++++++++++++++++++++++++---
 arch/x86/kvm/lapic.h            |  2 ++
 arch/x86/kvm/x86.c              |  9 +++++++
 arch/x86/kvm/x86.h              |  1 +
 6 files changed, 85 insertions(+), 8 deletions(-)

Comments

Yang Zhang Jan. 21, 2016, 5:23 a.m. UTC | #1
On 2016/1/20 9:42, Feng Wu wrote:
> Use vector-hashing to deliver lowest-priority interrupts, As an
> example, modern Intel CPUs in server platform use this method to
> handle lowest-priority interrupts.
>
> Signed-off-by: Feng Wu <feng.wu@intel.com>
> ---
> v3:
> - Fix a bug for sparse topologies, in that case, vcpu_id is not equal
> to the return value got by kvm_get_vcpu().
> - Remove unnecessary check in fast irq delivery patch.
> - print a error message only once for each guest when we find hardware
>    disabled LAPIC during interrupt injection.
>
>   arch/x86/include/asm/kvm_host.h |  2 ++
>   arch/x86/kvm/irq_comm.c         | 27 +++++++++++++++++----
>   arch/x86/kvm/lapic.c            | 52 ++++++++++++++++++++++++++++++++++++++---
>   arch/x86/kvm/lapic.h            |  2 ++
>   arch/x86/kvm/x86.c              |  9 +++++++
>   arch/x86/kvm/x86.h              |  1 +
>   6 files changed, 85 insertions(+), 8 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 44adbb8..5054810 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -754,6 +754,8 @@ struct kvm_arch {
>
>   	bool irqchip_split;
>   	u8 nr_reserved_ioapic_pins;
> +
> +	int disabled_lapic_found;
>   };
>
>   struct kvm_vm_stat {
> diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
> index 8fc89ef..062e907 100644
> --- a/arch/x86/kvm/irq_comm.c
> +++ b/arch/x86/kvm/irq_comm.c
> @@ -34,6 +34,7 @@
>   #include "lapic.h"
>
>   #include "hyperv.h"
> +#include "x86.h"
>
>   static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
>   			   struct kvm *kvm, int irq_source_id, int level,
> @@ -55,8 +56,10 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
>   int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
>   		struct kvm_lapic_irq *irq, unsigned long *dest_map)
>   {
> -	int i, r = -1;
> +	int i, r = -1, idx = 0;
>   	struct kvm_vcpu *vcpu, *lowest = NULL;
> +	unsigned long dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
> +	unsigned int dest_vcpus = 0;
>
>   	if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
>   			kvm_lowest_prio_delivery(irq)) {
> @@ -67,6 +70,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
>   	if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map))
>   		return r;
>
> +	memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap));
> +
>   	kvm_for_each_vcpu(i, vcpu, kvm) {
>   		if (!kvm_apic_present(vcpu))
>   			continue;
> @@ -80,13 +85,25 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
>   				r = 0;
>   			r += kvm_apic_set_irq(vcpu, irq, dest_map);
>   		} else if (kvm_lapic_enabled(vcpu)) {
> -			if (!lowest)
> -				lowest = vcpu;
> -			else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
> -				lowest = vcpu;
> +			if (!kvm_vector_hashing_enabled()) {
> +				if (!lowest)
> +					lowest = vcpu;
> +				else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
> +					lowest = vcpu;
> +			} else {
> +				__set_bit(i, dest_vcpu_bitmap);
> +				dest_vcpus++;
> +			}
>   		}
>   	}
>
> +	if (dest_vcpus != 0) {
> +		idx = kvm_vector_2_index(irq->vector, dest_vcpus,
> +					 dest_vcpu_bitmap, KVM_MAX_VCPUS);
> +
> +		lowest = kvm_get_vcpu(kvm, idx - 1);
> +	}
> +
>   	if (lowest)
>   		r = kvm_apic_set_irq(lowest, irq, dest_map);
>
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 36591fa..e1a449da 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -675,6 +675,22 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
>   	}
>   }
>
> +int kvm_vector_2_index(u32 vector, u32 dest_vcpus,
> +		       const unsigned long *bitmap, u32 bitmap_size)
> +{
> +	u32 mod;
> +	int i, idx = 0;
> +
> +	mod = vector % dest_vcpus;
> +
> +	for (i = 0; i <= mod; i++) {
> +		idx = find_next_bit(bitmap, bitmap_size, idx) + 1;
> +		BUG_ON(idx > bitmap_size);
> +	}
> +
> +	return idx;
> +}
> +
>   bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
>   		struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map)
>   {
> @@ -727,21 +743,51 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
>
>   		dst = map->logical_map[cid];
>
> -		if (kvm_lowest_prio_delivery(irq)) {
> +		if (!kvm_lowest_prio_delivery(irq))
> +			goto set_irq;
> +
> +		if (!kvm_vector_hashing_enabled()) {
>   			int l = -1;
>   			for_each_set_bit(i, &bitmap, 16) {
>   				if (!dst[i])
>   					continue;
>   				if (l < 0)
>   					l = i;
> -				else if (kvm_apic_compare_prio(dst[i]->vcpu, dst[l]->vcpu) < 0)
> +				else if (kvm_apic_compare_prio(dst[i]->vcpu,
> +							dst[l]->vcpu) < 0)
>   					l = i;
>   			}
> -
>   			bitmap = (l >= 0) ? 1 << l : 0;
> +		} else {
> +			int idx = 0;
> +			unsigned int dest_vcpus = 0;
> +
> +			dest_vcpus = hweight16(bitmap);
> +			if (dest_vcpus == 0)
> +				goto out;
> +
> +			idx = kvm_vector_2_index(irq->vector,
> +				dest_vcpus, &bitmap, 16);
> +
> +			/*
> +			 * We may find a hardware disabled LAPIC here, if that
> +			 * is the case, print out a error message once for each
> +			 * guest and return.
> +			 */
> +			if (!dst[idx-1] &&
> +				(kvm->arch.disabled_lapic_found == 0)) {
> +				kvm->arch.disabled_lapic_found = 1;
> +				printk(KERN_ERR
> +					"Disabled LAPIC found during irq injection\n");
> +				goto out;

What does "goto out" mean? Inject successfully or fail? According the 
value of ret which is set to ture here, it means inject successfully but 
i = -1.

> +			}
> +
> +			bitmap = 0;
> +			__set_bit(idx-1, &bitmap);

We can reuse the code like:
bitmap = (idx > 0) ? 1 << (idx - 1) : 0;

>   		}
>   	}
>
> +set_irq:
>   	for_each_set_bit(i, &bitmap, 16) {
>   		if (!dst[i])
>   			continue;
> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> index 41bdb35..d864601 100644
> --- a/arch/x86/kvm/lapic.h
> +++ b/arch/x86/kvm/lapic.h
> @@ -175,4 +175,6 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu);
>
>   bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
>   			struct kvm_vcpu **dest_vcpu);
> +int kvm_vector_2_index(u32 vector, u32 dest_vcpus,
> +		       const unsigned long *bitmap, u32 bitmap_size);
>   #endif
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 4244c2b..47daf77 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -123,6 +123,9 @@ module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
>   unsigned int __read_mostly lapic_timer_advance_ns = 0;
>   module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
>
> +bool __read_mostly enable_vector_hashing = 1;
> +module_param(enable_vector_hashing, bool, S_IRUGO);
> +
>   static bool __read_mostly backwards_tsc_observed = false;
>
>   #define KVM_NR_SHARED_MSRS 16
> @@ -8370,6 +8373,12 @@ int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
>   	return kvm_x86_ops->update_pi_irte(kvm, host_irq, guest_irq, set);
>   }
>
> +bool kvm_vector_hashing_enabled(void)
> +{
> +	return enable_vector_hashing;
> +}
> +EXPORT_SYMBOL_GPL(kvm_vector_hashing_enabled);
> +
>   EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
>   EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
>   EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index f2afa5f..04bd0f9 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -179,6 +179,7 @@ int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data);
>   int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
>   bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
>   					  int page_num);
> +bool kvm_vector_hashing_enabled(void);
>
>   #define KVM_SUPPORTED_XCR0     (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
>   				| XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
>
Wu, Feng Jan. 21, 2016, 5:33 a.m. UTC | #2
> -----Original Message-----
> From: linux-kernel-owner@vger.kernel.org [mailto:linux-kernel-
> owner@vger.kernel.org] On Behalf Of Yang Zhang
> Sent: Thursday, January 21, 2016 1:24 PM
> To: Wu, Feng <feng.wu@intel.com>; pbonzini@redhat.com;
> rkrcmar@redhat.com
> Cc: linux-kernel@vger.kernel.org; kvm@vger.kernel.org
> Subject: Re: [PATCH v3 2/4] KVM: x86: Use vector-hashing to deliver lowest-
> priority interrupts
> 
> On 2016/1/20 9:42, Feng Wu wrote:
> > Use vector-hashing to deliver lowest-priority interrupts, As an
> > example, modern Intel CPUs in server platform use this method to
> > handle lowest-priority interrupts.
> >
> > Signed-off-by: Feng Wu <feng.wu@intel.com>
> > ---
> >   bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic
> *src,
> >   		struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map)
> >   {
> > @@ -727,21 +743,51 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm
> *kvm, struct kvm_lapic *src,
> >
> >   		dst = map->logical_map[cid];
> >
> > -		if (kvm_lowest_prio_delivery(irq)) {
> > +		if (!kvm_lowest_prio_delivery(irq))
> > +			goto set_irq;
> > +
> > +		if (!kvm_vector_hashing_enabled()) {
> >   			int l = -1;
> >   			for_each_set_bit(i, &bitmap, 16) {
> >   				if (!dst[i])
> >   					continue;
> >   				if (l < 0)
> >   					l = i;
> > -				else if (kvm_apic_compare_prio(dst[i]->vcpu,
> dst[l]->vcpu) < 0)
> > +				else if (kvm_apic_compare_prio(dst[i]->vcpu,
> > +							dst[l]->vcpu) < 0)
> >   					l = i;
> >   			}
> > -
> >   			bitmap = (l >= 0) ? 1 << l : 0;
> > +		} else {
> > +			int idx = 0;
> > +			unsigned int dest_vcpus = 0;
> > +
> > +			dest_vcpus = hweight16(bitmap);
> > +			if (dest_vcpus == 0)
> > +				goto out;
> > +
> > +			idx = kvm_vector_2_index(irq->vector,
> > +				dest_vcpus, &bitmap, 16);
> > +
> > +			/*
> > +			 * We may find a hardware disabled LAPIC here, if
> that
> > +			 * is the case, print out a error message once for each
> > +			 * guest and return.
> > +			 */
> > +			if (!dst[idx-1] &&
> > +				(kvm->arch.disabled_lapic_found == 0)) {
> > +				kvm->arch.disabled_lapic_found = 1;
> > +				printk(KERN_ERR
> > +					"Disabled LAPIC found during irq
> injection\n");
> > +				goto out;
> 
> What does "goto out" mean? Inject successfully or fail? According the
> value of ret which is set to ture here, it means inject successfully but
> i = -1.
> 

Oh, I didn't notice 'ret' is initialized to true, I thought it was initialized
to false like another function, I should add a "ret = false' here. We should
failed to inject the interrupt since hardware disabled LAPIC is found.

Thanks,
Feng
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yang Zhang Jan. 21, 2016, 5:42 a.m. UTC | #3
On 2016/1/21 13:33, Wu, Feng wrote:
>
>
>> -----Original Message-----
>> From: linux-kernel-owner@vger.kernel.org [mailto:linux-kernel-
>> owner@vger.kernel.org] On Behalf Of Yang Zhang
>> Sent: Thursday, January 21, 2016 1:24 PM
>> To: Wu, Feng <feng.wu@intel.com>; pbonzini@redhat.com;
>> rkrcmar@redhat.com
>> Cc: linux-kernel@vger.kernel.org; kvm@vger.kernel.org
>> Subject: Re: [PATCH v3 2/4] KVM: x86: Use vector-hashing to deliver lowest-
>> priority interrupts
>>
>> On 2016/1/20 9:42, Feng Wu wrote:
>>> Use vector-hashing to deliver lowest-priority interrupts, As an
>>> example, modern Intel CPUs in server platform use this method to
>>> handle lowest-priority interrupts.
>>>
>>> Signed-off-by: Feng Wu <feng.wu@intel.com>
>>> ---
>>>    bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic
>> *src,
>>>    		struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map)
>>>    {
>>> @@ -727,21 +743,51 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm
>> *kvm, struct kvm_lapic *src,
>>>
>>>    		dst = map->logical_map[cid];
>>>
>>> -		if (kvm_lowest_prio_delivery(irq)) {
>>> +		if (!kvm_lowest_prio_delivery(irq))
>>> +			goto set_irq;
>>> +
>>> +		if (!kvm_vector_hashing_enabled()) {
>>>    			int l = -1;
>>>    			for_each_set_bit(i, &bitmap, 16) {
>>>    				if (!dst[i])
>>>    					continue;
>>>    				if (l < 0)
>>>    					l = i;
>>> -				else if (kvm_apic_compare_prio(dst[i]->vcpu,
>> dst[l]->vcpu) < 0)
>>> +				else if (kvm_apic_compare_prio(dst[i]->vcpu,
>>> +							dst[l]->vcpu) < 0)
>>>    					l = i;
>>>    			}
>>> -
>>>    			bitmap = (l >= 0) ? 1 << l : 0;
>>> +		} else {
>>> +			int idx = 0;
>>> +			unsigned int dest_vcpus = 0;
>>> +
>>> +			dest_vcpus = hweight16(bitmap);
>>> +			if (dest_vcpus == 0)
>>> +				goto out;
>>> +
>>> +			idx = kvm_vector_2_index(irq->vector,
>>> +				dest_vcpus, &bitmap, 16);
>>> +
>>> +			/*
>>> +			 * We may find a hardware disabled LAPIC here, if
>> that
>>> +			 * is the case, print out a error message once for each
>>> +			 * guest and return.
>>> +			 */
>>> +			if (!dst[idx-1] &&
>>> +				(kvm->arch.disabled_lapic_found == 0)) {
>>> +				kvm->arch.disabled_lapic_found = 1;
>>> +				printk(KERN_ERR
>>> +					"Disabled LAPIC found during irq
>> injection\n");
>>> +				goto out;
>>
>> What does "goto out" mean? Inject successfully or fail? According the
>> value of ret which is set to ture here, it means inject successfully but
>> i = -1.
>>
>
> Oh, I didn't notice 'ret' is initialized to true, I thought it was initialized
> to false like another function, I should add a "ret = false' here. We should
> failed to inject the interrupt since hardware disabled LAPIC is found.

I remember we have discussed that even the LAPIC is software disabled, 
it still can respond to some interrupts like INIT, NMI, SMI, and SIPI 
messages. Isn't current logic still problematically?
Wu, Feng Jan. 21, 2016, 5:46 a.m. UTC | #4
> -----Original Message-----
> From: Yang Zhang [mailto:yang.zhang.wz@gmail.com]
> Sent: Thursday, January 21, 2016 1:43 PM
> To: Wu, Feng <feng.wu@intel.com>; pbonzini@redhat.com;
> rkrcmar@redhat.com
> Cc: linux-kernel@vger.kernel.org; kvm@vger.kernel.org
> Subject: Re: [PATCH v3 2/4] KVM: x86: Use vector-hashing to deliver lowest-
> priority interrupts
> 
> On 2016/1/21 13:33, Wu, Feng wrote:
> >
> >
> >> -----Original Message-----
> >> From: linux-kernel-owner@vger.kernel.org [mailto:linux-kernel-
> >> owner@vger.kernel.org] On Behalf Of Yang Zhang
> >> Sent: Thursday, January 21, 2016 1:24 PM
> >> To: Wu, Feng <feng.wu@intel.com>; pbonzini@redhat.com;
> >> rkrcmar@redhat.com
> >> Cc: linux-kernel@vger.kernel.org; kvm@vger.kernel.org
> >> Subject: Re: [PATCH v3 2/4] KVM: x86: Use vector-hashing to deliver
> lowest-
> >> priority interrupts
> >>
> >> On 2016/1/20 9:42, Feng Wu wrote:
> >>> Use vector-hashing to deliver lowest-priority interrupts, As an
> >>> example, modern Intel CPUs in server platform use this method to
> >>> handle lowest-priority interrupts.
> >>>
> >>> Signed-off-by: Feng Wu <feng.wu@intel.com>
> >>> ---
> >>>    bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic
> >> *src,
> >>>    		struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map)
> >>>    {
> >>> @@ -727,21 +743,51 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm
> >> *kvm, struct kvm_lapic *src,
> >>>
> >>>    		dst = map->logical_map[cid];
> >>>
> >>> -		if (kvm_lowest_prio_delivery(irq)) {
> >>> +		if (!kvm_lowest_prio_delivery(irq))
> >>> +			goto set_irq;
> >>> +
> >>> +		if (!kvm_vector_hashing_enabled()) {
> >>>    			int l = -1;
> >>>    			for_each_set_bit(i, &bitmap, 16) {
> >>>    				if (!dst[i])
> >>>    					continue;
> >>>    				if (l < 0)
> >>>    					l = i;
> >>> -				else if (kvm_apic_compare_prio(dst[i]->vcpu,
> >> dst[l]->vcpu) < 0)
> >>> +				else if (kvm_apic_compare_prio(dst[i]->vcpu,
> >>> +							dst[l]->vcpu) < 0)
> >>>    					l = i;
> >>>    			}
> >>> -
> >>>    			bitmap = (l >= 0) ? 1 << l : 0;
> >>> +		} else {
> >>> +			int idx = 0;
> >>> +			unsigned int dest_vcpus = 0;
> >>> +
> >>> +			dest_vcpus = hweight16(bitmap);
> >>> +			if (dest_vcpus == 0)
> >>> +				goto out;
> >>> +
> >>> +			idx = kvm_vector_2_index(irq->vector,
> >>> +				dest_vcpus, &bitmap, 16);
> >>> +
> >>> +			/*
> >>> +			 * We may find a hardware disabled LAPIC here, if
> >> that
> >>> +			 * is the case, print out a error message once for each
> >>> +			 * guest and return.
> >>> +			 */
> >>> +			if (!dst[idx-1] &&
> >>> +				(kvm->arch.disabled_lapic_found == 0)) {
> >>> +				kvm->arch.disabled_lapic_found = 1;
> >>> +				printk(KERN_ERR
> >>> +					"Disabled LAPIC found during irq
> >> injection\n");
> >>> +				goto out;
> >>
> >> What does "goto out" mean? Inject successfully or fail? According the
> >> value of ret which is set to ture here, it means inject successfully but
> >> i = -1.
> >>
> >
> > Oh, I didn't notice 'ret' is initialized to true, I thought it was initialized
> > to false like another function, I should add a "ret = false' here. We should
> > failed to inject the interrupt since hardware disabled LAPIC is found.
> 
> I remember we have discussed that even the LAPIC is software disabled,
> it still can respond to some interrupts like INIT, NMI, SMI, and SIPI
> messages. Isn't current logic still problematically?

I don't think there are problems, here we only cover lowest-priority mode.

Thanks,
Feng

> 
> --
> best regards
> yang
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yang Zhang Jan. 21, 2016, 5:57 a.m. UTC | #5
On 2016/1/21 13:46, Wu, Feng wrote:
>
>
>> -----Original Message-----
>> From: Yang Zhang [mailto:yang.zhang.wz@gmail.com]
>> Sent: Thursday, January 21, 2016 1:43 PM
>> To: Wu, Feng <feng.wu@intel.com>; pbonzini@redhat.com;
>> rkrcmar@redhat.com
>> Cc: linux-kernel@vger.kernel.org; kvm@vger.kernel.org
>> Subject: Re: [PATCH v3 2/4] KVM: x86: Use vector-hashing to deliver lowest-
>> priority interrupts
>>
>> On 2016/1/21 13:33, Wu, Feng wrote:
>>>
>>>
>>>> -----Original Message-----
>>>> From: linux-kernel-owner@vger.kernel.org [mailto:linux-kernel-
>>>> owner@vger.kernel.org] On Behalf Of Yang Zhang
>>>> Sent: Thursday, January 21, 2016 1:24 PM
>>>> To: Wu, Feng <feng.wu@intel.com>; pbonzini@redhat.com;
>>>> rkrcmar@redhat.com
>>>> Cc: linux-kernel@vger.kernel.org; kvm@vger.kernel.org
>>>> Subject: Re: [PATCH v3 2/4] KVM: x86: Use vector-hashing to deliver
>> lowest-
>>>> priority interrupts
>>>>
>>>> On 2016/1/20 9:42, Feng Wu wrote:
>>>>> Use vector-hashing to deliver lowest-priority interrupts, As an
>>>>> example, modern Intel CPUs in server platform use this method to
>>>>> handle lowest-priority interrupts.
>>>>>
>>>>> Signed-off-by: Feng Wu <feng.wu@intel.com>
>>>>> ---
>>>>>     bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic
>>>> *src,
>>>>>     		struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map)
>>>>>     {
>>>>> @@ -727,21 +743,51 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm
>>>> *kvm, struct kvm_lapic *src,
>>>>>
>>>>>     		dst = map->logical_map[cid];
>>>>>
>>>>> -		if (kvm_lowest_prio_delivery(irq)) {
>>>>> +		if (!kvm_lowest_prio_delivery(irq))
>>>>> +			goto set_irq;
>>>>> +
>>>>> +		if (!kvm_vector_hashing_enabled()) {
>>>>>     			int l = -1;
>>>>>     			for_each_set_bit(i, &bitmap, 16) {
>>>>>     				if (!dst[i])
>>>>>     					continue;
>>>>>     				if (l < 0)
>>>>>     					l = i;
>>>>> -				else if (kvm_apic_compare_prio(dst[i]->vcpu,
>>>> dst[l]->vcpu) < 0)
>>>>> +				else if (kvm_apic_compare_prio(dst[i]->vcpu,
>>>>> +							dst[l]->vcpu) < 0)
>>>>>     					l = i;
>>>>>     			}
>>>>> -
>>>>>     			bitmap = (l >= 0) ? 1 << l : 0;
>>>>> +		} else {
>>>>> +			int idx = 0;
>>>>> +			unsigned int dest_vcpus = 0;
>>>>> +
>>>>> +			dest_vcpus = hweight16(bitmap);
>>>>> +			if (dest_vcpus == 0)
>>>>> +				goto out;
>>>>> +
>>>>> +			idx = kvm_vector_2_index(irq->vector,
>>>>> +				dest_vcpus, &bitmap, 16);
>>>>> +
>>>>> +			/*
>>>>> +			 * We may find a hardware disabled LAPIC here, if
>>>> that
>>>>> +			 * is the case, print out a error message once for each
>>>>> +			 * guest and return.
>>>>> +			 */
>>>>> +			if (!dst[idx-1] &&
>>>>> +				(kvm->arch.disabled_lapic_found == 0)) {
>>>>> +				kvm->arch.disabled_lapic_found = 1;
>>>>> +				printk(KERN_ERR
>>>>> +					"Disabled LAPIC found during irq
>>>> injection\n");
>>>>> +				goto out;
>>>>
>>>> What does "goto out" mean? Inject successfully or fail? According the
>>>> value of ret which is set to ture here, it means inject successfully but
>>>> i = -1.
>>>>
>>>
>>> Oh, I didn't notice 'ret' is initialized to true, I thought it was initialized
>>> to false like another function, I should add a "ret = false' here. We should
>>> failed to inject the interrupt since hardware disabled LAPIC is found.
>>
>> I remember we have discussed that even the LAPIC is software disabled,
>> it still can respond to some interrupts like INIT, NMI, SMI, and SIPI
>> messages. Isn't current logic still problematically?
>
> I don't think there are problems, here we only cover lowest-priority mode.

Does Intel SDM said those interrupts cannot be delivered on 
lowest-priority mode?

CC Jun.

Hi Jun,

Do you know whether INIT, NMI, SMI, and SIPI can be delivered through 
lowest-priority mode? I didn't find SDM says no.
Wu, Feng Jan. 21, 2016, 6:02 a.m. UTC | #6
> -----Original Message-----
> From: Yang Zhang [mailto:yang.zhang.wz@gmail.com]
> Sent: Thursday, January 21, 2016 1:58 PM
> To: Wu, Feng <feng.wu@intel.com>; pbonzini@redhat.com;
> rkrcmar@redhat.com
> Cc: linux-kernel@vger.kernel.org; kvm@vger.kernel.org
> Subject: Re: [PATCH v3 2/4] KVM: x86: Use vector-hashing to deliver lowest-
> priority interrupts
> 
> >>
> >> I remember we have discussed that even the LAPIC is software disabled,
> >> it still can respond to some interrupts like INIT, NMI, SMI, and SIPI
> >> messages. Isn't current logic still problematically?
> >
> > I don't think there are problems, here we only cover lowest-priority mode.
> 
> Does Intel SDM said those interrupts cannot be delivered on
> lowest-priority mode?

Fixed, Lowest-priority, SMI, NMI, INIT are all "Delivery Mode", once it is
Lowest-priority, it cannot be other type, afaik.

Thanks,
Feng

> 
> CC Jun.
> 
> Hi Jun,
> 
> Do you know whether INIT, NMI, SMI, and SIPI can be delivered through
> lowest-priority mode? I didn't find SDM says no.
> 
> --
> best regards
> yang
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yang Zhang Jan. 21, 2016, 6:07 a.m. UTC | #7
On 2016/1/21 14:02, Wu, Feng wrote:
>
>
>> -----Original Message-----
>> From: Yang Zhang [mailto:yang.zhang.wz@gmail.com]
>> Sent: Thursday, January 21, 2016 1:58 PM
>> To: Wu, Feng <feng.wu@intel.com>; pbonzini@redhat.com;
>> rkrcmar@redhat.com
>> Cc: linux-kernel@vger.kernel.org; kvm@vger.kernel.org
>> Subject: Re: [PATCH v3 2/4] KVM: x86: Use vector-hashing to deliver lowest-
>> priority interrupts
>>
>>>>
>>>> I remember we have discussed that even the LAPIC is software disabled,
>>>> it still can respond to some interrupts like INIT, NMI, SMI, and SIPI
>>>> messages. Isn't current logic still problematically?
>>>
>>> I don't think there are problems, here we only cover lowest-priority mode.
>>
>> Does Intel SDM said those interrupts cannot be delivered on
>> lowest-priority mode?
>
> Fixed, Lowest-priority, SMI, NMI, INIT are all "Delivery Mode", once it is
> Lowest-priority, it cannot be other type, afaik.

You are correct, I missed it with physical and logical mode. Also, i 
noticed you have the check at the beginning:

+		if (!kvm_lowest_prio_delivery(irq))
+			goto set_irq;
Radim Krčmář Jan. 21, 2016, 5:21 p.m. UTC | #8
2016-01-21 05:33+0000, Wu, Feng:
>> From: linux-kernel-owner@vger.kernel.org [mailto:linux-kernel-
>> owner@vger.kernel.org] On Behalf Of Yang Zhang
>> On 2016/1/20 9:42, Feng Wu wrote:
>> > +			/*
>> > +			 * We may find a hardware disabled LAPIC here, if
>> that
>> > +			 * is the case, print out a error message once for each
>> > +			 * guest and return.
>> > +			 */
>> > +			if (!dst[idx-1] &&
>> > +				(kvm->arch.disabled_lapic_found == 0)) {
>> > +				kvm->arch.disabled_lapic_found = 1;
>> > +				printk(KERN_ERR
>> > +					"Disabled LAPIC found during irq
>> injection\n");
>> > +				goto out;
>> 
>> What does "goto out" mean? Inject successfully or fail? According the
>> value of ret which is set to ture here, it means inject successfully but

(true actually means that fast path did the job and slow path isn't
 needed.)

>> i = -1.

(I think there isn't a practical difference between *r=-1 and *r=0.)

> Oh, I didn't notice 'ret' is initialized to true, I thought it was initialized
> to false like another function, I should add a "ret = false' here. We should
> failed to inject the interrupt since hardware disabled LAPIC is found.

'ret = true' is the better one.  We know that the interrupt is not
deliverable [1], so there's no point in trying to deliver with the slow
path.  We behave similarly when the interrupt targets a single disabled
APIC.

---
1: Well ... it's possible that slowpath would deliver it thanks to
   different handling of disabled APICs, but it's undefined behavior,
   so it doesn't matter matter if we don't try.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Radim Krčmář Jan. 21, 2016, 7:49 p.m. UTC | #9
2016-01-20 09:42+0800, Feng Wu:
> Use vector-hashing to deliver lowest-priority interrupts, As an
> example, modern Intel CPUs in server platform use this method to
> handle lowest-priority interrupts.
> 
> Signed-off-by: Feng Wu <feng.wu@intel.com>
> ---

Functionality looks good, so I had a lot of stylistic comments, sorry :)

> v3:
> - Fix a bug for sparse topologies, in that case, vcpu_id is not equal
> to the return value got by kvm_get_vcpu().
> - Remove unnecessary check in fast irq delivery patch.
> - print a error message only once for each guest when we find hardware
>   disabled LAPIC during interrupt injection.
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> @@ -754,6 +754,8 @@ struct kvm_arch {
>  
>  	bool irqchip_split;
>  	u8 nr_reserved_ioapic_pins;
> +
> +	int disabled_lapic_found;

Fits into "bool".

>  };
>  
>  struct kvm_vm_stat {
> diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
> @@ -34,6 +34,7 @@
>  #include "lapic.h"
>  
>  #include "hyperv.h"
> +#include "x86.h"
>  
>  static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
>  			   struct kvm *kvm, int irq_source_id, int level,
> @@ -55,8 +56,10 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
>  int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
>  		struct kvm_lapic_irq *irq, unsigned long *dest_map)
>  {
> -	int i, r = -1;
> +	int i, r = -1, idx = 0;

(No need to initialize idx.)

>  	struct kvm_vcpu *vcpu, *lowest = NULL;
> +	unsigned long dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
> +	unsigned int dest_vcpus = 0;
>  
>  	if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
>  			kvm_lowest_prio_delivery(irq)) {
> @@ -80,13 +85,25 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
>  				r = 0;
>  			r += kvm_apic_set_irq(vcpu, irq, dest_map);
>  		} else if (kvm_lapic_enabled(vcpu)) {
> -			if (!lowest)
> -				lowest = vcpu;
> -			else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
> -				lowest = vcpu;
> +			if (!kvm_vector_hashing_enabled()) {
> +				if (!lowest)
> +					lowest = vcpu;
> +				else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
> +					lowest = vcpu;
> +			} else {
> +				__set_bit(i, dest_vcpu_bitmap);
> +				dest_vcpus++;
> +			}
>  		}
>  	}
>  
> +	if (dest_vcpus != 0) {

(I think it's ok to do 'int idx = kvm...')

> +		idx = kvm_vector_2_index(irq->vector, dest_vcpus,
> +					 dest_vcpu_bitmap, KVM_MAX_VCPUS);
> +
> +		lowest = kvm_get_vcpu(kvm, idx - 1);
> +	}
> +
>  	if (lowest)
>  		r = kvm_apic_set_irq(lowest, irq, dest_map);
>  
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> @@ -675,6 +675,22 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
>  	}
>  }
>  
> +int kvm_vector_2_index(u32 vector, u32 dest_vcpus,

(The "2" in name is inconsistent, other functions use "to".)

> +		       const unsigned long *bitmap, u32 bitmap_size)
> +{
> +	u32 mod;
> +	int i, idx = 0;
> +
> +	mod = vector % dest_vcpus;
> +
> +	for (i = 0; i <= mod; i++) {
> +		idx = find_next_bit(bitmap, bitmap_size, idx) + 1;

I'd remove this "+ 1".  Current users don't check for errors and always
do "- 1".  The new error value could be 'idx = bitmap_size', with u32 as
return type.

> +		BUG_ON(idx > bitmap_size);
> +	}
> +
> +	return idx;
> +}
> +
>  bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
>  		struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map)
>  {
> @@ -727,21 +743,51 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
>  
>  		dst = map->logical_map[cid];
>  
> -		if (kvm_lowest_prio_delivery(irq)) {
> +		if (!kvm_lowest_prio_delivery(irq))
> +			goto set_irq;
> +
> +		if (!kvm_vector_hashing_enabled()) {
>  			int l = -1;
>  			for_each_set_bit(i, &bitmap, 16) {
>  				if (!dst[i])
>  					continue;
>  				if (l < 0)
>  					l = i;
> -				else if (kvm_apic_compare_prio(dst[i]->vcpu, dst[l]->vcpu) < 0)
> +				else if (kvm_apic_compare_prio(dst[i]->vcpu,
> +							dst[l]->vcpu) < 0)
>  					l = i;
>  			}
> -
>  			bitmap = (l >= 0) ? 1 << l : 0;
> +		} else {
> +			int idx = 0;
> +			unsigned int dest_vcpus = 0;

(No need to zero them.  Compiler will optimize it, but it increases the
 cognitive load on readers.)

> +
> +			dest_vcpus = hweight16(bitmap);
> +			if (dest_vcpus == 0)
> +				goto out;
> +
> +			idx = kvm_vector_2_index(irq->vector,
> +				dest_vcpus, &bitmap, 16);
> +
> +			/*
> +			 * We may find a hardware disabled LAPIC here, if that
> +			 * is the case, print out a error message once for each
> +			 * guest and return.
> +			 */
> +			if (!dst[idx-1] &&
> +				(kvm->arch.disabled_lapic_found == 0)) {

('!kvm->arch.disabled_lapic_found' would make it fit on one line.)

> +				kvm->arch.disabled_lapic_found = 1;
> +				printk(KERN_ERR

KERN_INFO is the maximal applicable level (and the appropriate one).
It's not an error on host side, just a pointer that the guest does
something stupid.

> +					"Disabled LAPIC found during irq injection\n");
> +				goto out;
> +			}
> +
> +			bitmap = 0;
> +			__set_bit(idx-1, &bitmap);
>  		}
>  	}
>  
> +set_irq:
>  	for_each_set_bit(i, &bitmap, 16) {
>  		if (!dst[i])
>  			continue;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> @@ -123,6 +123,9 @@ module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
>  unsigned int __read_mostly lapic_timer_advance_ns = 0;
>  module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
>  
> +bool __read_mostly enable_vector_hashing = 1;
> +module_param(enable_vector_hashing, bool, S_IRUGO);

I think the parameter is well described even without "enable" prefix,
thanks to "bool" type.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wu, Feng Jan. 22, 2016, 2:01 a.m. UTC | #10
> -----Original Message-----
> From: rkrcmar@redhat.com [mailto:rkrcmar@redhat.com]
> Sent: Friday, January 22, 2016 1:21 AM
> To: Wu, Feng <feng.wu@intel.com>
> Cc: Yang Zhang <yang.zhang.wz@gmail.com>; pbonzini@redhat.com; linux-
> kernel@vger.kernel.org; kvm@vger.kernel.org
> Subject: Re: [PATCH v3 2/4] KVM: x86: Use vector-hashing to deliver lowest-
> priority interrupts



> > Oh, I didn't notice 'ret' is initialized to true, I thought it was initialized
> > to false like another function, I should add a "ret = false' here. We should
> > failed to inject the interrupt since hardware disabled LAPIC is found.
> 
> 'ret = true' is the better one.  We know that the interrupt is not
> deliverable [1], so there's no point in trying to deliver with the slow
> path.  We behave similarly when the interrupt targets a single disabled
> APIC.

Oh, yes, you are right, Thanks a lot!

Thanks,
Feng

> 
> ---
> 1: Well ... it's possible that slowpath would deliver it thanks to
>    different handling of disabled APICs, but it's undefined behavior,
>    so it doesn't matter matter if we don't try.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yang Zhang Jan. 22, 2016, 4 a.m. UTC | #11
On 2016/1/22 1:21, rkrcmar@redhat.com wrote:
> 2016-01-21 05:33+0000, Wu, Feng:
>>> From: linux-kernel-owner@vger.kernel.org [mailto:linux-kernel-
>>> owner@vger.kernel.org] On Behalf Of Yang Zhang
>>> On 2016/1/20 9:42, Feng Wu wrote:
>>>> +			/*
>>>> +			 * We may find a hardware disabled LAPIC here, if
>>> that
>>>> +			 * is the case, print out a error message once for each
>>>> +			 * guest and return.
>>>> +			 */
>>>> +			if (!dst[idx-1] &&
>>>> +				(kvm->arch.disabled_lapic_found == 0)) {
>>>> +				kvm->arch.disabled_lapic_found = 1;
>>>> +				printk(KERN_ERR
>>>> +					"Disabled LAPIC found during irq
>>> injection\n");
>>>> +				goto out;
>>>
>>> What does "goto out" mean? Inject successfully or fail? According the
>>> value of ret which is set to ture here, it means inject successfully but
>
> (true actually means that fast path did the job and slow path isn't
>   needed.)
>
>>> i = -1.
>
> (I think there isn't a practical difference between *r=-1 and *r=0.)

Currently, if *r == -1, the remote_irr may get set. But it seems wrong. 
I need to have a double check to see whether it is a bug in current code.

>
>> Oh, I didn't notice 'ret' is initialized to true, I thought it was initialized
>> to false like another function, I should add a "ret = false' here. We should
>> failed to inject the interrupt since hardware disabled LAPIC is found.
>
> 'ret = true' is the better one.  We know that the interrupt is not
> deliverable [1], so there's no point in trying to deliver with the slow
> path.  We behave similarly when the interrupt targets a single disabled
> APIC.
>
> ---
> 1: Well ... it's possible that slowpath would deliver it thanks to
>     different handling of disabled APICs, but it's undefined behavior,

why it is undefined behavior? Besides, why we will keep two different 
handling logic for the fast path and slow path? It looks weird.

>     so it doesn't matter matter if we don't try.
>
Wu, Feng Jan. 22, 2016, 5:12 a.m. UTC | #12
> -----Original Message-----
> From: Radim Kr?má? [mailto:rkrcmar@redhat.com]
> Sent: Friday, January 22, 2016 3:50 AM
> To: Wu, Feng <feng.wu@intel.com>
> Cc: pbonzini@redhat.com; linux-kernel@vger.kernel.org;
> kvm@vger.kernel.org
> Subject: Re: [PATCH v3 2/4] KVM: x86: Use vector-hashing to deliver lowest-
> priority interrupts
> 
> 2016-01-20 09:42+0800, Feng Wu:
> > Use vector-hashing to deliver lowest-priority interrupts, As an
> > example, modern Intel CPUs in server platform use this method to
> > handle lowest-priority interrupts.
> >
> > Signed-off-by: Feng Wu <feng.wu@intel.com>
> > ---
> 
> Functionality looks good, so I had a lot of stylistic comments, sorry :)

Any comments are welcome! Thank you! :)

> 
> > +		       const unsigned long *bitmap, u32 bitmap_size)
> > +{
> > +	u32 mod;
> > +	int i, idx = 0;
> > +
> > +	mod = vector % dest_vcpus;
> > +
> > +	for (i = 0; i <= mod; i++) {
> > +		idx = find_next_bit(bitmap, bitmap_size, idx) + 1;
> 
> I'd remove this "+ 1".  Current users don't check for errors and always
> do "- 1".  The new error value could be 'idx = bitmap_size', with u32 as
> return type.
> 

Does the following code look good to you:

        u32 mod;
        int i, idx = -1;

        mod = vector % dest_vcpus;

        for (i = 0; i <= mod; i++) {
                idx = find_next_bit(bitmap, bitmap_size, idx + 1);
                BUG_ON(idx == bitmap_size);
        }

        return idx;

Thanks,
Feng
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Radim Krčmář Jan. 22, 2016, 1:49 p.m. UTC | #13
2016-01-22 12:00+0800, Yang Zhang:
> On 2016/1/22 1:21, rkrcmar@redhat.com wrote:
>>(I think there isn't a practical difference between *r=-1 and *r=0.)
> 
> Currently, if *r == -1, the remote_irr may get set. But it seems wrong. I

Yeah ...

> need to have a double check to see whether it is a bug in current code.

Looking forward to the patch!

Thanks.

>>'ret = true' is the better one.  We know that the interrupt is not
>>deliverable [1], so there's no point in trying to deliver with the slow
>>path.  We behave similarly when the interrupt targets a single disabled
>>APIC.
>>
>>---
>>1: Well ... it's possible that slowpath would deliver it thanks to
>>    different handling of disabled APICs, but it's undefined behavior,
> 
> why it is undefined behavior? Besides, why we will keep two different
> handling logic for the fast path and slow path? It looks weird.

It does look very weird ... the slow path would require refactoring,
though, so we save effort without a considerable drawback.
(I would love if it behaved identically, but I don't want to force it on
 someone and likely won't do it myself ...)

I consider it undefined because SMD says that an OS musn't configure
this behavior and doesn't say what should happen if the OS does => we
could do anything.  (Killing the guest would be great for debugging OS
issues, but ours behavior is fairly conservative.)
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Radim Krčmář Jan. 22, 2016, 2:01 p.m. UTC | #14
2016-01-22 05:12+0000, Wu, Feng:
>> From: Radim Kr?má? [mailto:rkrcmar@redhat.com]
>> 2016-01-20 09:42+0800, Feng Wu:
>> > +{
>> > +	u32 mod;
>> > +	int i, idx = 0;
>> > +
>> > +	mod = vector % dest_vcpus;
>> > +
>> > +	for (i = 0; i <= mod; i++) {
>> > +		idx = find_next_bit(bitmap, bitmap_size, idx) + 1;
>> 
>> I'd remove this "+ 1".  Current users don't check for errors and always
>> do "- 1".  The new error value could be 'idx = bitmap_size', with u32 as
>> return type.
>> 
> 
> Does the following code look good to you:
> 
>         u32 mod;
>         int i, idx = -1;
> 
>         mod = vector % dest_vcpus;
> 
>         for (i = 0; i <= mod; i++) {
>                 idx = find_next_bit(bitmap, bitmap_size, idx + 1);
>                 BUG_ON(idx == bitmap_size);
>         }
> 
>         return idx;

It's ok, thanks.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini Jan. 25, 2016, 12:25 p.m. UTC | #15
On 22/01/2016 15:01, Radim Krcmár wrote:
>>         for (i = 0; i <= mod; i++) {
>>                 idx = find_next_bit(bitmap, bitmap_size, idx + 1);
>>                 BUG_ON(idx == bitmap_size);
>>         }

WARN_ON, not BUG_ON.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Radim Krčmář Jan. 25, 2016, 3:20 p.m. UTC | #16
2016-01-25 13:25+0100, Paolo Bonzini:
> On 22/01/2016 15:01, Radim Krcmár wrote:
>>>         for (i = 0; i <= mod; i++) {
>>>                 idx = find_next_bit(bitmap, bitmap_size, idx + 1);
>>>                 BUG_ON(idx == bitmap_size);
>>>         }
> 
> WARN_ON, not BUG_ON.

Callers don't check the return value for an error, because every error
is a BUG now.  I think that we should check if we return bitmap_size.
(Current paths could dereference NULL or throw unrelated warnings.)

Also, WARN_ON_ONCE?
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paolo Bonzini Jan. 25, 2016, 4:14 p.m. UTC | #17
On 25/01/2016 16:20, Radim Krcmár wrote:
> 2016-01-25 13:25+0100, Paolo Bonzini:
>> On 22/01/2016 15:01, Radim Krcmár wrote:
>>>>         for (i = 0; i <= mod; i++) {
>>>>                 idx = find_next_bit(bitmap, bitmap_size, idx + 1);
>>>>                 BUG_ON(idx == bitmap_size);
>>>>         }
>>
>> WARN_ON, not BUG_ON.
> 
> Callers don't check the return value for an error, because every error
> is a BUG now.  I think that we should check if we return bitmap_size.
> (Current paths could dereference NULL or throw unrelated warnings.)

You can probably just return a random number (e.g. zero or
find_first_bit) if the bug is hit.  But really, the bug is easy enough
to verify that BUG_ON might even be okay...

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wu, Feng Jan. 26, 2016, 1:10 a.m. UTC | #18
DQoNCj4gLS0tLS1PcmlnaW5hbCBNZXNzYWdlLS0tLS0NCj4gRnJvbTogUGFvbG8gQm9uemluaSBb
bWFpbHRvOnBib256aW5pQHJlZGhhdC5jb21dDQo+IFNlbnQ6IFR1ZXNkYXksIEphbnVhcnkgMjYs
IDIwMTYgMTI6MTUgQU0NCj4gVG86IFJhZGltIEtyY23DoXIgPHJrcmNtYXJAcmVkaGF0LmNvbT4N
Cj4gQ2M6IFd1LCBGZW5nIDxmZW5nLnd1QGludGVsLmNvbT47IGxpbnV4LWtlcm5lbEB2Z2VyLmtl
cm5lbC5vcmc7DQo+IGt2bUB2Z2VyLmtlcm5lbC5vcmcNCj4gU3ViamVjdDogUmU6IFtQQVRDSCB2
MyAyLzRdIEtWTTogeDg2OiBVc2UgdmVjdG9yLWhhc2hpbmcgdG8gZGVsaXZlciBsb3dlc3QtDQo+
IHByaW9yaXR5IGludGVycnVwdHMNCj4gDQo+IA0KPiANCj4gT24gMjUvMDEvMjAxNiAxNjoyMCwg
UmFkaW0gS3JjbcOhciB3cm90ZToNCj4gPiAyMDE2LTAxLTI1IDEzOjI1KzAxMDAsIFBhb2xvIEJv
bnppbmk6DQo+ID4+IE9uIDIyLzAxLzIwMTYgMTU6MDEsIFJhZGltIEtyY23DoXIgd3JvdGU6DQo+
ID4+Pj4gICAgICAgICBmb3IgKGkgPSAwOyBpIDw9IG1vZDsgaSsrKSB7DQo+ID4+Pj4gICAgICAg
ICAgICAgICAgIGlkeCA9IGZpbmRfbmV4dF9iaXQoYml0bWFwLCBiaXRtYXBfc2l6ZSwgaWR4ICsg
MSk7DQo+ID4+Pj4gICAgICAgICAgICAgICAgIEJVR19PTihpZHggPT0gYml0bWFwX3NpemUpOw0K
PiA+Pj4+ICAgICAgICAgfQ0KPiA+Pg0KPiA+PiBXQVJOX09OLCBub3QgQlVHX09OLg0KPiA+DQo+
ID4gQ2FsbGVycyBkb24ndCBjaGVjayB0aGUgcmV0dXJuIHZhbHVlIGZvciBhbiBlcnJvciwgYmVj
YXVzZSBldmVyeSBlcnJvcg0KPiA+IGlzIGEgQlVHIG5vdy4gIEkgdGhpbmsgdGhhdCB3ZSBzaG91
bGQgY2hlY2sgaWYgd2UgcmV0dXJuIGJpdG1hcF9zaXplLg0KPiA+IChDdXJyZW50IHBhdGhzIGNv
dWxkIGRlcmVmZXJlbmNlIE5VTEwgb3IgdGhyb3cgdW5yZWxhdGVkIHdhcm5pbmdzLikNCj4gDQo+
IFlvdSBjYW4gcHJvYmFibHkganVzdCByZXR1cm4gYSByYW5kb20gbnVtYmVyIChlLmcuIHplcm8g
b3INCj4gZmluZF9maXJzdF9iaXQpIGlmIHRoZSBidWcgaXMgaGl0LiAgQnV0IHJlYWxseSwgdGhl
IGJ1ZyBpcyBlYXN5IGVub3VnaA0KPiB0byB2ZXJpZnkgdGhhdCBCVUdfT04gbWlnaHQgZXZlbiBi
ZSBva2F5Li4uDQoNClRoYW5rcyBhIGxvdCBmb3IgeW91ciBjb21tZW50cywgUmFkaW0gYW5kIFBh
b2xvISBBbnkgb3RoZXIgY29tbWVudHMNCnRvIG90aGVyIHBhdGNoZXMgaW4gdGhpcyBzZXJpZXMu
IElmIHRoaXMgaXMgdGhlIG9ubHkgY29tbWVudHMsIGRvIEkgbmVlZCB0bw0Kc2VuZCB2NT8NCg0K
VGhhbmtzLA0KRmVuZw0KDQo+IA0KPiBQYW9sbw0K
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 44adbb8..5054810 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -754,6 +754,8 @@  struct kvm_arch {
 
 	bool irqchip_split;
 	u8 nr_reserved_ioapic_pins;
+
+	int disabled_lapic_found;
 };
 
 struct kvm_vm_stat {
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 8fc89ef..062e907 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -34,6 +34,7 @@ 
 #include "lapic.h"
 
 #include "hyperv.h"
+#include "x86.h"
 
 static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
 			   struct kvm *kvm, int irq_source_id, int level,
@@ -55,8 +56,10 @@  static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
 int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 		struct kvm_lapic_irq *irq, unsigned long *dest_map)
 {
-	int i, r = -1;
+	int i, r = -1, idx = 0;
 	struct kvm_vcpu *vcpu, *lowest = NULL;
+	unsigned long dest_vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+	unsigned int dest_vcpus = 0;
 
 	if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
 			kvm_lowest_prio_delivery(irq)) {
@@ -67,6 +70,8 @@  int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 	if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r, dest_map))
 		return r;
 
+	memset(dest_vcpu_bitmap, 0, sizeof(dest_vcpu_bitmap));
+
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		if (!kvm_apic_present(vcpu))
 			continue;
@@ -80,13 +85,25 @@  int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 				r = 0;
 			r += kvm_apic_set_irq(vcpu, irq, dest_map);
 		} else if (kvm_lapic_enabled(vcpu)) {
-			if (!lowest)
-				lowest = vcpu;
-			else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
-				lowest = vcpu;
+			if (!kvm_vector_hashing_enabled()) {
+				if (!lowest)
+					lowest = vcpu;
+				else if (kvm_apic_compare_prio(vcpu, lowest) < 0)
+					lowest = vcpu;
+			} else {
+				__set_bit(i, dest_vcpu_bitmap);
+				dest_vcpus++;
+			}
 		}
 	}
 
+	if (dest_vcpus != 0) {
+		idx = kvm_vector_2_index(irq->vector, dest_vcpus,
+					 dest_vcpu_bitmap, KVM_MAX_VCPUS);
+
+		lowest = kvm_get_vcpu(kvm, idx - 1);
+	}
+
 	if (lowest)
 		r = kvm_apic_set_irq(lowest, irq, dest_map);
 
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 36591fa..e1a449da 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -675,6 +675,22 @@  bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 	}
 }
 
+int kvm_vector_2_index(u32 vector, u32 dest_vcpus,
+		       const unsigned long *bitmap, u32 bitmap_size)
+{
+	u32 mod;
+	int i, idx = 0;
+
+	mod = vector % dest_vcpus;
+
+	for (i = 0; i <= mod; i++) {
+		idx = find_next_bit(bitmap, bitmap_size, idx) + 1;
+		BUG_ON(idx > bitmap_size);
+	}
+
+	return idx;
+}
+
 bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 		struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map)
 {
@@ -727,21 +743,51 @@  bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 
 		dst = map->logical_map[cid];
 
-		if (kvm_lowest_prio_delivery(irq)) {
+		if (!kvm_lowest_prio_delivery(irq))
+			goto set_irq;
+
+		if (!kvm_vector_hashing_enabled()) {
 			int l = -1;
 			for_each_set_bit(i, &bitmap, 16) {
 				if (!dst[i])
 					continue;
 				if (l < 0)
 					l = i;
-				else if (kvm_apic_compare_prio(dst[i]->vcpu, dst[l]->vcpu) < 0)
+				else if (kvm_apic_compare_prio(dst[i]->vcpu,
+							dst[l]->vcpu) < 0)
 					l = i;
 			}
-
 			bitmap = (l >= 0) ? 1 << l : 0;
+		} else {
+			int idx = 0;
+			unsigned int dest_vcpus = 0;
+
+			dest_vcpus = hweight16(bitmap);
+			if (dest_vcpus == 0)
+				goto out;
+
+			idx = kvm_vector_2_index(irq->vector,
+				dest_vcpus, &bitmap, 16);
+
+			/*
+			 * We may find a hardware disabled LAPIC here, if that
+			 * is the case, print out a error message once for each
+			 * guest and return.
+			 */
+			if (!dst[idx-1] &&
+				(kvm->arch.disabled_lapic_found == 0)) {
+				kvm->arch.disabled_lapic_found = 1;
+				printk(KERN_ERR
+					"Disabled LAPIC found during irq injection\n");
+				goto out;
+			}
+
+			bitmap = 0;
+			__set_bit(idx-1, &bitmap);
 		}
 	}
 
+set_irq:
 	for_each_set_bit(i, &bitmap, 16) {
 		if (!dst[i])
 			continue;
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 41bdb35..d864601 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -175,4 +175,6 @@  void wait_lapic_expire(struct kvm_vcpu *vcpu);
 
 bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
 			struct kvm_vcpu **dest_vcpu);
+int kvm_vector_2_index(u32 vector, u32 dest_vcpus,
+		       const unsigned long *bitmap, u32 bitmap_size);
 #endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4244c2b..47daf77 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -123,6 +123,9 @@  module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
 unsigned int __read_mostly lapic_timer_advance_ns = 0;
 module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
 
+bool __read_mostly enable_vector_hashing = 1;
+module_param(enable_vector_hashing, bool, S_IRUGO);
+
 static bool __read_mostly backwards_tsc_observed = false;
 
 #define KVM_NR_SHARED_MSRS 16
@@ -8370,6 +8373,12 @@  int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
 	return kvm_x86_ops->update_pi_irte(kvm, host_irq, guest_irq, set);
 }
 
+bool kvm_vector_hashing_enabled(void)
+{
+	return enable_vector_hashing;
+}
+EXPORT_SYMBOL_GPL(kvm_vector_hashing_enabled);
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index f2afa5f..04bd0f9 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -179,6 +179,7 @@  int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data);
 int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
 bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
 					  int page_num);
+bool kvm_vector_hashing_enabled(void);
 
 #define KVM_SUPPORTED_XCR0     (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
 				| XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \