diff mbox

[v2,2/2] KVM: x86: Add lowest-priority support for vt-d posted-interrupts

Message ID 1450229853-3886-3-git-send-email-feng.wu@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Wu, Feng Dec. 16, 2015, 1:37 a.m. UTC
Use vector-hashing to deliver lowest-priority interrupts for
VT-d posted-interrupts.

Signed-off-by: Feng Wu <feng.wu@intel.com>
---
 arch/x86/kvm/lapic.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/lapic.h |  2 ++
 arch/x86/kvm/vmx.c   | 12 ++++++++--
 3 files changed, 79 insertions(+), 2 deletions(-)

Comments

Yang Zhang Dec. 21, 2015, 1:50 a.m. UTC | #1
On 2015/12/16 9:37, Feng Wu wrote:
> Use vector-hashing to deliver lowest-priority interrupts for
> VT-d posted-interrupts.
>
> Signed-off-by: Feng Wu <feng.wu@intel.com>
> ---
>   arch/x86/kvm/lapic.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>   arch/x86/kvm/lapic.h |  2 ++
>   arch/x86/kvm/vmx.c   | 12 ++++++++--
>   3 files changed, 79 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index e29001f..d4f2c8f 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -854,6 +854,73 @@ out:
>   }
>
>   /*
> + * This routine handles lowest-priority interrupts using vector-hashing
> + * mechanism. As an example, modern Intel CPUs use this method to handle
> + * lowest-priority interrupts.
> + *
> + * Here is the details about the vector-hashing mechanism:
> + * 1. For lowest-priority interrupts, store all the possible destination
> + *    vCPUs in an array.
> + * 2. Use "guest vector % max number of destination vCPUs" to find the right
> + *    destination vCPU in the array for the lowest-priority interrupt.
> + */
> +struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
> +					      struct kvm_lapic_irq *irq)
> +{
> +	struct kvm_apic_map *map;
> +	struct kvm_vcpu *vcpu = NULL;
> +
> +	if (irq->shorthand)
> +		return NULL;
> +
> +	rcu_read_lock();
> +	map = rcu_dereference(kvm->arch.apic_map);
> +
> +	if (!map)
> +		goto out;
> +
> +	if ((irq->dest_mode != APIC_DEST_PHYSICAL) &&
> +			kvm_lowest_prio_delivery(irq)) {
> +		u16 cid;
> +		int i, idx = 0;
> +		unsigned long bitmap = 1;
> +		unsigned int dest_vcpus = 0;
> +		struct kvm_lapic **dst = NULL;
> +
> +
> +		if (!kvm_apic_logical_map_valid(map))
> +			goto out;
> +
> +		apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap);
> +
> +		if (cid >= ARRAY_SIZE(map->logical_map))
> +			goto out;
> +
> +		dst = map->logical_map[cid];
> +
> +		for_each_set_bit(i, &bitmap, 16) {
> +			if (!dst[i] && !kvm_lapic_enabled(dst[i]->vcpu)) {
> +				clear_bit(i, &bitmap);
> +				continue;
> +			}
> +		}
> +
> +		dest_vcpus = hweight16(bitmap);
> +
> +		if (dest_vcpus != 0) {
> +			idx = kvm_vector_2_index(irq->vector, dest_vcpus,
> +						 &bitmap, 16);
> +			vcpu = dst[idx-1]->vcpu;
> +		}
> +	}
> +
> +out:
> +	rcu_read_unlock();
> +	return vcpu;
> +}
> +EXPORT_SYMBOL_GPL(kvm_intr_vector_hashing_dest);
> +
> +/*
>    * Add a pending IRQ into lapic.
>    * Return 1 if successfully added and 0 if discarded.
>    */
> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> index 6890ef0..52bffce 100644
> --- a/arch/x86/kvm/lapic.h
> +++ b/arch/x86/kvm/lapic.h
> @@ -172,4 +172,6 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
>   			struct kvm_vcpu **dest_vcpu);
>   int kvm_vector_2_index(u32 vector, u32 dest_vcpus,
>   		       const unsigned long *bitmap, u32 bitmap_size);
> +struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
> +					      struct kvm_lapic_irq *irq);
>   #endif
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 5eb56ed..3f89189 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -10702,8 +10702,16 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
>   		 */
>
>   		kvm_set_msi_irq(e, &irq);
> -		if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu))
> -			continue;
> +
> +		if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
> +			if (!kvm_vector_hashing_enabled() ||
> +					irq.delivery_mode != APIC_DM_LOWEST)
> +				continue;
> +
> +			vcpu = kvm_intr_vector_hashing_dest(kvm, &irq);
> +			if (!vcpu)
> +				continue;
> +		}

I am a little confused with the 'continue'. If the destination is not 
single vcpu, shouldn't we rollback to use non-PI mode?

>
>   		vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
>   		vcpu_info.vector = irq.vector;
>
Wu, Feng Dec. 21, 2015, 1:55 a.m. UTC | #2
> -----Original Message-----
> From: linux-kernel-owner@vger.kernel.org [mailto:linux-kernel-
> owner@vger.kernel.org] On Behalf Of Yang Zhang
> Sent: Monday, December 21, 2015 9:50 AM
> To: Wu, Feng <feng.wu@intel.com>; pbonzini@redhat.com;
> rkrcmar@redhat.com
> Cc: kvm@vger.kernel.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH v2 2/2] KVM: x86: Add lowest-priority support for vt-d
> posted-interrupts
> 
> On 2015/12/16 9:37, Feng Wu wrote:
> > Use vector-hashing to deliver lowest-priority interrupts for
> > VT-d posted-interrupts.
> >
> > Signed-off-by: Feng Wu <feng.wu@intel.com>
> > ---
> >   arch/x86/kvm/lapic.c | 67
> ++++++++++++++++++++++++++++++++++++++++++++++++++++
> >   arch/x86/kvm/lapic.h |  2 ++
> >   arch/x86/kvm/vmx.c   | 12 ++++++++--
> >   3 files changed, 79 insertions(+), 2 deletions(-)
> >
> > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> > index e29001f..d4f2c8f 100644
> > --- a/arch/x86/kvm/lapic.c
> > +++ b/arch/x86/kvm/lapic.c
> > @@ -854,6 +854,73 @@ out:
> >   }
> >
> >   /*
> > + * This routine handles lowest-priority interrupts using vector-hashing
> > + * mechanism. As an example, modern Intel CPUs use this method to handle
> > + * lowest-priority interrupts.
> > + *
> > + * Here is the details about the vector-hashing mechanism:
> > + * 1. For lowest-priority interrupts, store all the possible destination
> > + *    vCPUs in an array.
> > + * 2. Use "guest vector % max number of destination vCPUs" to find the right
> > + *    destination vCPU in the array for the lowest-priority interrupt.
> > + */
> > +struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
> > +					      struct kvm_lapic_irq *irq)
> > +{
> > +	struct kvm_apic_map *map;
> > +	struct kvm_vcpu *vcpu = NULL;
> > +
> > +	if (irq->shorthand)
> > +		return NULL;
> > +
> > +	rcu_read_lock();
> > +	map = rcu_dereference(kvm->arch.apic_map);
> > +
> > +	if (!map)
> > +		goto out;
> > +
> > +	if ((irq->dest_mode != APIC_DEST_PHYSICAL) &&
> > +			kvm_lowest_prio_delivery(irq)) {
> > +		u16 cid;
> > +		int i, idx = 0;
> > +		unsigned long bitmap = 1;
> > +		unsigned int dest_vcpus = 0;
> > +		struct kvm_lapic **dst = NULL;
> > +
> > +
> > +		if (!kvm_apic_logical_map_valid(map))
> > +			goto out;
> > +
> > +		apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap);
> > +
> > +		if (cid >= ARRAY_SIZE(map->logical_map))
> > +			goto out;
> > +
> > +		dst = map->logical_map[cid];
> > +
> > +		for_each_set_bit(i, &bitmap, 16) {
> > +			if (!dst[i] && !kvm_lapic_enabled(dst[i]->vcpu)) {
> > +				clear_bit(i, &bitmap);
> > +				continue;
> > +			}
> > +		}
> > +
> > +		dest_vcpus = hweight16(bitmap);
> > +
> > +		if (dest_vcpus != 0) {
> > +			idx = kvm_vector_2_index(irq->vector, dest_vcpus,
> > +						 &bitmap, 16);
> > +			vcpu = dst[idx-1]->vcpu;
> > +		}
> > +	}
> > +
> > +out:
> > +	rcu_read_unlock();
> > +	return vcpu;
> > +}
> > +EXPORT_SYMBOL_GPL(kvm_intr_vector_hashing_dest);
> > +
> > +/*
> >    * Add a pending IRQ into lapic.
> >    * Return 1 if successfully added and 0 if discarded.
> >    */
> > diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> > index 6890ef0..52bffce 100644
> > --- a/arch/x86/kvm/lapic.h
> > +++ b/arch/x86/kvm/lapic.h
> > @@ -172,4 +172,6 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm,
> struct kvm_lapic_irq *irq,
> >   			struct kvm_vcpu **dest_vcpu);
> >   int kvm_vector_2_index(u32 vector, u32 dest_vcpus,
> >   		       const unsigned long *bitmap, u32 bitmap_size);
> > +struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
> > +					      struct kvm_lapic_irq *irq);
> >   #endif
> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> > index 5eb56ed..3f89189 100644
> > --- a/arch/x86/kvm/vmx.c
> > +++ b/arch/x86/kvm/vmx.c
> > @@ -10702,8 +10702,16 @@ static int vmx_update_pi_irte(struct kvm *kvm,
> unsigned int host_irq,
> >   		 */
> >
> >   		kvm_set_msi_irq(e, &irq);
> > -		if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu))
> > -			continue;
> > +
> > +		if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
> > +			if (!kvm_vector_hashing_enabled() ||
> > +					irq.delivery_mode !=
> APIC_DM_LOWEST)
> > +				continue;
> > +
> > +			vcpu = kvm_intr_vector_hashing_dest(kvm, &irq);
> > +			if (!vcpu)
> > +				continue;
> > +		}
> 
> I am a little confused with the 'continue'. If the destination is not
> single vcpu, shouldn't we rollback to use non-PI mode?

Here is the logic:
- If it is single destination, we will use PI no matter it is fixed or lowest-priority.
- If it is not single destination:
	a) It is fixed, we will use non-PI
	b) It is lowest-priority and vector-hashing is enabled, we will use PI
	c) otherwise, use non-PI

Thanks,
Feng

> 
> >
> >   		vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
> >   		vcpu_info.vector = irq.vector;
> >
> 
> 
> --
> best regards
> yang
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yang Zhang Dec. 21, 2015, 2:01 a.m. UTC | #3
On 2015/12/21 9:55, Wu, Feng wrote:
>
>
>> -----Original Message-----
>> From: linux-kernel-owner@vger.kernel.org [mailto:linux-kernel-
>> owner@vger.kernel.org] On Behalf Of Yang Zhang
>> Sent: Monday, December 21, 2015 9:50 AM
>> To: Wu, Feng <feng.wu@intel.com>; pbonzini@redhat.com;
>> rkrcmar@redhat.com
>> Cc: kvm@vger.kernel.org; linux-kernel@vger.kernel.org
>> Subject: Re: [PATCH v2 2/2] KVM: x86: Add lowest-priority support for vt-d
>> posted-interrupts
>>
>> On 2015/12/16 9:37, Feng Wu wrote:
>>> Use vector-hashing to deliver lowest-priority interrupts for
>>> VT-d posted-interrupts.
>>>
>>> Signed-off-by: Feng Wu <feng.wu@intel.com>
>>> ---
>>>    arch/x86/kvm/lapic.c | 67
>> ++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>    arch/x86/kvm/lapic.h |  2 ++
>>>    arch/x86/kvm/vmx.c   | 12 ++++++++--
>>>    3 files changed, 79 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
>>> index e29001f..d4f2c8f 100644
>>> --- a/arch/x86/kvm/lapic.c
>>> +++ b/arch/x86/kvm/lapic.c
>>> @@ -854,6 +854,73 @@ out:
>>>    }
>>>
>>>    /*
>>> + * This routine handles lowest-priority interrupts using vector-hashing
>>> + * mechanism. As an example, modern Intel CPUs use this method to handle
>>> + * lowest-priority interrupts.
>>> + *
>>> + * Here is the details about the vector-hashing mechanism:
>>> + * 1. For lowest-priority interrupts, store all the possible destination
>>> + *    vCPUs in an array.
>>> + * 2. Use "guest vector % max number of destination vCPUs" to find the right
>>> + *    destination vCPU in the array for the lowest-priority interrupt.
>>> + */
>>> +struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
>>> +					      struct kvm_lapic_irq *irq)
>>> +{
>>> +	struct kvm_apic_map *map;
>>> +	struct kvm_vcpu *vcpu = NULL;
>>> +
>>> +	if (irq->shorthand)
>>> +		return NULL;
>>> +
>>> +	rcu_read_lock();
>>> +	map = rcu_dereference(kvm->arch.apic_map);
>>> +
>>> +	if (!map)
>>> +		goto out;
>>> +
>>> +	if ((irq->dest_mode != APIC_DEST_PHYSICAL) &&
>>> +			kvm_lowest_prio_delivery(irq)) {
>>> +		u16 cid;
>>> +		int i, idx = 0;
>>> +		unsigned long bitmap = 1;
>>> +		unsigned int dest_vcpus = 0;
>>> +		struct kvm_lapic **dst = NULL;
>>> +
>>> +
>>> +		if (!kvm_apic_logical_map_valid(map))
>>> +			goto out;
>>> +
>>> +		apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap);
>>> +
>>> +		if (cid >= ARRAY_SIZE(map->logical_map))
>>> +			goto out;
>>> +
>>> +		dst = map->logical_map[cid];
>>> +
>>> +		for_each_set_bit(i, &bitmap, 16) {
>>> +			if (!dst[i] && !kvm_lapic_enabled(dst[i]->vcpu)) {
>>> +				clear_bit(i, &bitmap);
>>> +				continue;
>>> +			}
>>> +		}
>>> +
>>> +		dest_vcpus = hweight16(bitmap);
>>> +
>>> +		if (dest_vcpus != 0) {
>>> +			idx = kvm_vector_2_index(irq->vector, dest_vcpus,
>>> +						 &bitmap, 16);
>>> +			vcpu = dst[idx-1]->vcpu;
>>> +		}
>>> +	}
>>> +
>>> +out:
>>> +	rcu_read_unlock();
>>> +	return vcpu;
>>> +}
>>> +EXPORT_SYMBOL_GPL(kvm_intr_vector_hashing_dest);
>>> +
>>> +/*
>>>     * Add a pending IRQ into lapic.
>>>     * Return 1 if successfully added and 0 if discarded.
>>>     */
>>> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
>>> index 6890ef0..52bffce 100644
>>> --- a/arch/x86/kvm/lapic.h
>>> +++ b/arch/x86/kvm/lapic.h
>>> @@ -172,4 +172,6 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm,
>> struct kvm_lapic_irq *irq,
>>>    			struct kvm_vcpu **dest_vcpu);
>>>    int kvm_vector_2_index(u32 vector, u32 dest_vcpus,
>>>    		       const unsigned long *bitmap, u32 bitmap_size);
>>> +struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
>>> +					      struct kvm_lapic_irq *irq);
>>>    #endif
>>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>>> index 5eb56ed..3f89189 100644
>>> --- a/arch/x86/kvm/vmx.c
>>> +++ b/arch/x86/kvm/vmx.c
>>> @@ -10702,8 +10702,16 @@ static int vmx_update_pi_irte(struct kvm *kvm,
>> unsigned int host_irq,
>>>    		 */
>>>
>>>    		kvm_set_msi_irq(e, &irq);
>>> -		if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu))
>>> -			continue;
>>> +
>>> +		if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
>>> +			if (!kvm_vector_hashing_enabled() ||
>>> +					irq.delivery_mode !=
>> APIC_DM_LOWEST)
>>> +				continue;
>>> +
>>> +			vcpu = kvm_intr_vector_hashing_dest(kvm, &irq);
>>> +			if (!vcpu)
>>> +				continue;
>>> +		}
>>
>> I am a little confused with the 'continue'. If the destination is not
>> single vcpu, shouldn't we rollback to use non-PI mode?
>
> Here is the logic:
> - If it is single destination, we will use PI no matter it is fixed or lowest-priority.
> - If it is not single destination:
> 	a) It is fixed, we will use non-PI
> 	b) It is lowest-priority and vector-hashing is enabled, we will use PI
> 	c) otherwise, use non-PI

If it is single destination previously, then change to no-single mode. 
Can current code cover this case?

>
> Thanks,
> Feng
>
>>
>>>
>>>    		vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
>>>    		vcpu_info.vector = irq.vector;
>>>
>>
>>
>> --
>> best regards
>> yang
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> Please read the FAQ at  http://www.tux.org/lkml/
Wu, Feng Dec. 22, 2015, 4:36 a.m. UTC | #4
> -----Original Message-----
> From: Yang Zhang [mailto:yang.zhang.wz@gmail.com]
> Sent: Monday, December 21, 2015 10:01 AM
> To: Wu, Feng <feng.wu@intel.com>; pbonzini@redhat.com;
> rkrcmar@redhat.com
> Cc: kvm@vger.kernel.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH v2 2/2] KVM: x86: Add lowest-priority support for vt-d
> posted-interrupts
> 
> On 2015/12/21 9:55, Wu, Feng wrote:
> >
> >
> >> -----Original Message-----
> >> From: linux-kernel-owner@vger.kernel.org [mailto:linux-kernel-
> >> owner@vger.kernel.org] On Behalf Of Yang Zhang
> >> Sent: Monday, December 21, 2015 9:50 AM
> >> To: Wu, Feng <feng.wu@intel.com>; pbonzini@redhat.com;
> >> rkrcmar@redhat.com
> >> Cc: kvm@vger.kernel.org; linux-kernel@vger.kernel.org
> >> Subject: Re: [PATCH v2 2/2] KVM: x86: Add lowest-priority support for vt-d
> >> posted-interrupts
> >>
> >> On 2015/12/16 9:37, Feng Wu wrote:
> >>> Use vector-hashing to deliver lowest-priority interrupts for
> >>> VT-d posted-interrupts.
> >>>
> >>> Signed-off-by: Feng Wu <feng.wu@intel.com>
> >>> ---
> >>>    arch/x86/kvm/lapic.c | 67
> >> ++++++++++++++++++++++++++++++++++++++++++++++++++++
> >>>    arch/x86/kvm/lapic.h |  2 ++
> >>>    arch/x86/kvm/vmx.c   | 12 ++++++++--
> >>>    3 files changed, 79 insertions(+), 2 deletions(-)
> >>>
> >>> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> >>> index e29001f..d4f2c8f 100644
> >>> --- a/arch/x86/kvm/lapic.c
> >>> +++ b/arch/x86/kvm/lapic.c
> >>> @@ -854,6 +854,73 @@ out:
> >>>    }
> >>>
> >>>    /*
> >>> + * This routine handles lowest-priority interrupts using vector-hashing
> >>> + * mechanism. As an example, modern Intel CPUs use this method to
> handle
> >>> + * lowest-priority interrupts.
> >>> + *
> >>> + * Here is the details about the vector-hashing mechanism:
> >>> + * 1. For lowest-priority interrupts, store all the possible destination
> >>> + *    vCPUs in an array.
> >>> + * 2. Use "guest vector % max number of destination vCPUs" to find the
> right
> >>> + *    destination vCPU in the array for the lowest-priority interrupt.
> >>> + */
> >>> +struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
> >>> +					      struct kvm_lapic_irq *irq)
> >>> +{
> >>> +	struct kvm_apic_map *map;
> >>> +	struct kvm_vcpu *vcpu = NULL;
> >>> +
> >>> +	if (irq->shorthand)
> >>> +		return NULL;
> >>> +
> >>> +	rcu_read_lock();
> >>> +	map = rcu_dereference(kvm->arch.apic_map);
> >>> +
> >>> +	if (!map)
> >>> +		goto out;
> >>> +
> >>> +	if ((irq->dest_mode != APIC_DEST_PHYSICAL) &&
> >>> +			kvm_lowest_prio_delivery(irq)) {
> >>> +		u16 cid;
> >>> +		int i, idx = 0;
> >>> +		unsigned long bitmap = 1;
> >>> +		unsigned int dest_vcpus = 0;
> >>> +		struct kvm_lapic **dst = NULL;
> >>> +
> >>> +
> >>> +		if (!kvm_apic_logical_map_valid(map))
> >>> +			goto out;
> >>> +
> >>> +		apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap);
> >>> +
> >>> +		if (cid >= ARRAY_SIZE(map->logical_map))
> >>> +			goto out;
> >>> +
> >>> +		dst = map->logical_map[cid];
> >>> +
> >>> +		for_each_set_bit(i, &bitmap, 16) {
> >>> +			if (!dst[i] && !kvm_lapic_enabled(dst[i]->vcpu)) {
> >>> +				clear_bit(i, &bitmap);
> >>> +				continue;
> >>> +			}
> >>> +		}
> >>> +
> >>> +		dest_vcpus = hweight16(bitmap);
> >>> +
> >>> +		if (dest_vcpus != 0) {
> >>> +			idx = kvm_vector_2_index(irq->vector, dest_vcpus,
> >>> +						 &bitmap, 16);
> >>> +			vcpu = dst[idx-1]->vcpu;
> >>> +		}
> >>> +	}
> >>> +
> >>> +out:
> >>> +	rcu_read_unlock();
> >>> +	return vcpu;
> >>> +}
> >>> +EXPORT_SYMBOL_GPL(kvm_intr_vector_hashing_dest);
> >>> +
> >>> +/*
> >>>     * Add a pending IRQ into lapic.
> >>>     * Return 1 if successfully added and 0 if discarded.
> >>>     */
> >>> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
> >>> index 6890ef0..52bffce 100644
> >>> --- a/arch/x86/kvm/lapic.h
> >>> +++ b/arch/x86/kvm/lapic.h
> >>> @@ -172,4 +172,6 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm,
> >> struct kvm_lapic_irq *irq,
> >>>    			struct kvm_vcpu **dest_vcpu);
> >>>    int kvm_vector_2_index(u32 vector, u32 dest_vcpus,
> >>>    		       const unsigned long *bitmap, u32 bitmap_size);
> >>> +struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
> >>> +					      struct kvm_lapic_irq *irq);
> >>>    #endif
> >>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> >>> index 5eb56ed..3f89189 100644
> >>> --- a/arch/x86/kvm/vmx.c
> >>> +++ b/arch/x86/kvm/vmx.c
> >>> @@ -10702,8 +10702,16 @@ static int vmx_update_pi_irte(struct kvm
> *kvm,
> >> unsigned int host_irq,
> >>>    		 */
> >>>
> >>>    		kvm_set_msi_irq(e, &irq);
> >>> -		if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu))
> >>> -			continue;
> >>> +
> >>> +		if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
> >>> +			if (!kvm_vector_hashing_enabled() ||
> >>> +					irq.delivery_mode !=
> >> APIC_DM_LOWEST)
> >>> +				continue;
> >>> +
> >>> +			vcpu = kvm_intr_vector_hashing_dest(kvm, &irq);
> >>> +			if (!vcpu)
> >>> +				continue;
> >>> +		}
> >>
> >> I am a little confused with the 'continue'. If the destination is not
> >> single vcpu, shouldn't we rollback to use non-PI mode?
> >
> > Here is the logic:
> > - If it is single destination, we will use PI no matter it is fixed or lowest-priority.
> > - If it is not single destination:
> > 	a) It is fixed, we will use non-PI
> > 	b) It is lowest-priority and vector-hashing is enabled, we will use PI
> > 	c) otherwise, use non-PI
> 
> If it is single destination previously, then change to no-single mode.
> Can current code cover this case?

In my test, before setting irq affinity (change single vcpu to non-single vcpu
in this case), the guest will mask the interrupt first, so before getting here, IRTE
has been changed back to remapped mode already(when guest masks the MSIx,
we will change back to remapped mode), hence nothing needed here.

Digging into the linux code (guest) a bit more, I found that if interrupt remapping
is not enabled in the guest (IR is not supported for guest anyway), it will always
mask the MSI/MSIx before setting the irq affinity. So the code should work
well currently.

However, for robustness, I think explicitly changing IRTE back to remapped
mode for the 'continue' case should be a good idea.

Radim, Paolo, what are your guys' options about this? Any comments are
appreciated! Thanks a lot!

Thanks,
Feng
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yang Zhang Dec. 22, 2015, 6:42 a.m. UTC | #5
On 2015/12/22 12:36, Wu, Feng wrote:
>
>
>> -----Original Message-----
>> From: Yang Zhang [mailto:yang.zhang.wz@gmail.com]
>> Sent: Monday, December 21, 2015 10:01 AM
>> To: Wu, Feng <feng.wu@intel.com>; pbonzini@redhat.com;
>> rkrcmar@redhat.com
>> Cc: kvm@vger.kernel.org; linux-kernel@vger.kernel.org
>> Subject: Re: [PATCH v2 2/2] KVM: x86: Add lowest-priority support for vt-d
>> posted-interrupts
>>
>> On 2015/12/21 9:55, Wu, Feng wrote:
>>>
>>>
>>>> -----Original Message-----
>>>> From: linux-kernel-owner@vger.kernel.org [mailto:linux-kernel-
>>>> owner@vger.kernel.org] On Behalf Of Yang Zhang
>>>> Sent: Monday, December 21, 2015 9:50 AM
>>>> To: Wu, Feng <feng.wu@intel.com>; pbonzini@redhat.com;
>>>> rkrcmar@redhat.com
>>>> Cc: kvm@vger.kernel.org; linux-kernel@vger.kernel.org
>>>> Subject: Re: [PATCH v2 2/2] KVM: x86: Add lowest-priority support for vt-d
>>>> posted-interrupts
>>>>
>>>> On 2015/12/16 9:37, Feng Wu wrote:
>>>>> Use vector-hashing to deliver lowest-priority interrupts for
>>>>> VT-d posted-interrupts.
>>>>>
>>>>> Signed-off-by: Feng Wu <feng.wu@intel.com>
>>>>> ---
>>>>>     arch/x86/kvm/lapic.c | 67
>>>> ++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>     arch/x86/kvm/lapic.h |  2 ++
>>>>>     arch/x86/kvm/vmx.c   | 12 ++++++++--
>>>>>     3 files changed, 79 insertions(+), 2 deletions(-)
>>>>>
>>>>> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
>>>>> index e29001f..d4f2c8f 100644
>>>>> --- a/arch/x86/kvm/lapic.c
>>>>> +++ b/arch/x86/kvm/lapic.c
>>>>> @@ -854,6 +854,73 @@ out:
>>>>>     }
>>>>>
>>>>>     /*
>>>>> + * This routine handles lowest-priority interrupts using vector-hashing
>>>>> + * mechanism. As an example, modern Intel CPUs use this method to
>> handle
>>>>> + * lowest-priority interrupts.
>>>>> + *
>>>>> + * Here is the details about the vector-hashing mechanism:
>>>>> + * 1. For lowest-priority interrupts, store all the possible destination
>>>>> + *    vCPUs in an array.
>>>>> + * 2. Use "guest vector % max number of destination vCPUs" to find the
>> right
>>>>> + *    destination vCPU in the array for the lowest-priority interrupt.
>>>>> + */
>>>>> +struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
>>>>> +					      struct kvm_lapic_irq *irq)
>>>>> +{
>>>>> +	struct kvm_apic_map *map;
>>>>> +	struct kvm_vcpu *vcpu = NULL;
>>>>> +
>>>>> +	if (irq->shorthand)
>>>>> +		return NULL;
>>>>> +
>>>>> +	rcu_read_lock();
>>>>> +	map = rcu_dereference(kvm->arch.apic_map);
>>>>> +
>>>>> +	if (!map)
>>>>> +		goto out;
>>>>> +
>>>>> +	if ((irq->dest_mode != APIC_DEST_PHYSICAL) &&
>>>>> +			kvm_lowest_prio_delivery(irq)) {
>>>>> +		u16 cid;
>>>>> +		int i, idx = 0;
>>>>> +		unsigned long bitmap = 1;
>>>>> +		unsigned int dest_vcpus = 0;
>>>>> +		struct kvm_lapic **dst = NULL;
>>>>> +
>>>>> +
>>>>> +		if (!kvm_apic_logical_map_valid(map))
>>>>> +			goto out;
>>>>> +
>>>>> +		apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap);
>>>>> +
>>>>> +		if (cid >= ARRAY_SIZE(map->logical_map))
>>>>> +			goto out;
>>>>> +
>>>>> +		dst = map->logical_map[cid];
>>>>> +
>>>>> +		for_each_set_bit(i, &bitmap, 16) {
>>>>> +			if (!dst[i] && !kvm_lapic_enabled(dst[i]->vcpu)) {
>>>>> +				clear_bit(i, &bitmap);
>>>>> +				continue;
>>>>> +			}
>>>>> +		}
>>>>> +
>>>>> +		dest_vcpus = hweight16(bitmap);
>>>>> +
>>>>> +		if (dest_vcpus != 0) {
>>>>> +			idx = kvm_vector_2_index(irq->vector, dest_vcpus,
>>>>> +						 &bitmap, 16);
>>>>> +			vcpu = dst[idx-1]->vcpu;
>>>>> +		}
>>>>> +	}
>>>>> +
>>>>> +out:
>>>>> +	rcu_read_unlock();
>>>>> +	return vcpu;
>>>>> +}
>>>>> +EXPORT_SYMBOL_GPL(kvm_intr_vector_hashing_dest);
>>>>> +
>>>>> +/*
>>>>>      * Add a pending IRQ into lapic.
>>>>>      * Return 1 if successfully added and 0 if discarded.
>>>>>      */
>>>>> diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
>>>>> index 6890ef0..52bffce 100644
>>>>> --- a/arch/x86/kvm/lapic.h
>>>>> +++ b/arch/x86/kvm/lapic.h
>>>>> @@ -172,4 +172,6 @@ bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm,
>>>> struct kvm_lapic_irq *irq,
>>>>>     			struct kvm_vcpu **dest_vcpu);
>>>>>     int kvm_vector_2_index(u32 vector, u32 dest_vcpus,
>>>>>     		       const unsigned long *bitmap, u32 bitmap_size);
>>>>> +struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
>>>>> +					      struct kvm_lapic_irq *irq);
>>>>>     #endif
>>>>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>>>>> index 5eb56ed..3f89189 100644
>>>>> --- a/arch/x86/kvm/vmx.c
>>>>> +++ b/arch/x86/kvm/vmx.c
>>>>> @@ -10702,8 +10702,16 @@ static int vmx_update_pi_irte(struct kvm
>> *kvm,
>>>> unsigned int host_irq,
>>>>>     		 */
>>>>>
>>>>>     		kvm_set_msi_irq(e, &irq);
>>>>> -		if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu))
>>>>> -			continue;
>>>>> +
>>>>> +		if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
>>>>> +			if (!kvm_vector_hashing_enabled() ||
>>>>> +					irq.delivery_mode !=
>>>> APIC_DM_LOWEST)
>>>>> +				continue;
>>>>> +
>>>>> +			vcpu = kvm_intr_vector_hashing_dest(kvm, &irq);
>>>>> +			if (!vcpu)
>>>>> +				continue;
>>>>> +		}
>>>>
>>>> I am a little confused with the 'continue'. If the destination is not
>>>> single vcpu, shouldn't we rollback to use non-PI mode?
>>>
>>> Here is the logic:
>>> - If it is single destination, we will use PI no matter it is fixed or lowest-priority.
>>> - If it is not single destination:
>>> 	a) It is fixed, we will use non-PI
>>> 	b) It is lowest-priority and vector-hashing is enabled, we will use PI
>>> 	c) otherwise, use non-PI
>>
>> If it is single destination previously, then change to no-single mode.
>> Can current code cover this case?
>
> In my test, before setting irq affinity (change single vcpu to non-single vcpu
> in this case), the guest will mask the interrupt first, so before getting here, IRTE
> has been changed back to remapped mode already(when guest masks the MSIx,
> we will change back to remapped mode), hence nothing needed here.
>
> Digging into the linux code (guest) a bit more, I found that if interrupt remapping
> is not enabled in the guest (IR is not supported for guest anyway), it will always
> mask the MSI/MSIx before setting the irq affinity. So the code should work
> well currently.

We should not rely on guest's behavior. From code level, it need be fixed.

>
> However, for robustness, I think explicitly changing IRTE back to remapped
> mode for the 'continue' case should be a good idea.

This is what i am looking for.

>
> Radim, Paolo, what are your guys' options about this? Any comments are
> appreciated! Thanks a lot!
>
> Thanks,
> Feng
>
Radim Krčmář Dec. 23, 2015, 4:50 p.m. UTC | #6
2015-12-22 14:42+0800, Yang Zhang:
> On 2015/12/22 12:36, Wu, Feng wrote:
>>>From: Yang Zhang [mailto:yang.zhang.wz@gmail.com]
>>>On 2015/12/21 9:55, Wu, Feng wrote:
>>>>>From: linux-kernel-owner@vger.kernel.org [mailto:linux-kernel-
>>>>>On 2015/12/16 9:37, Feng Wu wrote:
>>>>>>diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>>>>>>@@ -10702,8 +10702,16 @@ static int vmx_update_pi_irte(struct kvm
>>>*kvm,
>>>>>unsigned int host_irq,
>>>>>>    		 */
>>>>>>
>>>>>>    		kvm_set_msi_irq(e, &irq);
>>>>>>-		if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu))
>>>>>>-			continue;
>>>>>>+
>>>>>>+		if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
>>>>>>+			if (!kvm_vector_hashing_enabled() ||
>>>>>>+					irq.delivery_mode !=
>>>>>APIC_DM_LOWEST)
>>>>>>+				continue;
>>>>>>+
>>>>>>+			vcpu = kvm_intr_vector_hashing_dest(kvm, &irq);
>>>>>>+			if (!vcpu)
>>>>>>+				continue;
>>>>>>+		}
>>>>>
>>>>>I am a little confused with the 'continue'. If the destination is not
>>>>>single vcpu, shouldn't we rollback to use non-PI mode?
>>>>
>>>>Here is the logic:
>>>>- If it is single destination, we will use PI no matter it is fixed or lowest-priority.
>>>>- If it is not single destination:
>>>>	a) It is fixed, we will use non-PI
>>>>	b) It is lowest-priority and vector-hashing is enabled, we will use PI
>>>>	c) otherwise, use non-PI
>>>
>>>If it is single destination previously, then change to no-single mode.
>>>Can current code cover this case?
>>
>>In my test, before setting irq affinity (change single vcpu to non-single vcpu
>>in this case), the guest will mask the interrupt first, so before getting here, IRTE
>>has been changed back to remapped mode already(when guest masks the MSIx,
>>we will change back to remapped mode), hence nothing needed here.
>>
>>Digging into the linux code (guest) a bit more, I found that if interrupt remapping
>>is not enabled in the guest (IR is not supported for guest anyway), it will always
>>mask the MSI/MSIx before setting the irq affinity. So the code should work
>>well currently.
> 
> We should not rely on guest's behavior. From code level, it need be fixed.
> 
>>However, for robustness, I think explicitly changing IRTE back to remapped
>>mode for the 'continue' case should be a good idea.
> 
> This is what i am looking for.

I agree, that would be a nice addition.

IIRC, the masking is optional -- if the guest can handle interrupts that
are generated while the device is half-configured, it doesn't need to
disable MSIs.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Radim Krčmář Dec. 23, 2015, 5:21 p.m. UTC | #7
2015-12-16 09:37+0800, Feng Wu:
> Use vector-hashing to deliver lowest-priority interrupts for
> VT-d posted-interrupts.
> 
> Signed-off-by: Feng Wu <feng.wu@intel.com>
> ---
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> @@ -10702,8 +10702,16 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
>  		 */
>  
>  		kvm_set_msi_irq(e, &irq);
> -		if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu))
> -			continue;
> +
> +		if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
> +			if (!kvm_vector_hashing_enabled() ||
> +					irq.delivery_mode != APIC_DM_LOWEST)

Hm, wouldn't it actually be easier to extend kvm_intr_is_single_vcpu()
with vector hashing? :)

> +				continue;
> +
> +			vcpu = kvm_intr_vector_hashing_dest(kvm, &irq);
> +			if (!vcpu)
> +				continue;
> +		}
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Wu, Feng Jan. 4, 2016, 1:57 a.m. UTC | #8
> -----Original Message-----
> From: Radim Kr?má? [mailto:rkrcmar@redhat.com]
> Sent: Thursday, December 24, 2015 1:22 AM
> To: Wu, Feng <feng.wu@intel.com>
> Cc: pbonzini@redhat.com; kvm@vger.kernel.org; linux-
> kernel@vger.kernel.org
> Subject: Re: [PATCH v2 2/2] KVM: x86: Add lowest-priority support for vt-d
> posted-interrupts
> 
> 2015-12-16 09:37+0800, Feng Wu:
> > Use vector-hashing to deliver lowest-priority interrupts for
> > VT-d posted-interrupts.
> >
> > Signed-off-by: Feng Wu <feng.wu@intel.com>
> > ---
> > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> > @@ -10702,8 +10702,16 @@ static int vmx_update_pi_irte(struct kvm
> *kvm, unsigned int host_irq,
> >  		 */
> >
> >  		kvm_set_msi_irq(e, &irq);
> > -		if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu))
> > -			continue;
> > +
> > +		if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
> > +			if (!kvm_vector_hashing_enabled() ||
> > +					irq.delivery_mode !=
> APIC_DM_LOWEST)
> 
> Hm, wouldn't it actually be easier to extend kvm_intr_is_single_vcpu()
> with vector hashing? :)

Yes, it is a good idea, I will try that! Thanks a lot!

Thanks,
Feng
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index e29001f..d4f2c8f 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -854,6 +854,73 @@  out:
 }
 
 /*
+ * This routine handles lowest-priority interrupts using vector-hashing
+ * mechanism. As an example, modern Intel CPUs use this method to handle
+ * lowest-priority interrupts.
+ *
+ * Here is the details about the vector-hashing mechanism:
+ * 1. For lowest-priority interrupts, store all the possible destination
+ *    vCPUs in an array.
+ * 2. Use "guest vector % max number of destination vCPUs" to find the right
+ *    destination vCPU in the array for the lowest-priority interrupt.
+ */
+struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
+					      struct kvm_lapic_irq *irq)
+{
+	struct kvm_apic_map *map;
+	struct kvm_vcpu *vcpu = NULL;
+
+	if (irq->shorthand)
+		return NULL;
+
+	rcu_read_lock();
+	map = rcu_dereference(kvm->arch.apic_map);
+
+	if (!map)
+		goto out;
+
+	if ((irq->dest_mode != APIC_DEST_PHYSICAL) &&
+			kvm_lowest_prio_delivery(irq)) {
+		u16 cid;
+		int i, idx = 0;
+		unsigned long bitmap = 1;
+		unsigned int dest_vcpus = 0;
+		struct kvm_lapic **dst = NULL;
+
+
+		if (!kvm_apic_logical_map_valid(map))
+			goto out;
+
+		apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap);
+
+		if (cid >= ARRAY_SIZE(map->logical_map))
+			goto out;
+
+		dst = map->logical_map[cid];
+
+		for_each_set_bit(i, &bitmap, 16) {
+			if (!dst[i] && !kvm_lapic_enabled(dst[i]->vcpu)) {
+				clear_bit(i, &bitmap);
+				continue;
+			}
+		}
+
+		dest_vcpus = hweight16(bitmap);
+
+		if (dest_vcpus != 0) {
+			idx = kvm_vector_2_index(irq->vector, dest_vcpus,
+						 &bitmap, 16);
+			vcpu = dst[idx-1]->vcpu;
+		}
+	}
+
+out:
+	rcu_read_unlock();
+	return vcpu;
+}
+EXPORT_SYMBOL_GPL(kvm_intr_vector_hashing_dest);
+
+/*
  * Add a pending IRQ into lapic.
  * Return 1 if successfully added and 0 if discarded.
  */
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 6890ef0..52bffce 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -172,4 +172,6 @@  bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
 			struct kvm_vcpu **dest_vcpu);
 int kvm_vector_2_index(u32 vector, u32 dest_vcpus,
 		       const unsigned long *bitmap, u32 bitmap_size);
+struct kvm_vcpu *kvm_intr_vector_hashing_dest(struct kvm *kvm,
+					      struct kvm_lapic_irq *irq);
 #endif
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5eb56ed..3f89189 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -10702,8 +10702,16 @@  static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
 		 */
 
 		kvm_set_msi_irq(e, &irq);
-		if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu))
-			continue;
+
+		if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
+			if (!kvm_vector_hashing_enabled() ||
+					irq.delivery_mode != APIC_DM_LOWEST)
+				continue;
+
+			vcpu = kvm_intr_vector_hashing_dest(kvm, &irq);
+			if (!vcpu)
+				continue;
+		}
 
 		vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
 		vcpu_info.vector = irq.vector;