Message ID | 1442393409-2623-4-git-send-email-feng.wu@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 16/09/2015 10:49, Feng Wu wrote: > This patch defines a new interface kvm_intr_is_single_vcpu(), > which can returns whether the interrupt is for single-CPU or not. > > It is used by VT-d PI, since now we only support single-CPU > interrupts, For lowest-priority interrupts, if user configures > it via /proc/irq or uses irqbalance to make it single-CPU, we > can use PI to deliver the interrupts to it. Full functionality > of lowest-priority support will be added later. > > Signed-off-by: Feng Wu <feng.wu@intel.com> > --- > v8: > - Some optimizations in kvm_intr_is_single_vcpu(). > - Expose kvm_intr_is_single_vcpu() so we can use it in vmx code. > - Add kvm_intr_is_single_vcpu_fast() as the fast path to find > the target vCPU for the single-destination interrupt > > arch/x86/include/asm/kvm_host.h | 3 ++ > arch/x86/kvm/irq_comm.c | 94 +++++++++++++++++++++++++++++++++++++++++ > arch/x86/kvm/lapic.c | 5 +-- > arch/x86/kvm/lapic.h | 2 + > 4 files changed, 101 insertions(+), 3 deletions(-) > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index 49ec903..af11bca 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -1204,4 +1204,7 @@ int __x86_set_memory_region(struct kvm *kvm, > int x86_set_memory_region(struct kvm *kvm, > const struct kvm_userspace_memory_region *mem); > > +bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, > + struct kvm_vcpu **dest_vcpu); > + > #endif /* _ASM_X86_KVM_HOST_H */ > diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c > index 9efff9e..97ba1d6 100644 > --- a/arch/x86/kvm/irq_comm.c > +++ b/arch/x86/kvm/irq_comm.c > @@ -297,6 +297,100 @@ out: > return r; > } > > +static bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, > + struct kvm_lapic_irq *irq, > + struct kvm_vcpu **dest_vcpu) Please put this in lapic.c, similar to kvm_irq_delivery_to_apic_fast, so that you do not have to export other functions. > +{ > + struct kvm_apic_map *map; > + bool ret = false; > + struct kvm_lapic *dst = NULL; > + > + if (irq->shorthand) > + return false; > + > + rcu_read_lock(); > + map = rcu_dereference(kvm->arch.apic_map); > + > + if (!map) > + goto out; > + > + if (irq->dest_mode == APIC_DEST_PHYSICAL) { > + if (irq->dest_id == 0xFF) > + goto out; > + > + if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) { Warning here is wrong, the guest can trigger it. > + WARN_ON_ONCE(1); > + goto out; > + } > + > + dst = map->phys_map[irq->dest_id]; > + if (dst && kvm_apic_present(dst->vcpu)) > + *dest_vcpu = dst->vcpu; > + else > + goto out; > + } else { > + u16 cid; > + unsigned long bitmap = 1; > + int i, r = 0; > + > + if (!kvm_apic_logical_map_valid(map)) { > + WARN_ON_ONCE(1); Same here. > + goto out; > + } > + > + apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap); > + > + if (cid >= ARRAY_SIZE(map->logical_map)) { > + WARN_ON_ONCE(1); Same here. Otherwise looks good. Paolo > + goto out; > + } > + > + for_each_set_bit(i, &bitmap, 16) { > + dst = map->logical_map[cid][i]; > + if (++r == 2) > + goto out; > + } > + > + if (dst && kvm_apic_present(dst->vcpu)) > + *dest_vcpu = dst->vcpu; > + else > + goto out; > + } > + > + ret = true; > +out: > + rcu_read_unlock(); > + return ret; > +} > + > + > +bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, > + struct kvm_vcpu **dest_vcpu) > +{ > + int i, r = 0; > + struct kvm_vcpu *vcpu; > + > + if (kvm_intr_is_single_vcpu_fast(kvm, irq, dest_vcpu)) > + return true; > + > + kvm_for_each_vcpu(i, vcpu, kvm) { > + if (!kvm_apic_present(vcpu)) > + continue; > + > + if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand, > + irq->dest_id, irq->dest_mode)) > + continue; > + > + if (++r == 2) > + return false; > + > + *dest_vcpu = vcpu; > + } > + > + return r == 1; > +} > +EXPORT_SYMBOL_GPL(kvm_intr_is_single_vcpu); > + > #define IOAPIC_ROUTING_ENTRY(irq) \ > { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ > .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } } > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > index 2a5ca97..9848cd50 100644 > --- a/arch/x86/kvm/lapic.c > +++ b/arch/x86/kvm/lapic.c > @@ -136,13 +136,12 @@ static inline int kvm_apic_id(struct kvm_lapic *apic) > /* The logical map is definitely wrong if we have multiple > * modes at the same time. (Physical map is always right.) > */ > -static inline bool kvm_apic_logical_map_valid(struct kvm_apic_map *map) > +bool kvm_apic_logical_map_valid(struct kvm_apic_map *map) > { > return !(map->mode & (map->mode - 1)); > } > > -static inline void > -apic_logical_id(struct kvm_apic_map *map, u32 dest_id, u16 *cid, u16 *lid) > +void apic_logical_id(struct kvm_apic_map *map, u32 dest_id, u16 *cid, u16 *lid) > { > unsigned lid_bits; > > diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h > index 7195274..6798b87 100644 > --- a/arch/x86/kvm/lapic.h > +++ b/arch/x86/kvm/lapic.h > @@ -169,4 +169,6 @@ bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); > > void wait_lapic_expire(struct kvm_vcpu *vcpu); > > +void apic_logical_id(struct kvm_apic_map *map, u32 dest_id, u16 *cid, u16 *lid); > +bool kvm_apic_logical_map_valid(struct kvm_apic_map *map); > #endif > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> -----Original Message----- > From: Paolo Bonzini [mailto:pbonzini@redhat.com] > Sent: Wednesday, September 16, 2015 5:23 PM > To: Wu, Feng; alex.williamson@redhat.com; joro@8bytes.org; > mtosatti@redhat.com > Cc: eric.auger@linaro.org; kvm@vger.kernel.org; > iommu@lists.linux-foundation.org; linux-kernel@vger.kernel.org > Subject: Re: [PATCH v8 03/13] KVM: Define a new interface > kvm_intr_is_single_vcpu() > > > > On 16/09/2015 10:49, Feng Wu wrote: > > This patch defines a new interface kvm_intr_is_single_vcpu(), > > which can returns whether the interrupt is for single-CPU or not. > > > > It is used by VT-d PI, since now we only support single-CPU > > interrupts, For lowest-priority interrupts, if user configures > > it via /proc/irq or uses irqbalance to make it single-CPU, we > > can use PI to deliver the interrupts to it. Full functionality > > of lowest-priority support will be added later. > > > > Signed-off-by: Feng Wu <feng.wu@intel.com> > > --- > > v8: > > - Some optimizations in kvm_intr_is_single_vcpu(). > > - Expose kvm_intr_is_single_vcpu() so we can use it in vmx code. > > - Add kvm_intr_is_single_vcpu_fast() as the fast path to find > > the target vCPU for the single-destination interrupt > > > > arch/x86/include/asm/kvm_host.h | 3 ++ > > arch/x86/kvm/irq_comm.c | 94 > +++++++++++++++++++++++++++++++++++++++++ > > arch/x86/kvm/lapic.c | 5 +-- > > arch/x86/kvm/lapic.h | 2 + > > 4 files changed, 101 insertions(+), 3 deletions(-) > > > > diff --git a/arch/x86/include/asm/kvm_host.h > b/arch/x86/include/asm/kvm_host.h > > index 49ec903..af11bca 100644 > > --- a/arch/x86/include/asm/kvm_host.h > > +++ b/arch/x86/include/asm/kvm_host.h > > @@ -1204,4 +1204,7 @@ int __x86_set_memory_region(struct kvm *kvm, > > int x86_set_memory_region(struct kvm *kvm, > > const struct kvm_userspace_memory_region *mem); > > > > +bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, > > + struct kvm_vcpu **dest_vcpu); > > + > > #endif /* _ASM_X86_KVM_HOST_H */ > > diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c > > index 9efff9e..97ba1d6 100644 > > --- a/arch/x86/kvm/irq_comm.c > > +++ b/arch/x86/kvm/irq_comm.c > > @@ -297,6 +297,100 @@ out: > > return r; > > } > > > > +static bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, > > + struct kvm_lapic_irq *irq, > > + struct kvm_vcpu **dest_vcpu) > > Please put this in lapic.c, similar to kvm_irq_delivery_to_apic_fast, so > that you do not have to export other functions. > > > +{ > > + struct kvm_apic_map *map; > > + bool ret = false; > > + struct kvm_lapic *dst = NULL; > > + > > + if (irq->shorthand) > > + return false; > > + > > + rcu_read_lock(); > > + map = rcu_dereference(kvm->arch.apic_map); > > + > > + if (!map) > > + goto out; > > + > > + if (irq->dest_mode == APIC_DEST_PHYSICAL) { > > + if (irq->dest_id == 0xFF) > > + goto out; > > + > > + if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) { > > Warning here is wrong, the guest can trigger it. Could you please share more information about how the guest triggers these conditions (including the following two), Thanks a lot! Thanks, Feng > > > + WARN_ON_ONCE(1); > > + goto out; > > + } > > + > > + dst = map->phys_map[irq->dest_id]; > > + if (dst && kvm_apic_present(dst->vcpu)) > > + *dest_vcpu = dst->vcpu; > > + else > > + goto out; > > + } else { > > + u16 cid; > > + unsigned long bitmap = 1; > > + int i, r = 0; > > + > > + if (!kvm_apic_logical_map_valid(map)) { > > + WARN_ON_ONCE(1); > > Same here. > > > + goto out; > > + } > > + > > + apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap); > > + > > + if (cid >= ARRAY_SIZE(map->logical_map)) { > > + WARN_ON_ONCE(1); > > Same here. > > Otherwise looks good. > > Paolo > > > + goto out; > > + } > > + > > + for_each_set_bit(i, &bitmap, 16) { > > + dst = map->logical_map[cid][i]; > > + if (++r == 2) > > + goto out; > > + } > > + > > + if (dst && kvm_apic_present(dst->vcpu)) > > + *dest_vcpu = dst->vcpu; > > + else > > + goto out; > > + } > > + > > + ret = true; > > +out: > > + rcu_read_unlock(); > > + return ret; > > +} > > + > > + > > +bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, > > + struct kvm_vcpu **dest_vcpu) > > +{ > > + int i, r = 0; > > + struct kvm_vcpu *vcpu; > > + > > + if (kvm_intr_is_single_vcpu_fast(kvm, irq, dest_vcpu)) > > + return true; > > + > > + kvm_for_each_vcpu(i, vcpu, kvm) { > > + if (!kvm_apic_present(vcpu)) > > + continue; > > + > > + if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand, > > + irq->dest_id, irq->dest_mode)) > > + continue; > > + > > + if (++r == 2) > > + return false; > > + > > + *dest_vcpu = vcpu; > > + } > > + > > + return r == 1; > > +} > > +EXPORT_SYMBOL_GPL(kvm_intr_is_single_vcpu); > > + > > #define IOAPIC_ROUTING_ENTRY(irq) \ > > { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ > > .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } } > > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > > index 2a5ca97..9848cd50 100644 > > --- a/arch/x86/kvm/lapic.c > > +++ b/arch/x86/kvm/lapic.c > > @@ -136,13 +136,12 @@ static inline int kvm_apic_id(struct kvm_lapic > *apic) > > /* The logical map is definitely wrong if we have multiple > > * modes at the same time. (Physical map is always right.) > > */ > > -static inline bool kvm_apic_logical_map_valid(struct kvm_apic_map *map) > > +bool kvm_apic_logical_map_valid(struct kvm_apic_map *map) > > { > > return !(map->mode & (map->mode - 1)); > > } > > > > -static inline void > > -apic_logical_id(struct kvm_apic_map *map, u32 dest_id, u16 *cid, u16 *lid) > > +void apic_logical_id(struct kvm_apic_map *map, u32 dest_id, u16 *cid, u16 > *lid) > > { > > unsigned lid_bits; > > > > diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h > > index 7195274..6798b87 100644 > > --- a/arch/x86/kvm/lapic.h > > +++ b/arch/x86/kvm/lapic.h > > @@ -169,4 +169,6 @@ bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, > int vector); > > > > void wait_lapic_expire(struct kvm_vcpu *vcpu); > > > > +void apic_logical_id(struct kvm_apic_map *map, u32 dest_id, u16 *cid, u16 > *lid); > > +bool kvm_apic_logical_map_valid(struct kvm_apic_map *map); > > #endif > > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 17/09/2015 05:17, Wu, Feng wrote: >>> > > + if (irq->dest_mode == APIC_DEST_PHYSICAL) { >>> > > + if (irq->dest_id == 0xFF) >>> > > + goto out; >>> > > + >>> > > + if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) { >> > >> > Warning here is wrong, the guest can trigger it. > Could you please share more information about how the guest > triggers these conditions (including the following two), Thanks > a lot! irq->dest_id is a 16-bit value, so it can be > 255. > + if (!kvm_apic_logical_map_valid(map)) { > + WARN_ON_ONCE(1); Here, the guest can trigger it by setting a few APICs in flat mode and others in cluster mode, for example. > + if (cid >= ARRAY_SIZE(map->logical_map)) { > + WARN_ON_ONCE(1); In x2apic mode irq->dest_id could have bits 12..15 set. Paolo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> -----Original Message----- > From: Paolo Bonzini [mailto:pbonzini@redhat.com] > Sent: Thursday, September 17, 2015 5:42 PM > To: Wu, Feng; alex.williamson@redhat.com; joro@8bytes.org; > mtosatti@redhat.com > Cc: eric.auger@linaro.org; kvm@vger.kernel.org; > iommu@lists.linux-foundation.org; linux-kernel@vger.kernel.org > Subject: Re: [PATCH v8 03/13] KVM: Define a new interface > kvm_intr_is_single_vcpu() > > > > On 17/09/2015 05:17, Wu, Feng wrote: > >>> > > + if (irq->dest_mode == APIC_DEST_PHYSICAL) { > >>> > > + if (irq->dest_id == 0xFF) > >>> > > + goto out; > >>> > > + > >>> > > + if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) { > >> > > >> > Warning here is wrong, the guest can trigger it. > > Could you please share more information about how the guest > > triggers these conditions (including the following two), Thanks > > a lot! > > irq->dest_id is a 16-bit value, so it can be > 255. Yes, irq->dest_id is defined as u32, but by looking the current KVM code, seems desst_id is used as an u8 variable, even in x2apic mode the dest_id will not beyond 255 (except for broadcast dest in in x2apic mode). Correct me if I am wrong. Thanks a lot! > > > + if (!kvm_apic_logical_map_valid(map)) { > > + WARN_ON_ONCE(1); > > Here, the guest can trigger it by setting a few APICs in flat mode and > others in cluster mode, for example. Oh, right, the logical map works only when the destination mode of all the vCPUs are the same. > > > + if (cid >= ARRAY_SIZE(map->logical_map)) { > > + WARN_ON_ONCE(1); > > In x2apic mode irq->dest_id could have bits 12..15 set. cid is gotten from bit 16 ..31 of the ldr (in apic_logical_id()), and in x2apic mode, ldr is constructed in kvm_apic_set_x2apic_id() as below: u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf)); So in fact, cid is (id >> 4), I cannot think of why cid can beyond 15. Do I miss something here? Thanks! Thanks, Feng > > Paolo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
>> On 17/09/2015 05:17, Wu, Feng wrote: >>>>>>> + if (irq->dest_mode == APIC_DEST_PHYSICAL) { >>>>>>> + if (irq->dest_id == 0xFF) >>>>>>> + goto out; >>>>>>> + >>>>>>> + if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) { >>>>> >>>>> Warning here is wrong, the guest can trigger it. >>> Could you please share more information about how the guest >>> triggers these conditions (including the following two), Thanks >>> a lot! >> >> irq->dest_id is a 16-bit value, so it can be > 255. > > Yes, irq->dest_id is defined as u32, but by looking the current KVM > code, seems desst_id is used as an u8 variable, even in x2apic mode > the dest_id will not beyond 255 (except for broadcast dest in in x2apic > mode). Correct me if I am wrong. Thanks a lot! Actually you're right, the MSI destination is only 8 bits. I was confused because of #define MSI_ADDR_DEST_ID_MASK 0x00ffff0 in arch/x86/include/asm/msidef.h. But there may be a bug, see below... >>> + if (cid >= ARRAY_SIZE(map->logical_map)) { >>> + WARN_ON_ONCE(1); >> >> In x2apic mode irq->dest_id could have bits 12..15 set. > > cid is gotten from bit 16 ..31 of the ldr (in apic_logical_id()), and > in x2apic mode, ldr is constructed in kvm_apic_set_x2apic_id() as > below: > > u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf)); > > So in fact, cid is (id >> 4), I cannot think of why cid can beyond 15. I think kvm_apic_match_logical_addr for MSI and IOAPIC interrupts is buggy in x2apic mode. It does: if (apic_x2apic_mode(apic)) return ((logical_id >> 16) == (mda >> 16)) && (logical_id & mda & 0xffff) != 0; But mda is only 8-bits for MSI and IOAPIC interrupts. Radim, should kvm_apic_mda also handle the !ipi && x2apic_mda && dest_id != APIC_BROADCAST case? It never triggers with Linux because it uses only the physical mode (that's not super-easy to see; ioapic_set_affinity looks for the RTEs in irq_data->chip_data and that is allocated with kzalloc). > Do I miss something here? Thanks! No, you were right. But still I think the WARNs are unnecessary; it is conceivable that some future chipset adds support for more than 8-bits in the dest_id. Paolo > Thanks, > Feng > >> >> Paolo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
2015-09-17 16:24+0200, Paolo Bonzini: > I think kvm_apic_match_logical_addr for MSI and IOAPIC interrupts is > buggy in x2apic mode. > > It does: > > if (apic_x2apic_mode(apic)) > return ((logical_id >> 16) == (mda >> 16)) > && (logical_id & mda & 0xffff) != 0; > > But mda is only 8-bits for MSI and IOAPIC interrupts. > > Radim, should kvm_apic_mda also handle the !ipi && x2apic_mda && dest_id > != APIC_BROADCAST case? It never triggers with Linux because it uses > only the physical mode (that's not super-easy to see; > ioapic_set_affinity looks for the RTEs in irq_data->chip_data and that > is allocated with kzalloc). KVM handles that case, it's just convoluted. (I wish we scrapped the IR-less x2APIC mode.) For interrupts from MSI and IOxAPIC: - Flat logical interrupts are delivered as if we had natural (CPU0<->bit0, CPU1<->bit1, ...) flat logical xAPIC for first 8 VCPUs. - Cluster logical doesn't work much, it's interpreted like flat logical. I didn't care about xAPIC cluster because Linux, the sole user of our paravirtualized x2APIC, doesn't configure it. I'll paste kvm_apic_mda() source for better explanation: static u32 kvm_apic_mda(unsigned int dest_id, struct kvm_lapic *source, struct kvm_lapic *target) { bool ipi = source != NULL; bool x2apic_mda = apic_x2apic_mode(ipi ? source : target); if (!ipi && dest_id == APIC_BROADCAST && x2apic_mda) return X2APIC_BROADCAST; return x2apic_mda ? dest_id : SET_APIC_DEST_FIELD(dest_id); } MSI/IOxAPIC interrupt means that source is NULL and if the target is in x2APIC mode, the original 'dest_id' is returned as mda => a flat logical xAPIC to 0x0f will get interpreted as (cluster) logical x2APIC 0xf in kvm_apic_match_logical_addr(). xAPIC address are only 8 bit long so they always get delivered to x2APIC cluster 0, where first 16 bits work like xAPIC flat logical mode. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 17/09/2015 17:58, Radim Kr?má? wrote: > For interrupts from MSI and IOxAPIC: > - Flat logical interrupts are delivered as if we had natural > (CPU0<->bit0, CPU1<->bit1, ...) flat logical xAPIC for first 8 VCPUs. > - Cluster logical doesn't work much, it's interpreted like flat logical. > I didn't care about xAPIC cluster because Linux, the sole user of our > paravirtualized x2APIC, doesn't configure it. > > I'll paste kvm_apic_mda() source for better explanation: > > static u32 kvm_apic_mda(unsigned int dest_id, struct kvm_lapic *source, > struct kvm_lapic *target) > { > bool ipi = source != NULL; > bool x2apic_mda = apic_x2apic_mode(ipi ? source : target); > > if (!ipi && dest_id == APIC_BROADCAST && x2apic_mda) > return X2APIC_BROADCAST; > > return x2apic_mda ? dest_id : SET_APIC_DEST_FIELD(dest_id); > } > > MSI/IOxAPIC interrupt means that source is NULL and if the target is in > x2APIC mode, the original 'dest_id' is returned as mda => a flat logical > xAPIC to 0x0f will get interpreted as (cluster) logical x2APIC 0xf in > kvm_apic_match_logical_addr(). > xAPIC address are only 8 bit long so they always get delivered to x2APIC > cluster 0, where first 16 bits work like xAPIC flat logical mode. Ok, I was wondering whether this was the correct interpretation. Thanks! Paolo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> -----Original Message----- > From: Paolo Bonzini [mailto:pbonzini@redhat.com] > Sent: Thursday, September 17, 2015 10:25 PM > To: Wu, Feng; alex.williamson@redhat.com; joro@8bytes.org; > mtosatti@redhat.com > Cc: eric.auger@linaro.org; kvm@vger.kernel.org; > iommu@lists.linux-foundation.org; linux-kernel@vger.kernel.org; Radim > Kr?má? > Subject: Re: [PATCH v8 03/13] KVM: Define a new interface > kvm_intr_is_single_vcpu() > > >> On 17/09/2015 05:17, Wu, Feng wrote: > >>>>>>> + if (irq->dest_mode == APIC_DEST_PHYSICAL) { > >>>>>>> + if (irq->dest_id == 0xFF) > >>>>>>> + goto out; > >>>>>>> + > >>>>>>> + if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) { > >>>>> > >>>>> Warning here is wrong, the guest can trigger it. > >>> Could you please share more information about how the guest > >>> triggers these conditions (including the following two), Thanks > >>> a lot! > >> > >> irq->dest_id is a 16-bit value, so it can be > 255. > > > > Yes, irq->dest_id is defined as u32, but by looking the current KVM > > code, seems desst_id is used as an u8 variable, even in x2apic mode > > the dest_id will not beyond 255 (except for broadcast dest in in x2apic > > mode). Correct me if I am wrong. Thanks a lot! > > Actually you're right, the MSI destination is only 8 bits. I was > confused because of > > #define MSI_ADDR_DEST_ID_MASK 0x00ffff0 > > in arch/x86/include/asm/msidef.h. But there may be a bug, see below... > > >>> + if (cid >= ARRAY_SIZE(map->logical_map)) { > >>> + WARN_ON_ONCE(1); > >> > >> In x2apic mode irq->dest_id could have bits 12..15 set. > > > > cid is gotten from bit 16 ..31 of the ldr (in apic_logical_id()), and > > in x2apic mode, ldr is constructed in kvm_apic_set_x2apic_id() as > > below: > > > > u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf)); > > > > So in fact, cid is (id >> 4), I cannot think of why cid can beyond 15. > > I think kvm_apic_match_logical_addr for MSI and IOAPIC interrupts is > buggy in x2apic mode. > > It does: > > if (apic_x2apic_mode(apic)) > return ((logical_id >> 16) == (mda >> 16)) > && (logical_id & mda & 0xffff) != 0; > > But mda is only 8-bits for MSI and IOAPIC interrupts. > > Radim, should kvm_apic_mda also handle the !ipi && x2apic_mda && dest_id > != APIC_BROADCAST case? It never triggers with Linux because it uses > only the physical mode (that's not super-easy to see; > ioapic_set_affinity looks for the RTEs in irq_data->chip_data and that > is allocated with kzalloc). > > > Do I miss something here? Thanks! > > No, you were right. > > But still I think the WARNs are unnecessary; it is conceivable that some > future chipset adds support for more than 8-bits in the dest_id. No problem, I agree with it. Here I just want clarify some questions, thanks for the elaboration! Thanks, Feng > > Paolo > > > Thanks, > > Feng > > > >> > >> Paolo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
DQoNCj4gLS0tLS1PcmlnaW5hbCBNZXNzYWdlLS0tLS0NCj4gRnJvbTogUGFvbG8gQm9uemluaSBb bWFpbHRvOnBib256aW5pQHJlZGhhdC5jb21dDQo+IFNlbnQ6IEZyaWRheSwgU2VwdGVtYmVyIDE4 LCAyMDE1IDEyOjAwIEFNDQo+IFRvOiBSYWRpbSBLcsSNbcOhxZkNCj4gQ2M6IFd1LCBGZW5nOyBh bGV4LndpbGxpYW1zb25AcmVkaGF0LmNvbTsgam9yb0A4Ynl0ZXMub3JnOw0KPiBtdG9zYXR0aUBy ZWRoYXQuY29tOyBlcmljLmF1Z2VyQGxpbmFyby5vcmc7IGt2bUB2Z2VyLmtlcm5lbC5vcmc7DQo+ IGlvbW11QGxpc3RzLmxpbnV4LWZvdW5kYXRpb24ub3JnOyBsaW51eC1rZXJuZWxAdmdlci5rZXJu ZWwub3JnDQo+IFN1YmplY3Q6IFJlOiBbUEFUQ0ggdjggMDMvMTNdIEtWTTogRGVmaW5lIGEgbmV3 IGludGVyZmFjZQ0KPiBrdm1faW50cl9pc19zaW5nbGVfdmNwdSgpDQo+IA0KPiANCj4gDQo+IE9u IDE3LzA5LzIwMTUgMTc6NTgsIFJhZGltIEtyxI1tw6HFmSB3cm90ZToNCj4gPiBGb3IgaW50ZXJy dXB0cyBmcm9tIE1TSSBhbmQgSU94QVBJQzoNCj4gPiAtIEZsYXQgbG9naWNhbCBpbnRlcnJ1cHRz IGFyZSBkZWxpdmVyZWQgYXMgaWYgd2UgaGFkIG5hdHVyYWwNCj4gPiAgIChDUFUwPC0+Yml0MCwg Q1BVMTwtPmJpdDEsIC4uLikgZmxhdCBsb2dpY2FsIHhBUElDIGZvciBmaXJzdCA4IFZDUFVzLg0K PiA+IC0gQ2x1c3RlciBsb2dpY2FsIGRvZXNuJ3Qgd29yayBtdWNoLCBpdCdzIGludGVycHJldGVk IGxpa2UgZmxhdCBsb2dpY2FsLg0KPiA+ICAgSSBkaWRuJ3QgY2FyZSBhYm91dCB4QVBJQyBjbHVz dGVyIGJlY2F1c2UgTGludXgsIHRoZSBzb2xlIHVzZXIgb2Ygb3VyDQo+ID4gICBwYXJhdmlydHVh bGl6ZWQgeDJBUElDLCBkb2Vzbid0IGNvbmZpZ3VyZSBpdC4NCj4gPg0KPiA+IEknbGwgcGFzdGUg a3ZtX2FwaWNfbWRhKCkgc291cmNlIGZvciBiZXR0ZXIgZXhwbGFuYXRpb246DQo+ID4NCj4gPiAg IHN0YXRpYyB1MzIga3ZtX2FwaWNfbWRhKHVuc2lnbmVkIGludCBkZXN0X2lkLCBzdHJ1Y3Qga3Zt X2xhcGljICpzb3VyY2UsDQo+ID4gICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAg ICAgICAgICAgICAgc3RydWN0IGt2bV9sYXBpYw0KPiAqdGFyZ2V0KQ0KPiA+ICAgew0KPiA+ICAg CWJvb2wgaXBpID0gc291cmNlICE9IE5VTEw7DQo+ID4gICAJYm9vbCB4MmFwaWNfbWRhID0gYXBp Y194MmFwaWNfbW9kZShpcGkgPyBzb3VyY2UgOiB0YXJnZXQpOw0KPiA+DQo+ID4gICAJaWYgKCFp cGkgJiYgZGVzdF9pZCA9PSBBUElDX0JST0FEQ0FTVCAmJiB4MmFwaWNfbWRhKQ0KPiA+ICAgCQly ZXR1cm4gWDJBUElDX0JST0FEQ0FTVDsNCj4gPg0KPiA+ICAgCXJldHVybiB4MmFwaWNfbWRhID8g ZGVzdF9pZCA6IFNFVF9BUElDX0RFU1RfRklFTEQoZGVzdF9pZCk7DQo+ID4gICB9DQo+ID4NCj4g PiBNU0kvSU94QVBJQyBpbnRlcnJ1cHQgbWVhbnMgdGhhdCBzb3VyY2UgaXMgTlVMTCBhbmQgaWYg dGhlIHRhcmdldCBpcyBpbg0KPiA+IHgyQVBJQyBtb2RlLCB0aGUgb3JpZ2luYWwgJ2Rlc3RfaWQn IGlzIHJldHVybmVkIGFzIG1kYSA9PiBhIGZsYXQgbG9naWNhbA0KPiA+IHhBUElDIHRvIDB4MGYg d2lsbCBnZXQgaW50ZXJwcmV0ZWQgYXMgKGNsdXN0ZXIpIGxvZ2ljYWwgeDJBUElDIDB4ZiBpbg0K PiA+IGt2bV9hcGljX21hdGNoX2xvZ2ljYWxfYWRkcigpLg0KPiA+IHhBUElDIGFkZHJlc3MgYXJl IG9ubHkgOCBiaXQgbG9uZyBzbyB0aGV5IGFsd2F5cyBnZXQgZGVsaXZlcmVkIHRvIHgyQVBJQw0K PiA+IGNsdXN0ZXIgMCwgd2hlcmUgZmlyc3QgMTYgYml0cyB3b3JrIGxpa2UgeEFQSUMgZmxhdCBs b2dpY2FsIG1vZGUuDQo+IA0KPiBPaywgSSB3YXMgd29uZGVyaW5nIHdoZXRoZXIgdGhpcyB3YXMg dGhlIGNvcnJlY3QgaW50ZXJwcmV0YXRpb24uICBUaGFua3MhDQoNClBhb2xvLCBJIGRvbid0IHRo aW5rIFJhZGltIGNsYXJpZnkgeW91ciBjb25jZXJuLCByaWdodD8gU2luY2UgbWRhIGlzIDgtYml0 LCBpdA0KaXMgd3Jvbmcgd2l0aCBtZGEgPj4gMTYsIHRoaXMgaXMgeW91ciBjb25jZXJuLCByaWdo dD8NCg0KVGhhbmtzLA0KRmVuZw0KDQo+IA0KPiBQYW9sbw0K -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
2015-09-17 23:18+0000, Wu, Feng: >> From: Paolo Bonzini [mailto:pbonzini@redhat.com] >> On 17/09/2015 17:58, Radim Kr?má? wrote: >>> xAPIC address are only 8 bit long so they always get delivered to x2APIC >>> cluster 0, where first 16 bits work like xAPIC flat logical mode. >> >> Ok, I was wondering whether this was the correct interpretation. Thanks! > > Paolo, I don't think Radim clarify your concern, right? Since mda is 8-bit, it > is wrong with mda >> 16, this is your concern, right? In case it was: mda is u32 so the bitshift is defined by C. (xAPIC destinations in KVM's x2APIC mode are stored in lowest 8 bits of mda, hence the cluster is always 0.) Or am I still missing the point? -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 18/09/2015 18:16, Radim Kr?má? wrote: >>> >> Ok, I was wondering whether this was the correct interpretation. Thanks! >> > >> > Paolo, I don't think Radim clarify your concern, right? Since mda is 8-bit, it >> > is wrong with mda >> 16, this is your concern, right? > In case it was: mda is u32 so the bitshift is defined by C. > (xAPIC destinations in KVM's x2APIC mode are stored in lowest 8 bits of > mda, hence the cluster is always 0.) > > Or am I still missing the point? Yes, remembering that the cluster is always 0 solved my doubt. Paolo -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 49ec903..af11bca 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1204,4 +1204,7 @@ int __x86_set_memory_region(struct kvm *kvm, int x86_set_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem); +bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, + struct kvm_vcpu **dest_vcpu); + #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 9efff9e..97ba1d6 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -297,6 +297,100 @@ out: return r; } +static bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, + struct kvm_lapic_irq *irq, + struct kvm_vcpu **dest_vcpu) +{ + struct kvm_apic_map *map; + bool ret = false; + struct kvm_lapic *dst = NULL; + + if (irq->shorthand) + return false; + + rcu_read_lock(); + map = rcu_dereference(kvm->arch.apic_map); + + if (!map) + goto out; + + if (irq->dest_mode == APIC_DEST_PHYSICAL) { + if (irq->dest_id == 0xFF) + goto out; + + if (irq->dest_id >= ARRAY_SIZE(map->phys_map)) { + WARN_ON_ONCE(1); + goto out; + } + + dst = map->phys_map[irq->dest_id]; + if (dst && kvm_apic_present(dst->vcpu)) + *dest_vcpu = dst->vcpu; + else + goto out; + } else { + u16 cid; + unsigned long bitmap = 1; + int i, r = 0; + + if (!kvm_apic_logical_map_valid(map)) { + WARN_ON_ONCE(1); + goto out; + } + + apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap); + + if (cid >= ARRAY_SIZE(map->logical_map)) { + WARN_ON_ONCE(1); + goto out; + } + + for_each_set_bit(i, &bitmap, 16) { + dst = map->logical_map[cid][i]; + if (++r == 2) + goto out; + } + + if (dst && kvm_apic_present(dst->vcpu)) + *dest_vcpu = dst->vcpu; + else + goto out; + } + + ret = true; +out: + rcu_read_unlock(); + return ret; +} + + +bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, + struct kvm_vcpu **dest_vcpu) +{ + int i, r = 0; + struct kvm_vcpu *vcpu; + + if (kvm_intr_is_single_vcpu_fast(kvm, irq, dest_vcpu)) + return true; + + kvm_for_each_vcpu(i, vcpu, kvm) { + if (!kvm_apic_present(vcpu)) + continue; + + if (!kvm_apic_match_dest(vcpu, NULL, irq->shorthand, + irq->dest_id, irq->dest_mode)) + continue; + + if (++r == 2) + return false; + + *dest_vcpu = vcpu; + } + + return r == 1; +} +EXPORT_SYMBOL_GPL(kvm_intr_is_single_vcpu); + #define IOAPIC_ROUTING_ENTRY(irq) \ { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } } diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 2a5ca97..9848cd50 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -136,13 +136,12 @@ static inline int kvm_apic_id(struct kvm_lapic *apic) /* The logical map is definitely wrong if we have multiple * modes at the same time. (Physical map is always right.) */ -static inline bool kvm_apic_logical_map_valid(struct kvm_apic_map *map) +bool kvm_apic_logical_map_valid(struct kvm_apic_map *map) { return !(map->mode & (map->mode - 1)); } -static inline void -apic_logical_id(struct kvm_apic_map *map, u32 dest_id, u16 *cid, u16 *lid) +void apic_logical_id(struct kvm_apic_map *map, u32 dest_id, u16 *cid, u16 *lid) { unsigned lid_bits; diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 7195274..6798b87 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -169,4 +169,6 @@ bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); void wait_lapic_expire(struct kvm_vcpu *vcpu); +void apic_logical_id(struct kvm_apic_map *map, u32 dest_id, u16 *cid, u16 *lid); +bool kvm_apic_logical_map_valid(struct kvm_apic_map *map); #endif
This patch defines a new interface kvm_intr_is_single_vcpu(), which can returns whether the interrupt is for single-CPU or not. It is used by VT-d PI, since now we only support single-CPU interrupts, For lowest-priority interrupts, if user configures it via /proc/irq or uses irqbalance to make it single-CPU, we can use PI to deliver the interrupts to it. Full functionality of lowest-priority support will be added later. Signed-off-by: Feng Wu <feng.wu@intel.com> --- v8: - Some optimizations in kvm_intr_is_single_vcpu(). - Expose kvm_intr_is_single_vcpu() so we can use it in vmx code. - Add kvm_intr_is_single_vcpu_fast() as the fast path to find the target vCPU for the single-destination interrupt arch/x86/include/asm/kvm_host.h | 3 ++ arch/x86/kvm/irq_comm.c | 94 +++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/lapic.c | 5 +-- arch/x86/kvm/lapic.h | 2 + 4 files changed, 101 insertions(+), 3 deletions(-)