Message ID | 20121110154539.3061.82553.stgit@chazy-air (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Sat, Nov 10, 2012 at 03:45:39PM +0000, Christoffer Dall wrote: > From: Marc Zyngier <marc.zyngier@arm.com> > > If we have level interrupts already programmed to fire on a vcpu, > there is no reason to kick it after injecting a new interrupt, > as we're guaranteed that we'll exit when the level interrupt will > be EOId (VGIC_LR_EOI is set). > > The exit will force a reload of the VGIC, injecting the new interrupts. > > Signed-off-by: Marc Zyngier <marc.zyngier@arm.com> > Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com> > --- > arch/arm/include/asm/kvm_vgic.h | 10 ++++++++++ > arch/arm/kvm/arm.c | 10 +++++++++- > arch/arm/kvm/vgic.c | 10 ++++++++-- > 3 files changed, 27 insertions(+), 3 deletions(-) > > diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h > index a8e7a93..7d2662c 100644 > --- a/arch/arm/include/asm/kvm_vgic.h > +++ b/arch/arm/include/asm/kvm_vgic.h > @@ -215,6 +215,9 @@ struct vgic_cpu { > u32 vgic_elrsr[2]; /* Saved only */ > u32 vgic_apr; > u32 vgic_lr[64]; /* A15 has only 4... */ > + > + /* Number of level-triggered interrupt in progress */ > + atomic_t irq_active_count; > #endif > }; > > @@ -254,6 +257,8 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, > > #define irqchip_in_kernel(k) (!!((k)->arch.vgic.vctrl_base)) > #define vgic_initialized(k) ((k)->arch.vgic.ready) > +#define vgic_active_irq(v) (atomic_read(&(v)->arch.vgic_cpu.irq_active_count) == 0) When is the atomic_t initialised to zero? I can only see increments. 
> + > #else > static inline int kvm_vgic_hyp_init(void) > { > @@ -305,6 +310,11 @@ static inline bool vgic_initialized(struct kvm *kvm) > { > return true; > } > + > +static inline int vgic_active_irq(struct kvm_vcpu *vcpu) > +{ > + return 0; > +} > #endif > > #endif > diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c > index a633d9d..1716f12 100644 > --- a/arch/arm/kvm/arm.c > +++ b/arch/arm/kvm/arm.c > @@ -94,7 +94,15 @@ int kvm_arch_hardware_enable(void *garbage) > > int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) > { > - return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; > + if (kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE) { > + if (vgic_active_irq(vcpu) && > + cmpxchg(&vcpu->mode, EXITING_GUEST_MODE, IN_GUEST_MODE) == EXITING_GUEST_MODE) > + return 0; > + > + return 1; > + } > + > + return 0; That's pretty nasty... why don't you check if there's an active interrupt before trying to change the vcpu mode? That way, you can avoid the double cmpxchg. > } > > void kvm_arch_hardware_disable(void *garbage) > diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c > index 415ddb8..146de1d 100644 > --- a/arch/arm/kvm/vgic.c > +++ b/arch/arm/kvm/vgic.c > @@ -705,8 +705,10 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) > kvm_debug("LR%d piggyback for IRQ%d %x\n", lr, irq, vgic_cpu->vgic_lr[lr]); > BUG_ON(!test_bit(lr, vgic_cpu->lr_used)); > vgic_cpu->vgic_lr[lr] |= VGIC_LR_PENDING_BIT; > - if (is_level) > + if (is_level) { > vgic_cpu->vgic_lr[lr] |= VGIC_LR_EOI; > + atomic_inc(&vgic_cpu->irq_active_count); > + } > return true; > } > > @@ -718,8 +720,10 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) > > kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id); > vgic_cpu->vgic_lr[lr] = MK_LR_PEND(sgi_source_id, irq); > - if (is_level) > + if (is_level) { > vgic_cpu->vgic_lr[lr] |= VGIC_LR_EOI; > + atomic_inc(&vgic_cpu->irq_active_count); > + } > > 
vgic_cpu->vgic_irq_lr_map[irq] = lr; > clear_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr); > @@ -1011,6 +1015,8 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data) > > vgic_bitmap_set_irq_val(&dist->irq_active, > vcpu->vcpu_id, irq, 0); > + atomic_dec(&vgic_cpu->irq_active_count); > + smp_mb(); If you actually need this, try smp_mb__after_atomic_dec although of course I'd like to know why it's required :) Will -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Dec 05, 2012 at 10:43:58AM +0000, Will Deacon wrote: > On Sat, Nov 10, 2012 at 03:45:39PM +0000, Christoffer Dall wrote: > > From: Marc Zyngier <marc.zyngier@arm.com> > > > > If we have level interrupts already programmed to fire on a vcpu, > > there is no reason to kick it after injecting a new interrupt, > > as we're guaranteed that we'll exit when the level interrupt will > > be EOId (VGIC_LR_EOI is set). > > > > The exit will force a reload of the VGIC, injecting the new interrupts. > > > > Signed-off-by: Marc Zyngier <marc.zyngier@arm.com> > > Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com> > > --- > > arch/arm/include/asm/kvm_vgic.h | 10 ++++++++++ > > arch/arm/kvm/arm.c | 10 +++++++++- > > arch/arm/kvm/vgic.c | 10 ++++++++-- > > 3 files changed, 27 insertions(+), 3 deletions(-) > > > > diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h > > index a8e7a93..7d2662c 100644 > > --- a/arch/arm/include/asm/kvm_vgic.h > > +++ b/arch/arm/include/asm/kvm_vgic.h > > @@ -215,6 +215,9 @@ struct vgic_cpu { > > u32 vgic_elrsr[2]; /* Saved only */ > > u32 vgic_apr; > > u32 vgic_lr[64]; /* A15 has only 4... */ > > + > > + /* Number of level-triggered interrupt in progress */ > > + atomic_t irq_active_count; > > #endif > > }; > > > > @@ -254,6 +257,8 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, > > > > #define irqchip_in_kernel(k) (!!((k)->arch.vgic.vctrl_base)) > > #define vgic_initialized(k) ((k)->arch.vgic.ready) > > +#define vgic_active_irq(v) (atomic_read(&(v)->arch.vgic_cpu.irq_active_count) == 0) > > When is the atomic_t initialised to zero? I can only see increments. I'd question whether an atomic type is correct for this; the only protection that it's offering is to ensure that the atomic increment and decrement occur atomically - there's nothing else that they're doing in this code. 
If those atomic increments and decrements are occurring beneath a common lock, then using atomic types is just mere code obfuscation. For example, I'd like to question the correctness of this: + if (vgic_active_irq(vcpu) && + cmpxchg(&vcpu->mode, EXITING_GUEST_MODE, IN_GUEST_MODE) == EXITING_GUEST_MODE) What if vgic_active_irq() reads the atomic type, immediately after it gets decremented to zero before the cmpxchg() is executed? Would that be a problem? If yes, yet again this illustrates why the use of atomic types leads people down the path of believing that their code somehow becomes magically safe through the use of this smoke-screen. IMHO, every use of atomic_t must be questioned and carefully analysed before it gets into the kernel - many are buggy through assumptions that atomic_t buys you something magic. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
For the sake of public education, let me rewrite this patch a bit to illustrate why atomic_t's are bad, and then people can review this instead. Every change I've made here is functionally equivalent to the behaviour of the atomic type; I have not added any new bugs here that aren't present in the original code. It is my hope that through education like this, people will see that atomic types have no magic properties, and their use does not make code automatically race free and correct; in fact, the inappropriate use of atomic types is pure obfuscation and causes confusion. On Sat, Nov 10, 2012 at 04:45:39PM +0100, Christoffer Dall wrote: > diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h > index a8e7a93..7d2662c 100644 > --- a/arch/arm/include/asm/kvm_vgic.h > +++ b/arch/arm/include/asm/kvm_vgic.h > @@ -215,6 +215,9 @@ struct vgic_cpu { > u32 vgic_elrsr[2]; /* Saved only */ > u32 vgic_apr; > u32 vgic_lr[64]; /* A15 has only 4... */ > + > + /* Number of level-triggered interrupt in progress */ > + atomic_t irq_active_count; + int irq_active_count; > #endif > }; > > @@ -254,6 +257,8 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, > > #define irqchip_in_kernel(k) (!!((k)->arch.vgic.vctrl_base)) > #define vgic_initialized(k) ((k)->arch.vgic.ready) > +#define vgic_active_irq(v) (atomic_read(&(v)->arch.vgic_cpu.irq_active_count) == 0) > + +#define vgic_active_irq(v) ((v)->arch.vgic_cpu.irq_active_count) > #else > static inline int kvm_vgic_hyp_init(void) > { > @@ -305,6 +310,11 @@ static inline bool vgic_initialized(struct kvm *kvm) > { > return true; > } > + > +static inline int vgic_active_irq(struct kvm_vcpu *vcpu) > +{ > + return 0; > +} > #endif > > #endif > diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c > index a633d9d..1716f12 100644 > --- a/arch/arm/kvm/arm.c > +++ b/arch/arm/kvm/arm.c > @@ -94,7 +94,15 @@ int kvm_arch_hardware_enable(void *garbage) > > int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) 
> { > - return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; > + if (kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE) { > + if (vgic_active_irq(vcpu) && > + cmpxchg(&vcpu->mode, EXITING_GUEST_MODE, IN_GUEST_MODE) == EXITING_GUEST_MODE) > + return 0; So with the above change to the macro, this becomes: + if (vcpu->arch.vgic_cpu.irq_active_count && + cmpxchg(&vcpu->mode, EXITING_GUEST_MODE, IN_GUEST_MODE) == EXITING_GUEST_MODE) > + > + return 1; > + } > + > + return 0; > } > > void kvm_arch_hardware_disable(void *garbage) > diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c > index 415ddb8..146de1d 100644 > --- a/arch/arm/kvm/vgic.c > +++ b/arch/arm/kvm/vgic.c > @@ -705,8 +705,10 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) > kvm_debug("LR%d piggyback for IRQ%d %x\n", lr, irq, vgic_cpu->vgic_lr[lr]); > BUG_ON(!test_bit(lr, vgic_cpu->lr_used)); > vgic_cpu->vgic_lr[lr] |= VGIC_LR_PENDING_BIT; > - if (is_level) > + if (is_level) { > vgic_cpu->vgic_lr[lr] |= VGIC_LR_EOI; > + atomic_inc(&vgic_cpu->irq_active_count); + spin_lock_irqsave(&atomic_lock, flags); + vgic_cpu->irq_active_count++; + spin_unlock_irqrestore(&atomic_lock, flags); > + } > return true; > } > > @@ -718,8 +720,10 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) > > kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id); > vgic_cpu->vgic_lr[lr] = MK_LR_PEND(sgi_source_id, irq); > - if (is_level) > + if (is_level) { > vgic_cpu->vgic_lr[lr] |= VGIC_LR_EOI; > + atomic_inc(&vgic_cpu->irq_active_count); + spin_lock_irqsave(&atomic_lock, flags); + vgic_cpu->irq_active_count++; + spin_unlock_irqrestore(&atomic_lock, flags); > + } > > vgic_cpu->vgic_irq_lr_map[irq] = lr; > clear_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr); > @@ -1011,6 +1015,8 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data) > > vgic_bitmap_set_irq_val(&dist->irq_active, > vcpu->vcpu_id, irq, 0); > + 
atomic_dec(&vgic_cpu->irq_active_count); + spin_lock_irqsave(&atomic_lock, flags); + vgic_cpu->irq_active_count--; + spin_unlock_irqrestore(&atomic_lock, flags); > + smp_mb(); > vgic_cpu->vgic_lr[lr] &= ~VGIC_LR_EOI; > writel_relaxed(vgic_cpu->vgic_lr[lr], > dist->vctrl_base + GICH_LR0 + (lr << 2)); > > > _______________________________________________ > linux-arm-kernel mailing list > linux-arm-kernel@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 05/12/12 10:58, Russell King - ARM Linux wrote: > On Wed, Dec 05, 2012 at 10:43:58AM +0000, Will Deacon wrote: >> On Sat, Nov 10, 2012 at 03:45:39PM +0000, Christoffer Dall wrote: >>> From: Marc Zyngier <marc.zyngier@arm.com> >>> >>> If we have level interrupts already programmed to fire on a vcpu, >>> there is no reason to kick it after injecting a new interrupt, >>> as we're guaranteed that we'll exit when the level interrupt will >>> be EOId (VGIC_LR_EOI is set). >>> >>> The exit will force a reload of the VGIC, injecting the new interrupts. >>> >>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com> >>> Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com> >>> --- >>> arch/arm/include/asm/kvm_vgic.h | 10 ++++++++++ >>> arch/arm/kvm/arm.c | 10 +++++++++- >>> arch/arm/kvm/vgic.c | 10 ++++++++-- >>> 3 files changed, 27 insertions(+), 3 deletions(-) >>> >>> diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h >>> index a8e7a93..7d2662c 100644 >>> --- a/arch/arm/include/asm/kvm_vgic.h >>> +++ b/arch/arm/include/asm/kvm_vgic.h >>> @@ -215,6 +215,9 @@ struct vgic_cpu { >>> u32 vgic_elrsr[2]; /* Saved only */ >>> u32 vgic_apr; >>> u32 vgic_lr[64]; /* A15 has only 4... */ >>> + >>> + /* Number of level-triggered interrupt in progress */ >>> + atomic_t irq_active_count; >>> #endif >>> }; >>> >>> @@ -254,6 +257,8 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, >>> >>> #define irqchip_in_kernel(k) (!!((k)->arch.vgic.vctrl_base)) >>> #define vgic_initialized(k) ((k)->arch.vgic.ready) >>> +#define vgic_active_irq(v) (atomic_read(&(v)->arch.vgic_cpu.irq_active_count) == 0) >> >> When is the atomic_t initialised to zero? I can only see increments. > > I'd question whether an atomic type is correct for this; the only > protection that it's offering is to ensure that the atomic increment > and decrement occur atomically - there's nothing else that they're doing > in this code. 
> > If those atomic increments and decrements are occuring beneath a common > lock, then using atomic types is just mere code obfuscation. No, they occur on code paths that do not have a common lock (one of them being an interrupt handler). This may change though, after one comment Will made earlier (the thing about delayed interrupts). If these two code sections become mutually exclusive, then indeed there will be no point in having an atomic type anymore. > For example, I'd like to question the correctness of this: > > + if (vgic_active_irq(vcpu) && > + cmpxchg(&vcpu->mode, EXITING_GUEST_MODE, IN_GUEST_MODE) == EXITING_GUEST_MODE) > > What if vgic_active_irq() reads the atomic type, immediately after it gets > decremented to zero before the cmpxchg() is executed? Would that be a > problem? I do not think so. If the value gets decremented, it means we took a maintenance interrupt, which means we exited the guest at some point. Two possibilities: - We're not in guest mode anymore (vcpu->mode = OUTSIDE_GUEST_MODE), and cmpxchg will fail, hence signaling the guest to reload its state. This is not needed (the guest will reload its state anyway), but doesn't cause any harm. - We're back into the guest (vcpu->mode = IN_GUEST_MODE), and cmpxchg will fail as well, triggering a reload which is needed this time. M.
On Wed, Dec 05, 2012 at 12:17:57PM +0000, Marc Zyngier wrote: > On 05/12/12 10:58, Russell King - ARM Linux wrote: > > On Wed, Dec 05, 2012 at 10:43:58AM +0000, Will Deacon wrote: > >> On Sat, Nov 10, 2012 at 03:45:39PM +0000, Christoffer Dall wrote: > >>> From: Marc Zyngier <marc.zyngier@arm.com> > >>> > >>> If we have level interrupts already programmed to fire on a vcpu, > >>> there is no reason to kick it after injecting a new interrupt, > >>> as we're guaranteed that we'll exit when the level interrupt will > >>> be EOId (VGIC_LR_EOI is set). > >>> > >>> The exit will force a reload of the VGIC, injecting the new interrupts. > >>> > >>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com> > >>> Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com> > >>> --- > >>> arch/arm/include/asm/kvm_vgic.h | 10 ++++++++++ > >>> arch/arm/kvm/arm.c | 10 +++++++++- > >>> arch/arm/kvm/vgic.c | 10 ++++++++-- > >>> 3 files changed, 27 insertions(+), 3 deletions(-) > >>> > >>> diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h > >>> index a8e7a93..7d2662c 100644 > >>> --- a/arch/arm/include/asm/kvm_vgic.h > >>> +++ b/arch/arm/include/asm/kvm_vgic.h > >>> @@ -215,6 +215,9 @@ struct vgic_cpu { > >>> u32 vgic_elrsr[2]; /* Saved only */ > >>> u32 vgic_apr; > >>> u32 vgic_lr[64]; /* A15 has only 4... */ > >>> + > >>> + /* Number of level-triggered interrupt in progress */ > >>> + atomic_t irq_active_count; > >>> #endif > >>> }; > >>> > >>> @@ -254,6 +257,8 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, > >>> > >>> #define irqchip_in_kernel(k) (!!((k)->arch.vgic.vctrl_base)) > >>> #define vgic_initialized(k) ((k)->arch.vgic.ready) > >>> +#define vgic_active_irq(v) (atomic_read(&(v)->arch.vgic_cpu.irq_active_count) == 0) > >> > >> When is the atomic_t initialised to zero? I can only see increments. 
> > > > I'd question whether an atomic type is correct for this; the only > > protection that it's offering is to ensure that the atomic increment > > and decrement occur atomically - there's nothing else that they're doing > > in this code. > > > > If those atomic increments and decrements are occuring beneath a common > > lock, then using atomic types is just mere code obfuscation. > > No, they occur on code paths that do not have a common lock (one of them > being an interrupt handler). This may change though, after one comment > Will made earlier (the thing about delayed interrupts). > > If these two code sections become mutually exclusive, then indeed there > will be no point in having an atomic type anymore. > > > For example, I'd like to question the correctness of this: > > > > + if (vgic_active_irq(vcpu) && > > + cmpxchg(&vcpu->mode, EXITING_GUEST_MODE, IN_GUEST_MODE) == EXITING_GUEST_MODE) > > > > What if vgic_active_irq() reads the atomic type, immediately after it gets > > decremented to zero before the cmpxchg() is executed? Would that be a > > problem? > > I do not think so. If the value gets decremented, it means we took a > maintenance interrupt, which means we exited the guest at some point. > Two possibilities: > > - We're not in guest mode anymore (vcpu->mode = OUTSIDE_GUEST_MODE), and > cmpxchg will fail, hence signaling the guest to reload its state. This > is not needed (the guest will reload its state anyway), but doesn't > cause any harm. What is the relative ordering of the atomic decrement and setting vcpu->mode to be OUTSIDE_GUEST_MODE ? Is there a window where we have decremented this atomic type but vcpu->mode is still set to IN_GUEST_MODE. > - We're back into the guest (vcpu->mode = IN_GUEST_MODE), and cmpxchg > will fail as well, triggering a reload which is needed this time. 
Well, the whole code looks really weird to me, especially that: + if (kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE) { + if (vgic_active_irq(vcpu) && + cmpxchg(&vcpu->mode, EXITING_GUEST_MODE, IN_GUEST_MODE) == EXITING_GUEST_MODE) + return 0; + + return 1; + } I've no idea what kvm_vcpu_exiting_guest_mode() is (it doesn't exist in any tree I have access to)... In any case, look at the version I converted to spinlocks and see whether you think the code looks reasonable in that form. If it doesn't then it isn't reasonable in atomic types either. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 05/12/12 12:29, Russell King - ARM Linux wrote: > On Wed, Dec 05, 2012 at 12:17:57PM +0000, Marc Zyngier wrote: >> On 05/12/12 10:58, Russell King - ARM Linux wrote: >>> On Wed, Dec 05, 2012 at 10:43:58AM +0000, Will Deacon wrote: >>>> On Sat, Nov 10, 2012 at 03:45:39PM +0000, Christoffer Dall wrote: >>>>> From: Marc Zyngier <marc.zyngier@arm.com> >>>>> >>>>> If we have level interrupts already programmed to fire on a vcpu, >>>>> there is no reason to kick it after injecting a new interrupt, >>>>> as we're guaranteed that we'll exit when the level interrupt will >>>>> be EOId (VGIC_LR_EOI is set). >>>>> >>>>> The exit will force a reload of the VGIC, injecting the new interrupts. >>>>> >>>>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com> >>>>> Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com> >>>>> --- >>>>> arch/arm/include/asm/kvm_vgic.h | 10 ++++++++++ >>>>> arch/arm/kvm/arm.c | 10 +++++++++- >>>>> arch/arm/kvm/vgic.c | 10 ++++++++-- >>>>> 3 files changed, 27 insertions(+), 3 deletions(-) >>>>> >>>>> diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h >>>>> index a8e7a93..7d2662c 100644 >>>>> --- a/arch/arm/include/asm/kvm_vgic.h >>>>> +++ b/arch/arm/include/asm/kvm_vgic.h >>>>> @@ -215,6 +215,9 @@ struct vgic_cpu { >>>>> u32 vgic_elrsr[2]; /* Saved only */ >>>>> u32 vgic_apr; >>>>> u32 vgic_lr[64]; /* A15 has only 4... */ >>>>> + >>>>> + /* Number of level-triggered interrupt in progress */ >>>>> + atomic_t irq_active_count; >>>>> #endif >>>>> }; >>>>> >>>>> @@ -254,6 +257,8 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, >>>>> >>>>> #define irqchip_in_kernel(k) (!!((k)->arch.vgic.vctrl_base)) >>>>> #define vgic_initialized(k) ((k)->arch.vgic.ready) >>>>> +#define vgic_active_irq(v) (atomic_read(&(v)->arch.vgic_cpu.irq_active_count) == 0) >>>> >>>> When is the atomic_t initialised to zero? I can only see increments. 
>>> >>> I'd question whether an atomic type is correct for this; the only >>> protection that it's offering is to ensure that the atomic increment >>> and decrement occur atomically - there's nothing else that they're doing >>> in this code. >>> >>> If those atomic increments and decrements are occuring beneath a common >>> lock, then using atomic types is just mere code obfuscation. >> >> No, they occur on code paths that do not have a common lock (one of them >> being an interrupt handler). This may change though, after one comment >> Will made earlier (the thing about delayed interrupts). >> >> If these two code sections become mutually exclusive, then indeed there >> will be no point in having an atomic type anymore. >> >>> For example, I'd like to question the correctness of this: >>> >>> + if (vgic_active_irq(vcpu) && >>> + cmpxchg(&vcpu->mode, EXITING_GUEST_MODE, IN_GUEST_MODE) == EXITING_GUEST_MODE) >>> >>> What if vgic_active_irq() reads the atomic type, immediately after it gets >>> decremented to zero before the cmpxchg() is executed? Would that be a >>> problem? >> >> I do not think so. If the value gets decremented, it means we took a >> maintenance interrupt, which means we exited the guest at some point. >> Two possibilities: >> >> - We're not in guest mode anymore (vcpu->mode = OUTSIDE_GUEST_MODE), and >> cmpxchg will fail, hence signaling the guest to reload its state. This >> is not needed (the guest will reload its state anyway), but doesn't >> cause any harm. > > What is the relative ordering of the atomic decrement and setting > vcpu->mode to be OUTSIDE_GUEST_MODE ? Is there a window where we have > decremented this atomic type but vcpu->mode is still set to IN_GUEST_MODE. OUTSIDE_GUEST_MODE always occurs first, while interrupts are still masked in SVC. We then unmask the interrupts, causing the maintenance interrupt to be handled. Only this handler causes the active count to be decremented. 
>> - We're back into the guest (vcpu->mode = IN_GUEST_MODE), and cmpxchg >> will fail as well, triggering a reload which is needed this time. > > Well, the whole code looks really weird to me, especially that: > > + if (kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE) { > + if (vgic_active_irq(vcpu) && > + cmpxchg(&vcpu->mode, EXITING_GUEST_MODE, IN_GUEST_MODE) == EXITING_GUEST_MODE) > + return 0; > + > + return 1; > + } > > I've no idea what kvm_vcpu_exiting_guest_mode() is (it doesn't exist in > any tree I have access to)... You should find it in include/linux/kvm_host.h. It reads: static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu) { return cmpxchg(&vcpu->mode, IN_GUEST_MODE, EXITING_GUEST_MODE); } Admittedly, the whole sequence should be rewritten to be clearer. What it does is "If we're running a guest and there is no active interrupt, then kick the guest". It probably means the above code should read: if (!vgic_active_irq(vcpu)) return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; return 0; > In any case, look at the version I converted to spinlocks and see whether > you think the code looks reasonable in that form. If it doesn't then it > isn't reasonable in atomic types either. I had a look, and I don't find it problematic. M.
On Wed, Dec 05, 2012 at 01:40:24PM +0000, Marc Zyngier wrote: > Admittedly, the whole sequence should be rewritten to be clearer. What > it does is "If we're running a guest and there is no active interrupt, > then kick the guest". On the whole this entire thing should be written clearer; from the explanations you've given it seems that the only reason this code works is because you're relying on several behaviours all coming together to achieve the right result - which makes for fragile code. You're partly relying on atomic types to ensure that the increment and decrement happen exclusively. You're then relying on a combination of IRQ protection and cmpxchg() to ensure that the non-atomic read of the atomic type won't be a problem. This doesn't inspire confidence, and I have big concerns over whether this code will still be understandable in a number of years' time. And I still wonder how safe this is even with your explanations. IRQ disabling only works for the local CPU core so I still have questions over this wrt an SMP host OS. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h index a8e7a93..7d2662c 100644 --- a/arch/arm/include/asm/kvm_vgic.h +++ b/arch/arm/include/asm/kvm_vgic.h @@ -215,6 +215,9 @@ struct vgic_cpu { u32 vgic_elrsr[2]; /* Saved only */ u32 vgic_apr; u32 vgic_lr[64]; /* A15 has only 4... */ + + /* Number of level-triggered interrupt in progress */ + atomic_t irq_active_count; #endif }; @@ -254,6 +257,8 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, #define irqchip_in_kernel(k) (!!((k)->arch.vgic.vctrl_base)) #define vgic_initialized(k) ((k)->arch.vgic.ready) +#define vgic_active_irq(v) (atomic_read(&(v)->arch.vgic_cpu.irq_active_count) == 0) + #else static inline int kvm_vgic_hyp_init(void) { @@ -305,6 +310,11 @@ static inline bool vgic_initialized(struct kvm *kvm) { return true; } + +static inline int vgic_active_irq(struct kvm_vcpu *vcpu) +{ + return 0; +} #endif #endif diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index a633d9d..1716f12 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -94,7 +94,15 @@ int kvm_arch_hardware_enable(void *garbage) int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) { - return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; + if (kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE) { + if (vgic_active_irq(vcpu) && + cmpxchg(&vcpu->mode, EXITING_GUEST_MODE, IN_GUEST_MODE) == EXITING_GUEST_MODE) + return 0; + + return 1; + } + + return 0; } void kvm_arch_hardware_disable(void *garbage) diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c index 415ddb8..146de1d 100644 --- a/arch/arm/kvm/vgic.c +++ b/arch/arm/kvm/vgic.c @@ -705,8 +705,10 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) kvm_debug("LR%d piggyback for IRQ%d %x\n", lr, irq, vgic_cpu->vgic_lr[lr]); BUG_ON(!test_bit(lr, vgic_cpu->lr_used)); vgic_cpu->vgic_lr[lr] |= VGIC_LR_PENDING_BIT; - if (is_level) + if (is_level) { vgic_cpu->vgic_lr[lr] |= VGIC_LR_EOI; + 
atomic_inc(&vgic_cpu->irq_active_count); + } return true; } @@ -718,8 +720,10 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id); vgic_cpu->vgic_lr[lr] = MK_LR_PEND(sgi_source_id, irq); - if (is_level) + if (is_level) { vgic_cpu->vgic_lr[lr] |= VGIC_LR_EOI; + atomic_inc(&vgic_cpu->irq_active_count); + } vgic_cpu->vgic_irq_lr_map[irq] = lr; clear_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr); @@ -1011,6 +1015,8 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data) vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0); + atomic_dec(&vgic_cpu->irq_active_count); + smp_mb(); vgic_cpu->vgic_lr[lr] &= ~VGIC_LR_EOI; writel_relaxed(vgic_cpu->vgic_lr[lr], dist->vctrl_base + GICH_LR0 + (lr << 2));