diff mbox

[v4,12/13] ARM: KVM: vgic: reduce the number of vcpu kick

Message ID 20121110154539.3061.82553.stgit@chazy-air (mailing list archive)
State New, archived
Headers show

Commit Message

Christoffer Dall Nov. 10, 2012, 3:45 p.m. UTC
From: Marc Zyngier <marc.zyngier@arm.com>

If we have level interrupts already programmed to fire on a vcpu,
there is no reason to kick it after injecting a new interrupt,
as we're guaranteed that we'll exit when the level interrupt will
be EOId (VGIC_LR_EOI is set).

The exit will force a reload of the VGIC, injecting the new interrupts.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
---
 arch/arm/include/asm/kvm_vgic.h |   10 ++++++++++
 arch/arm/kvm/arm.c              |   10 +++++++++-
 arch/arm/kvm/vgic.c             |   10 ++++++++--
 3 files changed, 27 insertions(+), 3 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Will Deacon Dec. 5, 2012, 10:43 a.m. UTC | #1
On Sat, Nov 10, 2012 at 03:45:39PM +0000, Christoffer Dall wrote:
> From: Marc Zyngier <marc.zyngier@arm.com>
> 
> If we have level interrupts already programmed to fire on a vcpu,
> there is no reason to kick it after injecting a new interrupt,
> as we're guaranteed that we'll exit when the level interrupt will
> be EOId (VGIC_LR_EOI is set).
> 
> The exit will force a reload of the VGIC, injecting the new interrupts.
> 
> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
> ---
>  arch/arm/include/asm/kvm_vgic.h |   10 ++++++++++
>  arch/arm/kvm/arm.c              |   10 +++++++++-
>  arch/arm/kvm/vgic.c             |   10 ++++++++--
>  3 files changed, 27 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h
> index a8e7a93..7d2662c 100644
> --- a/arch/arm/include/asm/kvm_vgic.h
> +++ b/arch/arm/include/asm/kvm_vgic.h
> @@ -215,6 +215,9 @@ struct vgic_cpu {
>  	u32		vgic_elrsr[2];	/* Saved only */
>  	u32		vgic_apr;
>  	u32		vgic_lr[64];	/* A15 has only 4... */
> +
> +	/* Number of level-triggered interrupt in progress */
> +	atomic_t	irq_active_count;
>  #endif
>  };
>  
> @@ -254,6 +257,8 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
>  
>  #define irqchip_in_kernel(k)	(!!((k)->arch.vgic.vctrl_base))
>  #define vgic_initialized(k)	((k)->arch.vgic.ready)
> +#define vgic_active_irq(v)	(atomic_read(&(v)->arch.vgic_cpu.irq_active_count) == 0)

When is the atomic_t initialised to zero? I can only see increments.

> +
>  #else
>  static inline int kvm_vgic_hyp_init(void)
>  {
> @@ -305,6 +310,11 @@ static inline bool vgic_initialized(struct kvm *kvm)
>  {
>  	return true;
>  }
> +
> +static inline int vgic_active_irq(struct kvm_vcpu *vcpu)
> +{
> +	return 0;
> +}
>  #endif
>  
>  #endif
> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index a633d9d..1716f12 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c
> @@ -94,7 +94,15 @@ int kvm_arch_hardware_enable(void *garbage)
>  
>  int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
>  {
> -	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
> +	if (kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE) {
> +		if (vgic_active_irq(vcpu) &&
> +		    cmpxchg(&vcpu->mode, EXITING_GUEST_MODE, IN_GUEST_MODE) == EXITING_GUEST_MODE)
> +			return 0;
> +
> +		return 1;
> +	}
> +
> +	return 0;

That's pretty nasty... why don't you check if there's an active interrupt before
trying to change the vcpu mode? That way, you can avoid the double cmpxchg.

>  }
>  
>  void kvm_arch_hardware_disable(void *garbage)
> diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c
> index 415ddb8..146de1d 100644
> --- a/arch/arm/kvm/vgic.c
> +++ b/arch/arm/kvm/vgic.c
> @@ -705,8 +705,10 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
>  		kvm_debug("LR%d piggyback for IRQ%d %x\n", lr, irq, vgic_cpu->vgic_lr[lr]);
>  		BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
>  		vgic_cpu->vgic_lr[lr] |= VGIC_LR_PENDING_BIT;
> -		if (is_level)
> +		if (is_level) {
>  			vgic_cpu->vgic_lr[lr] |= VGIC_LR_EOI;
> +			atomic_inc(&vgic_cpu->irq_active_count);
> +		}
>  		return true;
>  	}
>  
> @@ -718,8 +720,10 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
>  
>  	kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id);
>  	vgic_cpu->vgic_lr[lr] = MK_LR_PEND(sgi_source_id, irq);
> -	if (is_level)
> +	if (is_level) {
>  		vgic_cpu->vgic_lr[lr] |= VGIC_LR_EOI;
> +		atomic_inc(&vgic_cpu->irq_active_count);
> +	}
>  
>  	vgic_cpu->vgic_irq_lr_map[irq] = lr;
>  	clear_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr);
> @@ -1011,6 +1015,8 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
>  
>  			vgic_bitmap_set_irq_val(&dist->irq_active,
>  						vcpu->vcpu_id, irq, 0);
> +			atomic_dec(&vgic_cpu->irq_active_count);
> +			smp_mb();

If you actually need this, try smp_mb__after_atomic_dec although of course
I'd like to know why it's required :)

Will
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Russell King - ARM Linux Dec. 5, 2012, 10:58 a.m. UTC | #2
On Wed, Dec 05, 2012 at 10:43:58AM +0000, Will Deacon wrote:
> On Sat, Nov 10, 2012 at 03:45:39PM +0000, Christoffer Dall wrote:
> > From: Marc Zyngier <marc.zyngier@arm.com>
> > 
> > If we have level interrupts already programmed to fire on a vcpu,
> > there is no reason to kick it after injecting a new interrupt,
> > as we're guaranteed that we'll exit when the level interrupt will
> > be EOId (VGIC_LR_EOI is set).
> > 
> > The exit will force a reload of the VGIC, injecting the new interrupts.
> > 
> > Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> > Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
> > ---
> >  arch/arm/include/asm/kvm_vgic.h |   10 ++++++++++
> >  arch/arm/kvm/arm.c              |   10 +++++++++-
> >  arch/arm/kvm/vgic.c             |   10 ++++++++--
> >  3 files changed, 27 insertions(+), 3 deletions(-)
> > 
> > diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h
> > index a8e7a93..7d2662c 100644
> > --- a/arch/arm/include/asm/kvm_vgic.h
> > +++ b/arch/arm/include/asm/kvm_vgic.h
> > @@ -215,6 +215,9 @@ struct vgic_cpu {
> >  	u32		vgic_elrsr[2];	/* Saved only */
> >  	u32		vgic_apr;
> >  	u32		vgic_lr[64];	/* A15 has only 4... */
> > +
> > +	/* Number of level-triggered interrupt in progress */
> > +	atomic_t	irq_active_count;
> >  #endif
> >  };
> >  
> > @@ -254,6 +257,8 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
> >  
> >  #define irqchip_in_kernel(k)	(!!((k)->arch.vgic.vctrl_base))
> >  #define vgic_initialized(k)	((k)->arch.vgic.ready)
> > +#define vgic_active_irq(v)	(atomic_read(&(v)->arch.vgic_cpu.irq_active_count) == 0)
> 
> When is the atomic_t initialised to zero? I can only see increments.

I'd question whether an atomic type is correct for this; the only
protection that it's offering is to ensure that the atomic increment
and decrement occur atomically - there's nothing else that they're doing
in this code.

If those atomic increments and decrements are occurring beneath a common
lock, then using atomic types is just mere code obfuscation.

For example, I'd like to question the correctness of this:

+               if (vgic_active_irq(vcpu) &&
+                   cmpxchg(&vcpu->mode, EXITING_GUEST_MODE, IN_GUEST_MODE) == EXITING_GUEST_MODE)

What if vgic_active_irq() reads the atomic type, immediately after it gets
decremented to zero before the cmpxchg() is executed?  Would that be a
problem?

If yes, yet again this illustrates why the use of atomic types leads people
down the path of believing that their code somehow becomes magically safe
through the use of this smoke-screen.  IMHO, every use of atomic_t must be
questioned and carefully analysed before it gets into the kernel - many
are buggy through assumptions that atomic_t buys you something magic.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Russell King - ARM Linux Dec. 5, 2012, 11:16 a.m. UTC | #3
For the sake of public education, let me rewrite this patch a bit to
illustrate why atomic_t's are bad, and then people can review this
instead.

Every change I've made here is functionally equivalent to the behaviour
of the atomic type; I have not added any new bugs here that aren't
present in the original code.

It is my hope that through education like this, people will see that
atomic types have no magic properties, and their use does not make
code automatically race free and correct; in fact, the inappropriate
use of atomic types is pure obfuscation and causes confusion.

On Sat, Nov 10, 2012 at 04:45:39PM +0100, Christoffer Dall wrote:
> diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h
> index a8e7a93..7d2662c 100644
> --- a/arch/arm/include/asm/kvm_vgic.h
> +++ b/arch/arm/include/asm/kvm_vgic.h
> @@ -215,6 +215,9 @@ struct vgic_cpu {
>  	u32		vgic_elrsr[2];	/* Saved only */
>  	u32		vgic_apr;
>  	u32		vgic_lr[64];	/* A15 has only 4... */
> +
> +	/* Number of level-triggered interrupt in progress */
> +	atomic_t	irq_active_count;

+	int		irq_active_count;

>  #endif
>  };
>  
> @@ -254,6 +257,8 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
>  
>  #define irqchip_in_kernel(k)	(!!((k)->arch.vgic.vctrl_base))
>  #define vgic_initialized(k)	((k)->arch.vgic.ready)
> +#define vgic_active_irq(v)	(atomic_read(&(v)->arch.vgic_cpu.irq_active_count) == 0)
> +

+#define vgic_active_irq(v)	((v)->arch.vgic_cpu.irq_active_count)

>  #else
>  static inline int kvm_vgic_hyp_init(void)
>  {
> @@ -305,6 +310,11 @@ static inline bool vgic_initialized(struct kvm *kvm)
>  {
>  	return true;
>  }
> +
> +static inline int vgic_active_irq(struct kvm_vcpu *vcpu)
> +{
> +	return 0;
> +}
>  #endif
>  
>  #endif
> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index a633d9d..1716f12 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c
> @@ -94,7 +94,15 @@ int kvm_arch_hardware_enable(void *garbage)
>  
>  int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
>  {
> -	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
> +	if (kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE) {
> +		if (vgic_active_irq(vcpu) &&
> +		    cmpxchg(&vcpu->mode, EXITING_GUEST_MODE, IN_GUEST_MODE) == EXITING_GUEST_MODE)
> +			return 0;

So with the above change to the macro, this becomes:
+		if (vcpu->arch.vgic_cpu.irq_active_count &&
+		    cmpxchg(&vcpu->mode, EXITING_GUEST_MODE, IN_GUEST_MODE) == EXITING_GUEST_MODE)

> +
> +		return 1;
> +	}
> +
> +	return 0;
>  }
>  
>  void kvm_arch_hardware_disable(void *garbage)
> diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c
> index 415ddb8..146de1d 100644
> --- a/arch/arm/kvm/vgic.c
> +++ b/arch/arm/kvm/vgic.c
> @@ -705,8 +705,10 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
>  		kvm_debug("LR%d piggyback for IRQ%d %x\n", lr, irq, vgic_cpu->vgic_lr[lr]);
>  		BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
>  		vgic_cpu->vgic_lr[lr] |= VGIC_LR_PENDING_BIT;
> -		if (is_level)
> +		if (is_level) {
>  			vgic_cpu->vgic_lr[lr] |= VGIC_LR_EOI;
> +			atomic_inc(&vgic_cpu->irq_active_count);

+			spin_lock_irqsave(&atomic_lock, flags);
+			vgic_cpu->irq_active_count++;
+			spin_unlock_irqrestore(&atomic_lock, flags);

> +		}
>  		return true;
>  	}
>  
> @@ -718,8 +720,10 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
>  
>  	kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id);
>  	vgic_cpu->vgic_lr[lr] = MK_LR_PEND(sgi_source_id, irq);
> -	if (is_level)
> +	if (is_level) {
>  		vgic_cpu->vgic_lr[lr] |= VGIC_LR_EOI;
> +		atomic_inc(&vgic_cpu->irq_active_count);

+		spin_lock_irqsave(&atomic_lock, flags);
+		vgic_cpu->irq_active_count++;
+		spin_unlock_irqrestore(&atomic_lock, flags);

> +	}
>  
>  	vgic_cpu->vgic_irq_lr_map[irq] = lr;
>  	clear_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr);
> @@ -1011,6 +1015,8 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
>  
>  			vgic_bitmap_set_irq_val(&dist->irq_active,
>  						vcpu->vcpu_id, irq, 0);
> +			atomic_dec(&vgic_cpu->irq_active_count);

+			spin_lock_irqsave(&atomic_lock, flags);
+			vgic_cpu->irq_active_count--;
+			spin_unlock_irqrestore(&atomic_lock, flags);

> +			smp_mb();
>  			vgic_cpu->vgic_lr[lr] &= ~VGIC_LR_EOI;
>  			writel_relaxed(vgic_cpu->vgic_lr[lr],
>  				       dist->vctrl_base + GICH_LR0 + (lr << 2));
> 
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marc Zyngier Dec. 5, 2012, 12:17 p.m. UTC | #4
On 05/12/12 10:58, Russell King - ARM Linux wrote:
> On Wed, Dec 05, 2012 at 10:43:58AM +0000, Will Deacon wrote:
>> On Sat, Nov 10, 2012 at 03:45:39PM +0000, Christoffer Dall wrote:
>>> From: Marc Zyngier <marc.zyngier@arm.com>
>>>
>>> If we have level interrupts already programmed to fire on a vcpu,
>>> there is no reason to kick it after injecting a new interrupt,
>>> as we're guaranteed that we'll exit when the level interrupt will
>>> be EOId (VGIC_LR_EOI is set).
>>>
>>> The exit will force a reload of the VGIC, injecting the new interrupts.
>>>
>>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
>>> Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
>>> ---
>>>  arch/arm/include/asm/kvm_vgic.h |   10 ++++++++++
>>>  arch/arm/kvm/arm.c              |   10 +++++++++-
>>>  arch/arm/kvm/vgic.c             |   10 ++++++++--
>>>  3 files changed, 27 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h
>>> index a8e7a93..7d2662c 100644
>>> --- a/arch/arm/include/asm/kvm_vgic.h
>>> +++ b/arch/arm/include/asm/kvm_vgic.h
>>> @@ -215,6 +215,9 @@ struct vgic_cpu {
>>>  	u32		vgic_elrsr[2];	/* Saved only */
>>>  	u32		vgic_apr;
>>>  	u32		vgic_lr[64];	/* A15 has only 4... */
>>> +
>>> +	/* Number of level-triggered interrupt in progress */
>>> +	atomic_t	irq_active_count;
>>>  #endif
>>>  };
>>>  
>>> @@ -254,6 +257,8 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
>>>  
>>>  #define irqchip_in_kernel(k)	(!!((k)->arch.vgic.vctrl_base))
>>>  #define vgic_initialized(k)	((k)->arch.vgic.ready)
>>> +#define vgic_active_irq(v)	(atomic_read(&(v)->arch.vgic_cpu.irq_active_count) == 0)
>>
>> When is the atomic_t initialised to zero? I can only see increments.
> 
> I'd question whether an atomic type is correct for this; the only
> protection that it's offering is to ensure that the atomic increment
> and decrement occur atomically - there's nothing else that they're doing
> in this code.
> 
> If those atomic increments and decrements are occurring beneath a common
> lock, then using atomic types is just mere code obfuscation.

No, they occur on code paths that do not have a common lock (one of them
being an interrupt handler). This may change though, after one comment
Will made earlier (the thing about delayed interrupts).

If these two code sections become mutually exclusive, then indeed there
will be no point in having an atomic type anymore.

> For example, I'd like to question the correctness of this:
> 
> +               if (vgic_active_irq(vcpu) &&
> +                   cmpxchg(&vcpu->mode, EXITING_GUEST_MODE, IN_GUEST_MODE) == EXITING_GUEST_MODE)
> 
> What if vgic_active_irq() reads the atomic type, immediately after it gets
> decremented to zero before the cmpxchg() is executed?  Would that be a
> problem?

I do not think so. If the value gets decremented, it means we took a
maintenance interrupt, which means we exited the guest at some point.
Two possibilities:

- We're not in guest mode anymore (vcpu->mode = OUTSIDE_GUEST_MODE), and
cmpxchg will fail, hence signaling the guest to reload its state. This
is not needed (the guest will reload its state anyway), but doesn't
cause any harm.

- We're back into the guest (vcpu->mode = IN_GUEST_MODE), and cmpxchg
will fail as well, triggering a reload which is needed this time.

	M.
Russell King - ARM Linux Dec. 5, 2012, 12:29 p.m. UTC | #5
On Wed, Dec 05, 2012 at 12:17:57PM +0000, Marc Zyngier wrote:
> On 05/12/12 10:58, Russell King - ARM Linux wrote:
> > On Wed, Dec 05, 2012 at 10:43:58AM +0000, Will Deacon wrote:
> >> On Sat, Nov 10, 2012 at 03:45:39PM +0000, Christoffer Dall wrote:
> >>> From: Marc Zyngier <marc.zyngier@arm.com>
> >>>
> >>> If we have level interrupts already programmed to fire on a vcpu,
> >>> there is no reason to kick it after injecting a new interrupt,
> >>> as we're guaranteed that we'll exit when the level interrupt will
> >>> be EOId (VGIC_LR_EOI is set).
> >>>
> >>> The exit will force a reload of the VGIC, injecting the new interrupts.
> >>>
> >>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> >>> Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
> >>> ---
> >>>  arch/arm/include/asm/kvm_vgic.h |   10 ++++++++++
> >>>  arch/arm/kvm/arm.c              |   10 +++++++++-
> >>>  arch/arm/kvm/vgic.c             |   10 ++++++++--
> >>>  3 files changed, 27 insertions(+), 3 deletions(-)
> >>>
> >>> diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h
> >>> index a8e7a93..7d2662c 100644
> >>> --- a/arch/arm/include/asm/kvm_vgic.h
> >>> +++ b/arch/arm/include/asm/kvm_vgic.h
> >>> @@ -215,6 +215,9 @@ struct vgic_cpu {
> >>>  	u32		vgic_elrsr[2];	/* Saved only */
> >>>  	u32		vgic_apr;
> >>>  	u32		vgic_lr[64];	/* A15 has only 4... */
> >>> +
> >>> +	/* Number of level-triggered interrupt in progress */
> >>> +	atomic_t	irq_active_count;
> >>>  #endif
> >>>  };
> >>>  
> >>> @@ -254,6 +257,8 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
> >>>  
> >>>  #define irqchip_in_kernel(k)	(!!((k)->arch.vgic.vctrl_base))
> >>>  #define vgic_initialized(k)	((k)->arch.vgic.ready)
> >>> +#define vgic_active_irq(v)	(atomic_read(&(v)->arch.vgic_cpu.irq_active_count) == 0)
> >>
> >> When is the atomic_t initialised to zero? I can only see increments.
> > 
> > I'd question whether an atomic type is correct for this; the only
> > protection that it's offering is to ensure that the atomic increment
> > and decrement occur atomically - there's nothing else that they're doing
> > in this code.
> > 
> > If those atomic increments and decrements are occurring beneath a common
> > lock, then using atomic types is just mere code obfuscation.
> 
> No, they occur on code paths that do not have a common lock (one of them
> being an interrupt handler). This may change though, after one comment
> Will made earlier (the thing about delayed interrupts).
> 
> If these two code sections become mutually exclusive, then indeed there
> will be no point in having an atomic type anymore.
> 
> > For example, I'd like to question the correctness of this:
> > 
> > +               if (vgic_active_irq(vcpu) &&
> > +                   cmpxchg(&vcpu->mode, EXITING_GUEST_MODE, IN_GUEST_MODE) == EXITING_GUEST_MODE)
> > 
> > What if vgic_active_irq() reads the atomic type, immediately after it gets
> > decremented to zero before the cmpxchg() is executed?  Would that be a
> > problem?
> 
> I do not think so. If the value gets decremented, it means we took a
> maintenance interrupt, which means we exited the guest at some point.
> Two possibilities:
> 
> - We're not in guest mode anymore (vcpu->mode = OUTSIDE_GUEST_MODE), and
> cmpxchg will fail, hence signaling the guest to reload its state. This
> is not needed (the guest will reload its state anyway), but doesn't
> cause any harm.

What is the relative ordering of the atomic decrement and setting
vcpu->mode to be OUTSIDE_GUEST_MODE?  Is there a window where we have
decremented this atomic type but vcpu->mode is still set to IN_GUEST_MODE?

> - We're back into the guest (vcpu->mode = IN_GUEST_MODE), and cmpxchg
> will fail as well, triggering a reload which is needed this time.

Well, the whole code looks really weird to me, especially that:

+       if (kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE) {
+               if (vgic_active_irq(vcpu) &&
+                   cmpxchg(&vcpu->mode, EXITING_GUEST_MODE, IN_GUEST_MODE) == EXITING_GUEST_MODE)
+                       return 0;
+
+               return 1;
+       }

I've no idea what kvm_vcpu_exiting_guest_mode() is (it doesn't exist in
any tree I have access to)...

In any case, look at the version I converted to spinlocks and see whether
you think the code looks reasonable in that form.  If it doesn't then it
isn't reasonable in atomic types either.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marc Zyngier Dec. 5, 2012, 1:40 p.m. UTC | #6
On 05/12/12 12:29, Russell King - ARM Linux wrote:
> On Wed, Dec 05, 2012 at 12:17:57PM +0000, Marc Zyngier wrote:
>> On 05/12/12 10:58, Russell King - ARM Linux wrote:
>>> On Wed, Dec 05, 2012 at 10:43:58AM +0000, Will Deacon wrote:
>>>> On Sat, Nov 10, 2012 at 03:45:39PM +0000, Christoffer Dall wrote:
>>>>> From: Marc Zyngier <marc.zyngier@arm.com>
>>>>>
>>>>> If we have level interrupts already programmed to fire on a vcpu,
>>>>> there is no reason to kick it after injecting a new interrupt,
>>>>> as we're guaranteed that we'll exit when the level interrupt will
>>>>> be EOId (VGIC_LR_EOI is set).
>>>>>
>>>>> The exit will force a reload of the VGIC, injecting the new interrupts.
>>>>>
>>>>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
>>>>> Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
>>>>> ---
>>>>>  arch/arm/include/asm/kvm_vgic.h |   10 ++++++++++
>>>>>  arch/arm/kvm/arm.c              |   10 +++++++++-
>>>>>  arch/arm/kvm/vgic.c             |   10 ++++++++--
>>>>>  3 files changed, 27 insertions(+), 3 deletions(-)
>>>>>
>>>>> diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h
>>>>> index a8e7a93..7d2662c 100644
>>>>> --- a/arch/arm/include/asm/kvm_vgic.h
>>>>> +++ b/arch/arm/include/asm/kvm_vgic.h
>>>>> @@ -215,6 +215,9 @@ struct vgic_cpu {
>>>>>  	u32		vgic_elrsr[2];	/* Saved only */
>>>>>  	u32		vgic_apr;
>>>>>  	u32		vgic_lr[64];	/* A15 has only 4... */
>>>>> +
>>>>> +	/* Number of level-triggered interrupt in progress */
>>>>> +	atomic_t	irq_active_count;
>>>>>  #endif
>>>>>  };
>>>>>  
>>>>> @@ -254,6 +257,8 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
>>>>>  
>>>>>  #define irqchip_in_kernel(k)	(!!((k)->arch.vgic.vctrl_base))
>>>>>  #define vgic_initialized(k)	((k)->arch.vgic.ready)
>>>>> +#define vgic_active_irq(v)	(atomic_read(&(v)->arch.vgic_cpu.irq_active_count) == 0)
>>>>
>>>> When is the atomic_t initialised to zero? I can only see increments.
>>>
>>> I'd question whether an atomic type is correct for this; the only
>>> protection that it's offering is to ensure that the atomic increment
>>> and decrement occur atomically - there's nothing else that they're doing
>>> in this code.
>>>
>>> If those atomic increments and decrements are occurring beneath a common
>>> lock, then using atomic types is just mere code obfuscation.
>>
>> No, they occur on code paths that do not have a common lock (one of them
>> being an interrupt handler). This may change though, after one comment
>> Will made earlier (the thing about delayed interrupts).
>>
>> If these two code sections become mutually exclusive, then indeed there
>> will be no point in having an atomic type anymore.
>>
>>> For example, I'd like to question the correctness of this:
>>>
>>> +               if (vgic_active_irq(vcpu) &&
>>> +                   cmpxchg(&vcpu->mode, EXITING_GUEST_MODE, IN_GUEST_MODE) == EXITING_GUEST_MODE)
>>>
>>> What if vgic_active_irq() reads the atomic type, immediately after it gets
>>> decremented to zero before the cmpxchg() is executed?  Would that be a
>>> problem?
>>
>> I do not think so. If the value gets decremented, it means we took a
>> maintenance interrupt, which means we exited the guest at some point.
>> Two possibilities:
>>
>> - We're not in guest mode anymore (vcpu->mode = OUTSIDE_GUEST_MODE), and
>> cmpxchg will fail, hence signaling the guest to reload its state. This
>> is not needed (the guest will reload its state anyway), but doesn't
>> cause any harm.
> 
> What is the relative ordering of the atomic decrement and setting
> vcpu->mode to be OUTSIDE_GUEST_MODE ?  Is there a window where we have
> decremented this atomic type but vcpu->mode is still set to IN_GUEST_MODE.

OUTSIDE_GUEST_MODE always occurs first, while interrupts are still
masked in SVC. We then unmask the interrupts, causing the maintenance
interrupt to be handled. Only this handler causes the active count to be
decremented.

>> - We're back into the guest (vcpu->mode = IN_GUEST_MODE), and cmpxchg
>> will fail as well, triggering a reload which is needed this time.
> 
> Well, the whole code looks really weird to me, especially that:
> 
> +       if (kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE) {
> +               if (vgic_active_irq(vcpu) &&
> +                   cmpxchg(&vcpu->mode, EXITING_GUEST_MODE, IN_GUEST_MODE) == EXITING_GUEST_MODE)
> +                       return 0;
> +
> +               return 1;
> +       }
> 
> I've no idea what kvm_vcpu_exiting_guest_mode() is (it doesn't exist in
> any tree I have access to)...

You should find it in include/linux/kvm_host.h. It reads:
static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
{
        return cmpxchg(&vcpu->mode, IN_GUEST_MODE, EXITING_GUEST_MODE);
}

Admittedly, the whole sequence should be rewritten to be clearer. What
it does is "If we're running a guest and there is no active interrupt,
then kick the guest".

It probably means the above code should read:

if (!vgic_active_irq(vcpu))
        return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;

return 0;

> In any case, look at the version I converted to spinlocks and see whether
> you think the code looks reasonable in that form.  If it doesn't then it
> isn't reasonable in atomic types either.

I had a look, and I don't find it problematic.

	M.
Russell King - ARM Linux Dec. 5, 2012, 3:55 p.m. UTC | #7
On Wed, Dec 05, 2012 at 01:40:24PM +0000, Marc Zyngier wrote:
> Admittedly, the whole sequence should be rewritten to be clearer. What
> it does is "If we're running a guest and there is no active interrupt,
> then kick the guest".

On the whole this entire thing should be written clearer; from the
explanations you've given it seems that the only reason this code works
is because you're relying on several behaviours all coming together to
achieve the right result - which makes for fragile code.

You're partly relying on atomic types to ensure that the increment and
decrement happen exclusively.  You're then relying on a combination of
IRQ protection and cmpxchg() to ensure that the non-atomic read of the
atomic type won't be a problem.

This doesn't inspire confidence, and I have big concerns over whether
this code will still be understandable in a number of years time.

And I still wonder how safe this is even with your explanations.  IRQ
disabling only works for the local CPU core so I still have questions
over this wrt a SMP host OS.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h
index a8e7a93..7d2662c 100644
--- a/arch/arm/include/asm/kvm_vgic.h
+++ b/arch/arm/include/asm/kvm_vgic.h
@@ -215,6 +215,9 @@  struct vgic_cpu {
 	u32		vgic_elrsr[2];	/* Saved only */
 	u32		vgic_apr;
 	u32		vgic_lr[64];	/* A15 has only 4... */
+
+	/* Number of level-triggered interrupt in progress */
+	atomic_t	irq_active_count;
 #endif
 };
 
@@ -254,6 +257,8 @@  bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
 
 #define irqchip_in_kernel(k)	(!!((k)->arch.vgic.vctrl_base))
 #define vgic_initialized(k)	((k)->arch.vgic.ready)
+#define vgic_active_irq(v)	(atomic_read(&(v)->arch.vgic_cpu.irq_active_count) == 0)
+
 #else
 static inline int kvm_vgic_hyp_init(void)
 {
@@ -305,6 +310,11 @@  static inline bool vgic_initialized(struct kvm *kvm)
 {
 	return true;
 }
+
+static inline int vgic_active_irq(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
 #endif
 
 #endif
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index a633d9d..1716f12 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -94,7 +94,15 @@  int kvm_arch_hardware_enable(void *garbage)
 
 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
 {
-	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
+	if (kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE) {
+		if (vgic_active_irq(vcpu) &&
+		    cmpxchg(&vcpu->mode, EXITING_GUEST_MODE, IN_GUEST_MODE) == EXITING_GUEST_MODE)
+			return 0;
+
+		return 1;
+	}
+
+	return 0;
 }
 
 void kvm_arch_hardware_disable(void *garbage)
diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c
index 415ddb8..146de1d 100644
--- a/arch/arm/kvm/vgic.c
+++ b/arch/arm/kvm/vgic.c
@@ -705,8 +705,10 @@  static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
 		kvm_debug("LR%d piggyback for IRQ%d %x\n", lr, irq, vgic_cpu->vgic_lr[lr]);
 		BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
 		vgic_cpu->vgic_lr[lr] |= VGIC_LR_PENDING_BIT;
-		if (is_level)
+		if (is_level) {
 			vgic_cpu->vgic_lr[lr] |= VGIC_LR_EOI;
+			atomic_inc(&vgic_cpu->irq_active_count);
+		}
 		return true;
 	}
 
@@ -718,8 +720,10 @@  static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
 
 	kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id);
 	vgic_cpu->vgic_lr[lr] = MK_LR_PEND(sgi_source_id, irq);
-	if (is_level)
+	if (is_level) {
 		vgic_cpu->vgic_lr[lr] |= VGIC_LR_EOI;
+		atomic_inc(&vgic_cpu->irq_active_count);
+	}
 
 	vgic_cpu->vgic_irq_lr_map[irq] = lr;
 	clear_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr);
@@ -1011,6 +1015,8 @@  static irqreturn_t vgic_maintenance_handler(int irq, void *data)
 
 			vgic_bitmap_set_irq_val(&dist->irq_active,
 						vcpu->vcpu_id, irq, 0);
+			atomic_dec(&vgic_cpu->irq_active_count);
+			smp_mb();
 			vgic_cpu->vgic_lr[lr] &= ~VGIC_LR_EOI;
 			writel_relaxed(vgic_cpu->vgic_lr[lr],
 				       dist->vctrl_base + GICH_LR0 + (lr << 2));