diff mbox

[v8,06/17] KVM: arm/arm64: VGIC: add refcounting for IRQs

Message ID 20160705112309.28877-7-andre.przywara@arm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Andre Przywara July 5, 2016, 11:22 a.m. UTC
In the moment our struct vgic_irq's are statically allocated at guest
creation time. So getting a pointer to an IRQ structure is trivial and
safe. LPIs are more dynamic, they can be mapped and unmapped at any time
during the guest's _runtime_.
In preparation for supporting LPIs we introduce reference counting for
those structures using the kernel's kref infrastructure.
Since private IRQs and SPIs are statically allocated, the refcount never
drops to 0 at the moment, but we increase it when an IRQ gets onto a VCPU
list and decrease it when it gets removed.
This introduces vgic_put_irq(), which wraps kref_put and hides the
release function from the callers.

Signed-off-by: Andre Przywara <andre.przywara@arm.com>
---
 include/kvm/arm_vgic.h           |  1 +
 virt/kvm/arm/vgic/vgic-init.c    |  2 ++
 virt/kvm/arm/vgic/vgic-mmio-v2.c |  8 +++++++
 virt/kvm/arm/vgic/vgic-mmio-v3.c | 20 +++++++++++------
 virt/kvm/arm/vgic/vgic-mmio.c    | 25 ++++++++++++++++++++-
 virt/kvm/arm/vgic/vgic-v2.c      |  1 +
 virt/kvm/arm/vgic/vgic-v3.c      |  1 +
 virt/kvm/arm/vgic/vgic.c         | 48 +++++++++++++++++++++++++++++++---------
 virt/kvm/arm/vgic/vgic.h         |  1 +
 9 files changed, 89 insertions(+), 18 deletions(-)

Comments

Christoffer Dall July 7, 2016, 1:13 p.m. UTC | #1
On Tue, Jul 05, 2016 at 12:22:58PM +0100, Andre Przywara wrote:
> In the moment our struct vgic_irq's are statically allocated at guest
> creation time. So getting a pointer to an IRQ structure is trivial and
> safe. LPIs are more dynamic, they can be mapped and unmapped at any time
> during the guest's _runtime_.
> In preparation for supporting LPIs we introduce reference counting for
> those structures using the kernel's kref infrastructure.
> Since private IRQs and SPIs are statically allocated, the refcount never
> drops to 0 at the moment, but we increase it when an IRQ gets onto a VCPU
> list and decrease it when it gets removed.
> This introduces vgic_put_irq(), which wraps kref_put and hides the
> release function from the callers.
> 
> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
> ---
>  include/kvm/arm_vgic.h           |  1 +
>  virt/kvm/arm/vgic/vgic-init.c    |  2 ++
>  virt/kvm/arm/vgic/vgic-mmio-v2.c |  8 +++++++
>  virt/kvm/arm/vgic/vgic-mmio-v3.c | 20 +++++++++++------
>  virt/kvm/arm/vgic/vgic-mmio.c    | 25 ++++++++++++++++++++-
>  virt/kvm/arm/vgic/vgic-v2.c      |  1 +
>  virt/kvm/arm/vgic/vgic-v3.c      |  1 +
>  virt/kvm/arm/vgic/vgic.c         | 48 +++++++++++++++++++++++++++++++---------
>  virt/kvm/arm/vgic/vgic.h         |  1 +
>  9 files changed, 89 insertions(+), 18 deletions(-)
> 
> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> index 5142e2a..450b4da 100644
> --- a/include/kvm/arm_vgic.h
> +++ b/include/kvm/arm_vgic.h
> @@ -96,6 +96,7 @@ struct vgic_irq {
>  	bool active;			/* not used for LPIs */
>  	bool enabled;
>  	bool hw;			/* Tied to HW IRQ */
> +	struct kref refcount;		/* Used for LPIs */
>  	u32 hwintid;			/* HW INTID number */
>  	union {
>  		u8 targets;			/* GICv2 target VCPUs mask */
> diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
> index 90cae48..ac3c1a5 100644
> --- a/virt/kvm/arm/vgic/vgic-init.c
> +++ b/virt/kvm/arm/vgic/vgic-init.c
> @@ -177,6 +177,7 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
>  		spin_lock_init(&irq->irq_lock);
>  		irq->vcpu = NULL;
>  		irq->target_vcpu = vcpu0;
> +		kref_init(&irq->refcount);
>  		if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2)
>  			irq->targets = 0;
>  		else
> @@ -211,6 +212,7 @@ static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
>  		irq->vcpu = NULL;
>  		irq->target_vcpu = vcpu;
>  		irq->targets = 1U << vcpu->vcpu_id;
> +		kref_init(&irq->refcount);
>  		if (vgic_irq_is_sgi(i)) {
>  			/* SGIs */
>  			irq->enabled = 1;
> diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c
> index a213936..4152348 100644
> --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c
> +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c
> @@ -102,6 +102,7 @@ static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu,
>  		irq->source |= 1U << source_vcpu->vcpu_id;
>  
>  		vgic_queue_irq_unlock(source_vcpu->kvm, irq);
> +		vgic_put_irq(source_vcpu->kvm, irq);
>  	}
>  }
>  
> @@ -116,6 +117,8 @@ static unsigned long vgic_mmio_read_target(struct kvm_vcpu *vcpu,
>  		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>  
>  		val |= (u64)irq->targets << (i * 8);
> +
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  
>  	return val;
> @@ -143,6 +146,7 @@ static void vgic_mmio_write_target(struct kvm_vcpu *vcpu,
>  		irq->target_vcpu = kvm_get_vcpu(vcpu->kvm, target);
>  
>  		spin_unlock(&irq->irq_lock);
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  }
>  
> @@ -157,6 +161,8 @@ static unsigned long vgic_mmio_read_sgipend(struct kvm_vcpu *vcpu,
>  		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>  
>  		val |= (u64)irq->source << (i * 8);
> +
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  	return val;
>  }
> @@ -178,6 +184,7 @@ static void vgic_mmio_write_sgipendc(struct kvm_vcpu *vcpu,
>  			irq->pending = false;
>  
>  		spin_unlock(&irq->irq_lock);
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  }
>  
> @@ -201,6 +208,7 @@ static void vgic_mmio_write_sgipends(struct kvm_vcpu *vcpu,
>  		} else {
>  			spin_unlock(&irq->irq_lock);
>  		}
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  }
>  
> diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
> index fc7b6c9..bfcafbd 100644
> --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
> +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
> @@ -80,15 +80,17 @@ static unsigned long vgic_mmio_read_irouter(struct kvm_vcpu *vcpu,
>  {
>  	int intid = VGIC_ADDR_TO_INTID(addr, 64);
>  	struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid);
> +	unsigned long ret = 0;
>  
>  	if (!irq)
>  		return 0;
>  
>  	/* The upper word is RAZ for us. */
> -	if (addr & 4)
> -		return 0;
> +	if (!(addr & 4))
> +		ret = extract_bytes(READ_ONCE(irq->mpidr), addr & 7, len);
>  
> -	return extract_bytes(READ_ONCE(irq->mpidr), addr & 7, len);
> +	vgic_put_irq(vcpu->kvm, irq);
> +	return ret;
>  }
>  
>  static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu,
> @@ -96,15 +98,17 @@ static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu,
>  				    unsigned long val)
>  {
>  	int intid = VGIC_ADDR_TO_INTID(addr, 64);
> -	struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid);
> -
> -	if (!irq)
> -		return;
> +	struct vgic_irq *irq;
>  
>  	/* The upper word is WI for us since we don't implement Aff3. */
>  	if (addr & 4)
>  		return;
>  
> +	irq = vgic_get_irq(vcpu->kvm, NULL, intid);
> +
> +	if (!irq)
> +		return;
> +
>  	spin_lock(&irq->irq_lock);
>  
>  	/* We only care about and preserve Aff0, Aff1 and Aff2. */
> @@ -112,6 +116,7 @@ static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu,
>  	irq->target_vcpu = kvm_mpidr_to_vcpu(vcpu->kvm, irq->mpidr);
>  
>  	spin_unlock(&irq->irq_lock);
> +	vgic_put_irq(vcpu->kvm, irq);
>  }
>  
>  static unsigned long vgic_mmio_read_v3r_typer(struct kvm_vcpu *vcpu,
> @@ -445,5 +450,6 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg)
>  		irq->pending = true;
>  
>  		vgic_queue_irq_unlock(vcpu->kvm, irq);
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  }
> diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
> index 9f6fab7..5e79e01 100644
> --- a/virt/kvm/arm/vgic/vgic-mmio.c
> +++ b/virt/kvm/arm/vgic/vgic-mmio.c
> @@ -56,6 +56,8 @@ unsigned long vgic_mmio_read_enable(struct kvm_vcpu *vcpu,
>  
>  		if (irq->enabled)
>  			value |= (1U << i);
> +
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  
>  	return value;
> @@ -74,6 +76,8 @@ void vgic_mmio_write_senable(struct kvm_vcpu *vcpu,
>  		spin_lock(&irq->irq_lock);
>  		irq->enabled = true;
>  		vgic_queue_irq_unlock(vcpu->kvm, irq);
> +
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  }
>  
> @@ -92,6 +96,7 @@ void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu,
>  		irq->enabled = false;
>  
>  		spin_unlock(&irq->irq_lock);
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  }
>  
> @@ -108,6 +113,8 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
>  
>  		if (irq->pending)
>  			value |= (1U << i);
> +
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  
>  	return value;
> @@ -129,6 +136,7 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
>  			irq->soft_pending = true;
>  
>  		vgic_queue_irq_unlock(vcpu->kvm, irq);
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  }
>  
> @@ -152,6 +160,7 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
>  		}
>  
>  		spin_unlock(&irq->irq_lock);
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  }
>  
> @@ -168,6 +177,8 @@ unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
>  
>  		if (irq->active)
>  			value |= (1U << i);
> +
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  
>  	return value;
> @@ -242,6 +253,7 @@ void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
>  	for_each_set_bit(i, &val, len * 8) {
>  		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>  		vgic_mmio_change_active(vcpu, irq, false);
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  	vgic_change_active_finish(vcpu, intid);
>  }
> @@ -257,6 +269,7 @@ void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
>  	for_each_set_bit(i, &val, len * 8) {
>  		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>  		vgic_mmio_change_active(vcpu, irq, true);
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  	vgic_change_active_finish(vcpu, intid);
>  }
> @@ -272,6 +285,8 @@ unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu,
>  		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>  
>  		val |= (u64)irq->priority << (i * 8);
> +
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  
>  	return val;
> @@ -298,6 +313,8 @@ void vgic_mmio_write_priority(struct kvm_vcpu *vcpu,
>  		/* Narrow the priority range to what we actually support */
>  		irq->priority = (val >> (i * 8)) & GENMASK(7, 8 - VGIC_PRI_BITS);
>  		spin_unlock(&irq->irq_lock);
> +
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  }
>  
> @@ -313,6 +330,8 @@ unsigned long vgic_mmio_read_config(struct kvm_vcpu *vcpu,
>  
>  		if (irq->config == VGIC_CONFIG_EDGE)
>  			value |= (2U << (i * 2));
> +
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  
>  	return value;
> @@ -326,7 +345,7 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu,
>  	int i;
>  
>  	for (i = 0; i < len * 4; i++) {
> -		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
> +		struct vgic_irq *irq;
>  
>  		/*
>  		 * The configuration cannot be changed for SGIs in general,
> @@ -337,14 +356,18 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu,
>  		if (intid + i < VGIC_NR_PRIVATE_IRQS)
>  			continue;
>  
> +		irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>  		spin_lock(&irq->irq_lock);
> +
>  		if (test_bit(i * 2 + 1, &val)) {
>  			irq->config = VGIC_CONFIG_EDGE;
>  		} else {
>  			irq->config = VGIC_CONFIG_LEVEL;
>  			irq->pending = irq->line_level | irq->soft_pending;
>  		}
> +
>  		spin_unlock(&irq->irq_lock);
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  }
>  
> diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
> index 079bf67..0bf6709 100644
> --- a/virt/kvm/arm/vgic/vgic-v2.c
> +++ b/virt/kvm/arm/vgic/vgic-v2.c
> @@ -124,6 +124,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
>  		}
>  
>  		spin_unlock(&irq->irq_lock);
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  }
>  
> diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
> index e48a22e..f0ac064 100644
> --- a/virt/kvm/arm/vgic/vgic-v3.c
> +++ b/virt/kvm/arm/vgic/vgic-v3.c
> @@ -113,6 +113,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
>  		}
>  
>  		spin_unlock(&irq->irq_lock);
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  }
>  
> diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
> index 69b61ab..ae80894 100644
> --- a/virt/kvm/arm/vgic/vgic.c
> +++ b/virt/kvm/arm/vgic/vgic.c
> @@ -48,13 +48,20 @@ struct vgic_global __section(.hyp.text) kvm_vgic_global_state;
>  struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
>  			      u32 intid)
>  {
> -	/* SGIs and PPIs */
> -	if (intid <= VGIC_MAX_PRIVATE)
> -		return &vcpu->arch.vgic_cpu.private_irqs[intid];
> +	struct vgic_dist *dist = &kvm->arch.vgic;
> +	struct vgic_irq *irq;
>  
> -	/* SPIs */
> -	if (intid <= VGIC_MAX_SPI)
> -		return &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS];
> +	if (intid <= VGIC_MAX_PRIVATE) {        /* SGIs and PPIs */
> +		irq = &vcpu->arch.vgic_cpu.private_irqs[intid];
> +		kref_get(&irq->refcount);
> +		return irq;
> +	}
> +
> +	if (intid <= VGIC_MAX_SPI) {            /* SPIs */
> +		irq = &dist->spis[intid - VGIC_NR_PRIVATE_IRQS];
> +		kref_get(&irq->refcount);
> +		return irq;
> +	}

It appears to me that this could be made nicer (the diff as well) by
just setting the IRQ variable and at the end of the function do:

if (irq)
	kref_get(irq->refcount);
return irq;

Perhaps a later patch will do something like that or make it obvious why
that's not a good idea.

Again, not a reason for a respin on its own.

>  
>  	/* LPIs are not yet covered */
>  	if (intid >= VGIC_MIN_LPI)
> @@ -64,6 +71,17 @@ struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
>  	return NULL;
>  }
>  
> +/* The refcount should never drop to 0 at the moment. */
> +static void vgic_irq_release(struct kref *ref)
> +{
> +	WARN_ON(1);
> +}
> +
> +void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
> +{
> +	kref_put(&irq->refcount, vgic_irq_release);
> +}
> +
>  /**
>   * kvm_vgic_target_oracle - compute the target vcpu for an irq
>   *
> @@ -236,6 +254,7 @@ retry:
>  		goto retry;
>  	}
>  
> +	kref_get(&irq->refcount);
>  	list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head);
>  	irq->vcpu = vcpu;
>  
> @@ -269,14 +288,17 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
>  	if (!irq)
>  		return -EINVAL;
>  
> -	if (irq->hw != mapped_irq)
> +	if (irq->hw != mapped_irq) {
> +		vgic_put_irq(kvm, irq);
>  		return -EINVAL;
> +	}
>  
>  	spin_lock(&irq->irq_lock);
>  
>  	if (!vgic_validate_injection(irq, level)) {
>  		/* Nothing to see here, move along... */
>  		spin_unlock(&irq->irq_lock);
> +		vgic_put_irq(kvm, irq);
>  		return 0;
>  	}
>  
> @@ -288,6 +310,7 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
>  	}
>  
>  	vgic_queue_irq_unlock(kvm, irq);
> +	vgic_put_irq(kvm, irq);
>  
>  	return 0;
>  }
> @@ -330,25 +353,28 @@ int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq)
>  	irq->hwintid = phys_irq;
>  
>  	spin_unlock(&irq->irq_lock);
> +	vgic_put_irq(vcpu->kvm, irq);
>  
>  	return 0;
>  }
>  
>  int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq)
>  {
> -	struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq);
> -
> -	BUG_ON(!irq);
> +	struct vgic_irq *irq;
>  
>  	if (!vgic_initialized(vcpu->kvm))
>  		return -EAGAIN;
>  
> +	irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq);
> +	BUG_ON(!irq);
> +
>  	spin_lock(&irq->irq_lock);
>  
>  	irq->hw = false;
>  	irq->hwintid = 0;
>  
>  	spin_unlock(&irq->irq_lock);
> +	vgic_put_irq(vcpu->kvm, irq);
>  
>  	return 0;
>  }
> @@ -386,6 +412,7 @@ retry:
>  			list_del(&irq->ap_list);
>  			irq->vcpu = NULL;
>  			spin_unlock(&irq->irq_lock);
> +			vgic_put_irq(vcpu->kvm, irq);
>  			continue;
>  		}
>  
> @@ -614,6 +641,7 @@ bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq)
>  	spin_lock(&irq->irq_lock);
>  	map_is_active = irq->hw && irq->active;
>  	spin_unlock(&irq->irq_lock);
> +	vgic_put_irq(vcpu->kvm, irq);
>  
>  	return map_is_active;
>  }
> diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
> index c752152..5b79c34 100644
> --- a/virt/kvm/arm/vgic/vgic.h
> +++ b/virt/kvm/arm/vgic/vgic.h
> @@ -38,6 +38,7 @@ struct vgic_vmcr {
>  
>  struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
>  			      u32 intid);
> +void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq);
>  bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq);
>  void vgic_kick_vcpus(struct kvm *kvm);
>  
> -- 
> 2.9.0
> 

Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marc Zyngier July 7, 2016, 3 p.m. UTC | #2
On 05/07/16 12:22, Andre Przywara wrote:
> In the moment our struct vgic_irq's are statically allocated at guest
> creation time. So getting a pointer to an IRQ structure is trivial and
> safe. LPIs are more dynamic, they can be mapped and unmapped at any time
> during the guest's _runtime_.
> In preparation for supporting LPIs we introduce reference counting for
> those structures using the kernel's kref infrastructure.
> Since private IRQs and SPIs are statically allocated, the refcount never
> drops to 0 at the moment, but we increase it when an IRQ gets onto a VCPU
> list and decrease it when it gets removed.
> This introduces vgic_put_irq(), which wraps kref_put and hides the
> release function from the callers.
> 
> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
> ---
>  include/kvm/arm_vgic.h           |  1 +
>  virt/kvm/arm/vgic/vgic-init.c    |  2 ++
>  virt/kvm/arm/vgic/vgic-mmio-v2.c |  8 +++++++
>  virt/kvm/arm/vgic/vgic-mmio-v3.c | 20 +++++++++++------
>  virt/kvm/arm/vgic/vgic-mmio.c    | 25 ++++++++++++++++++++-
>  virt/kvm/arm/vgic/vgic-v2.c      |  1 +
>  virt/kvm/arm/vgic/vgic-v3.c      |  1 +
>  virt/kvm/arm/vgic/vgic.c         | 48 +++++++++++++++++++++++++++++++---------
>  virt/kvm/arm/vgic/vgic.h         |  1 +
>  9 files changed, 89 insertions(+), 18 deletions(-)
> 
> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> index 5142e2a..450b4da 100644
> --- a/include/kvm/arm_vgic.h
> +++ b/include/kvm/arm_vgic.h
> @@ -96,6 +96,7 @@ struct vgic_irq {
>  	bool active;			/* not used for LPIs */
>  	bool enabled;
>  	bool hw;			/* Tied to HW IRQ */
> +	struct kref refcount;		/* Used for LPIs */
>  	u32 hwintid;			/* HW INTID number */
>  	union {
>  		u8 targets;			/* GICv2 target VCPUs mask */
> diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
> index 90cae48..ac3c1a5 100644
> --- a/virt/kvm/arm/vgic/vgic-init.c
> +++ b/virt/kvm/arm/vgic/vgic-init.c
> @@ -177,6 +177,7 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
>  		spin_lock_init(&irq->irq_lock);
>  		irq->vcpu = NULL;
>  		irq->target_vcpu = vcpu0;
> +		kref_init(&irq->refcount);
>  		if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2)
>  			irq->targets = 0;
>  		else
> @@ -211,6 +212,7 @@ static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
>  		irq->vcpu = NULL;
>  		irq->target_vcpu = vcpu;
>  		irq->targets = 1U << vcpu->vcpu_id;
> +		kref_init(&irq->refcount);
>  		if (vgic_irq_is_sgi(i)) {
>  			/* SGIs */
>  			irq->enabled = 1;
> diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c
> index a213936..4152348 100644
> --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c
> +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c
> @@ -102,6 +102,7 @@ static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu,
>  		irq->source |= 1U << source_vcpu->vcpu_id;
>  
>  		vgic_queue_irq_unlock(source_vcpu->kvm, irq);
> +		vgic_put_irq(source_vcpu->kvm, irq);
>  	}
>  }
>  
> @@ -116,6 +117,8 @@ static unsigned long vgic_mmio_read_target(struct kvm_vcpu *vcpu,
>  		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>  
>  		val |= (u64)irq->targets << (i * 8);
> +
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  
>  	return val;
> @@ -143,6 +146,7 @@ static void vgic_mmio_write_target(struct kvm_vcpu *vcpu,
>  		irq->target_vcpu = kvm_get_vcpu(vcpu->kvm, target);
>  
>  		spin_unlock(&irq->irq_lock);
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  }
>  
> @@ -157,6 +161,8 @@ static unsigned long vgic_mmio_read_sgipend(struct kvm_vcpu *vcpu,
>  		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>  
>  		val |= (u64)irq->source << (i * 8);
> +
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  	return val;
>  }
> @@ -178,6 +184,7 @@ static void vgic_mmio_write_sgipendc(struct kvm_vcpu *vcpu,
>  			irq->pending = false;
>  
>  		spin_unlock(&irq->irq_lock);
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  }
>  
> @@ -201,6 +208,7 @@ static void vgic_mmio_write_sgipends(struct kvm_vcpu *vcpu,
>  		} else {
>  			spin_unlock(&irq->irq_lock);
>  		}
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  }
>  
> diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
> index fc7b6c9..bfcafbd 100644
> --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
> +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
> @@ -80,15 +80,17 @@ static unsigned long vgic_mmio_read_irouter(struct kvm_vcpu *vcpu,
>  {
>  	int intid = VGIC_ADDR_TO_INTID(addr, 64);
>  	struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid);
> +	unsigned long ret = 0;
>  
>  	if (!irq)
>  		return 0;
>  
>  	/* The upper word is RAZ for us. */
> -	if (addr & 4)
> -		return 0;
> +	if (!(addr & 4))
> +		ret = extract_bytes(READ_ONCE(irq->mpidr), addr & 7, len);
>  
> -	return extract_bytes(READ_ONCE(irq->mpidr), addr & 7, len);
> +	vgic_put_irq(vcpu->kvm, irq);
> +	return ret;
>  }
>  
>  static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu,
> @@ -96,15 +98,17 @@ static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu,
>  				    unsigned long val)
>  {
>  	int intid = VGIC_ADDR_TO_INTID(addr, 64);
> -	struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid);
> -
> -	if (!irq)
> -		return;
> +	struct vgic_irq *irq;
>  
>  	/* The upper word is WI for us since we don't implement Aff3. */
>  	if (addr & 4)
>  		return;
>  
> +	irq = vgic_get_irq(vcpu->kvm, NULL, intid);
> +
> +	if (!irq)
> +		return;
> +
>  	spin_lock(&irq->irq_lock);
>  
>  	/* We only care about and preserve Aff0, Aff1 and Aff2. */
> @@ -112,6 +116,7 @@ static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu,
>  	irq->target_vcpu = kvm_mpidr_to_vcpu(vcpu->kvm, irq->mpidr);
>  
>  	spin_unlock(&irq->irq_lock);
> +	vgic_put_irq(vcpu->kvm, irq);
>  }
>  
>  static unsigned long vgic_mmio_read_v3r_typer(struct kvm_vcpu *vcpu,
> @@ -445,5 +450,6 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg)
>  		irq->pending = true;
>  
>  		vgic_queue_irq_unlock(vcpu->kvm, irq);
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  }
> diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
> index 9f6fab7..5e79e01 100644
> --- a/virt/kvm/arm/vgic/vgic-mmio.c
> +++ b/virt/kvm/arm/vgic/vgic-mmio.c
> @@ -56,6 +56,8 @@ unsigned long vgic_mmio_read_enable(struct kvm_vcpu *vcpu,
>  
>  		if (irq->enabled)
>  			value |= (1U << i);
> +
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  
>  	return value;
> @@ -74,6 +76,8 @@ void vgic_mmio_write_senable(struct kvm_vcpu *vcpu,
>  		spin_lock(&irq->irq_lock);
>  		irq->enabled = true;
>  		vgic_queue_irq_unlock(vcpu->kvm, irq);
> +
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  }
>  
> @@ -92,6 +96,7 @@ void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu,
>  		irq->enabled = false;
>  
>  		spin_unlock(&irq->irq_lock);
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  }
>  
> @@ -108,6 +113,8 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
>  
>  		if (irq->pending)
>  			value |= (1U << i);
> +
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  
>  	return value;
> @@ -129,6 +136,7 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
>  			irq->soft_pending = true;
>  
>  		vgic_queue_irq_unlock(vcpu->kvm, irq);
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  }
>  
> @@ -152,6 +160,7 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
>  		}
>  
>  		spin_unlock(&irq->irq_lock);
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  }
>  
> @@ -168,6 +177,8 @@ unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
>  
>  		if (irq->active)
>  			value |= (1U << i);
> +
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  
>  	return value;
> @@ -242,6 +253,7 @@ void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
>  	for_each_set_bit(i, &val, len * 8) {
>  		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>  		vgic_mmio_change_active(vcpu, irq, false);
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  	vgic_change_active_finish(vcpu, intid);
>  }
> @@ -257,6 +269,7 @@ void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
>  	for_each_set_bit(i, &val, len * 8) {
>  		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>  		vgic_mmio_change_active(vcpu, irq, true);
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  	vgic_change_active_finish(vcpu, intid);
>  }
> @@ -272,6 +285,8 @@ unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu,
>  		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>  
>  		val |= (u64)irq->priority << (i * 8);
> +
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  
>  	return val;
> @@ -298,6 +313,8 @@ void vgic_mmio_write_priority(struct kvm_vcpu *vcpu,
>  		/* Narrow the priority range to what we actually support */
>  		irq->priority = (val >> (i * 8)) & GENMASK(7, 8 - VGIC_PRI_BITS);
>  		spin_unlock(&irq->irq_lock);
> +
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  }
>  
> @@ -313,6 +330,8 @@ unsigned long vgic_mmio_read_config(struct kvm_vcpu *vcpu,
>  
>  		if (irq->config == VGIC_CONFIG_EDGE)
>  			value |= (2U << (i * 2));
> +
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  
>  	return value;
> @@ -326,7 +345,7 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu,
>  	int i;
>  
>  	for (i = 0; i < len * 4; i++) {
> -		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
> +		struct vgic_irq *irq;
>  
>  		/*
>  		 * The configuration cannot be changed for SGIs in general,
> @@ -337,14 +356,18 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu,
>  		if (intid + i < VGIC_NR_PRIVATE_IRQS)
>  			continue;
>  
> +		irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>  		spin_lock(&irq->irq_lock);
> +
>  		if (test_bit(i * 2 + 1, &val)) {
>  			irq->config = VGIC_CONFIG_EDGE;
>  		} else {
>  			irq->config = VGIC_CONFIG_LEVEL;
>  			irq->pending = irq->line_level | irq->soft_pending;
>  		}
> +
>  		spin_unlock(&irq->irq_lock);
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  }
>  
> diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
> index 079bf67..0bf6709 100644
> --- a/virt/kvm/arm/vgic/vgic-v2.c
> +++ b/virt/kvm/arm/vgic/vgic-v2.c
> @@ -124,6 +124,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
>  		}
>  
>  		spin_unlock(&irq->irq_lock);
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  }
>  
> diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
> index e48a22e..f0ac064 100644
> --- a/virt/kvm/arm/vgic/vgic-v3.c
> +++ b/virt/kvm/arm/vgic/vgic-v3.c
> @@ -113,6 +113,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
>  		}
>  
>  		spin_unlock(&irq->irq_lock);
> +		vgic_put_irq(vcpu->kvm, irq);
>  	}
>  }
>  
> diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
> index 69b61ab..ae80894 100644
> --- a/virt/kvm/arm/vgic/vgic.c
> +++ b/virt/kvm/arm/vgic/vgic.c
> @@ -48,13 +48,20 @@ struct vgic_global __section(.hyp.text) kvm_vgic_global_state;
>  struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
>  			      u32 intid)
>  {
> -	/* SGIs and PPIs */
> -	if (intid <= VGIC_MAX_PRIVATE)
> -		return &vcpu->arch.vgic_cpu.private_irqs[intid];
> +	struct vgic_dist *dist = &kvm->arch.vgic;
> +	struct vgic_irq *irq;
>  
> -	/* SPIs */
> -	if (intid <= VGIC_MAX_SPI)
> -		return &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS];
> +	if (intid <= VGIC_MAX_PRIVATE) {        /* SGIs and PPIs */
> +		irq = &vcpu->arch.vgic_cpu.private_irqs[intid];
> +		kref_get(&irq->refcount);
> +		return irq;
> +	}
> +
> +	if (intid <= VGIC_MAX_SPI) {            /* SPIs */
> +		irq = &dist->spis[intid - VGIC_NR_PRIVATE_IRQS];
> +		kref_get(&irq->refcount);
> +		return irq;
> +	}
>  
>  	/* LPIs are not yet covered */
>  	if (intid >= VGIC_MIN_LPI)
> @@ -64,6 +71,17 @@ struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
>  	return NULL;
>  }
>  
> +/* The refcount should never drop to 0 at the moment. */
> +static void vgic_irq_release(struct kref *ref)
> +{
> +	WARN_ON(1);
> +}
> +
> +void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
> +{
> +	kref_put(&irq->refcount, vgic_irq_release);
> +}
> +
>  /**
>   * kvm_vgic_target_oracle - compute the target vcpu for an irq
>   *
> @@ -236,6 +254,7 @@ retry:
>  		goto retry;
>  	}
>  
> +	kref_get(&irq->refcount);

Could you use vgic_get_irq() instead? I know it is slightly overkill,
but I can already tell that we'll need to add some tracing in both the
put and get helpers in order to do some debugging. Having straight
kref_get/put is going to make this tracing difficult, so let's not go there.

>  	list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head);
>  	irq->vcpu = vcpu;
>  
> @@ -269,14 +288,17 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
>  	if (!irq)
>  		return -EINVAL;
>  
> -	if (irq->hw != mapped_irq)
> +	if (irq->hw != mapped_irq) {
> +		vgic_put_irq(kvm, irq);
>  		return -EINVAL;
> +	}
>  
>  	spin_lock(&irq->irq_lock);
>  
>  	if (!vgic_validate_injection(irq, level)) {
>  		/* Nothing to see here, move along... */
>  		spin_unlock(&irq->irq_lock);
> +		vgic_put_irq(kvm, irq);
>  		return 0;
>  	}
>  
> @@ -288,6 +310,7 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
>  	}
>  
>  	vgic_queue_irq_unlock(kvm, irq);
> +	vgic_put_irq(kvm, irq);
>  
>  	return 0;
>  }
> @@ -330,25 +353,28 @@ int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq)
>  	irq->hwintid = phys_irq;
>  
>  	spin_unlock(&irq->irq_lock);
> +	vgic_put_irq(vcpu->kvm, irq);
>  
>  	return 0;
>  }
>  
>  int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq)
>  {
> -	struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq);
> -
> -	BUG_ON(!irq);
> +	struct vgic_irq *irq;
>  
>  	if (!vgic_initialized(vcpu->kvm))
>  		return -EAGAIN;
>  
> +	irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq);
> +	BUG_ON(!irq);
> +
>  	spin_lock(&irq->irq_lock);
>  
>  	irq->hw = false;
>  	irq->hwintid = 0;
>  
>  	spin_unlock(&irq->irq_lock);
> +	vgic_put_irq(vcpu->kvm, irq);
>  
>  	return 0;
>  }
> @@ -386,6 +412,7 @@ retry:
>  			list_del(&irq->ap_list);
>  			irq->vcpu = NULL;
>  			spin_unlock(&irq->irq_lock);
> +			vgic_put_irq(vcpu->kvm, irq);
>  			continue;
>  		}
>  
> @@ -614,6 +641,7 @@ bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq)
>  	spin_lock(&irq->irq_lock);
>  	map_is_active = irq->hw && irq->active;
>  	spin_unlock(&irq->irq_lock);
> +	vgic_put_irq(vcpu->kvm, irq);
>  
>  	return map_is_active;
>  }
> diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
> index c752152..5b79c34 100644
> --- a/virt/kvm/arm/vgic/vgic.h
> +++ b/virt/kvm/arm/vgic/vgic.h
> @@ -38,6 +38,7 @@ struct vgic_vmcr {
>  
>  struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
>  			      u32 intid);
> +void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq);
>  bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq);
>  void vgic_kick_vcpus(struct kvm *kvm);
>  
> 

Otherwise looks good.

Thanks,

	M.
Andre Przywara July 8, 2016, 10:28 a.m. UTC | #3
Hi,

On 07/07/16 16:00, Marc Zyngier wrote:
> On 05/07/16 12:22, Andre Przywara wrote:
>> In the moment our struct vgic_irq's are statically allocated at guest
>> creation time. So getting a pointer to an IRQ structure is trivial and
>> safe. LPIs are more dynamic, they can be mapped and unmapped at any time
>> during the guest's _runtime_.
>> In preparation for supporting LPIs we introduce reference counting for
>> those structures using the kernel's kref infrastructure.
>> Since private IRQs and SPIs are statically allocated, the refcount never
>> drops to 0 at the moment, but we increase it when an IRQ gets onto a VCPU
>> list and decrease it when it gets removed.
>> This introduces vgic_put_irq(), which wraps kref_put and hides the
>> release function from the callers.
>>
>> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
>> ---
>>  include/kvm/arm_vgic.h           |  1 +
>>  virt/kvm/arm/vgic/vgic-init.c    |  2 ++
>>  virt/kvm/arm/vgic/vgic-mmio-v2.c |  8 +++++++
>>  virt/kvm/arm/vgic/vgic-mmio-v3.c | 20 +++++++++++------
>>  virt/kvm/arm/vgic/vgic-mmio.c    | 25 ++++++++++++++++++++-
>>  virt/kvm/arm/vgic/vgic-v2.c      |  1 +
>>  virt/kvm/arm/vgic/vgic-v3.c      |  1 +
>>  virt/kvm/arm/vgic/vgic.c         | 48 +++++++++++++++++++++++++++++++---------
>>  virt/kvm/arm/vgic/vgic.h         |  1 +
>>  9 files changed, 89 insertions(+), 18 deletions(-)
>>
>> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
>> index 5142e2a..450b4da 100644
>> --- a/include/kvm/arm_vgic.h
>> +++ b/include/kvm/arm_vgic.h
>> @@ -96,6 +96,7 @@ struct vgic_irq {
>>  	bool active;			/* not used for LPIs */
>>  	bool enabled;
>>  	bool hw;			/* Tied to HW IRQ */
>> +	struct kref refcount;		/* Used for LPIs */
>>  	u32 hwintid;			/* HW INTID number */
>>  	union {
>>  		u8 targets;			/* GICv2 target VCPUs mask */
>> diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
>> index 90cae48..ac3c1a5 100644
>> --- a/virt/kvm/arm/vgic/vgic-init.c
>> +++ b/virt/kvm/arm/vgic/vgic-init.c
>> @@ -177,6 +177,7 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
>>  		spin_lock_init(&irq->irq_lock);
>>  		irq->vcpu = NULL;
>>  		irq->target_vcpu = vcpu0;
>> +		kref_init(&irq->refcount);
>>  		if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2)
>>  			irq->targets = 0;
>>  		else
>> @@ -211,6 +212,7 @@ static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
>>  		irq->vcpu = NULL;
>>  		irq->target_vcpu = vcpu;
>>  		irq->targets = 1U << vcpu->vcpu_id;
>> +		kref_init(&irq->refcount);
>>  		if (vgic_irq_is_sgi(i)) {
>>  			/* SGIs */
>>  			irq->enabled = 1;
>> diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c
>> index a213936..4152348 100644
>> --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c
>> +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c
>> @@ -102,6 +102,7 @@ static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu,
>>  		irq->source |= 1U << source_vcpu->vcpu_id;
>>  
>>  		vgic_queue_irq_unlock(source_vcpu->kvm, irq);
>> +		vgic_put_irq(source_vcpu->kvm, irq);
>>  	}
>>  }
>>  
>> @@ -116,6 +117,8 @@ static unsigned long vgic_mmio_read_target(struct kvm_vcpu *vcpu,
>>  		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>>  
>>  		val |= (u64)irq->targets << (i * 8);
>> +
>> +		vgic_put_irq(vcpu->kvm, irq);
>>  	}
>>  
>>  	return val;
>> @@ -143,6 +146,7 @@ static void vgic_mmio_write_target(struct kvm_vcpu *vcpu,
>>  		irq->target_vcpu = kvm_get_vcpu(vcpu->kvm, target);
>>  
>>  		spin_unlock(&irq->irq_lock);
>> +		vgic_put_irq(vcpu->kvm, irq);
>>  	}
>>  }
>>  
>> @@ -157,6 +161,8 @@ static unsigned long vgic_mmio_read_sgipend(struct kvm_vcpu *vcpu,
>>  		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>>  
>>  		val |= (u64)irq->source << (i * 8);
>> +
>> +		vgic_put_irq(vcpu->kvm, irq);
>>  	}
>>  	return val;
>>  }
>> @@ -178,6 +184,7 @@ static void vgic_mmio_write_sgipendc(struct kvm_vcpu *vcpu,
>>  			irq->pending = false;
>>  
>>  		spin_unlock(&irq->irq_lock);
>> +		vgic_put_irq(vcpu->kvm, irq);
>>  	}
>>  }
>>  
>> @@ -201,6 +208,7 @@ static void vgic_mmio_write_sgipends(struct kvm_vcpu *vcpu,
>>  		} else {
>>  			spin_unlock(&irq->irq_lock);
>>  		}
>> +		vgic_put_irq(vcpu->kvm, irq);
>>  	}
>>  }
>>  
>> diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
>> index fc7b6c9..bfcafbd 100644
>> --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
>> +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
>> @@ -80,15 +80,17 @@ static unsigned long vgic_mmio_read_irouter(struct kvm_vcpu *vcpu,
>>  {
>>  	int intid = VGIC_ADDR_TO_INTID(addr, 64);
>>  	struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid);
>> +	unsigned long ret = 0;
>>  
>>  	if (!irq)
>>  		return 0;
>>  
>>  	/* The upper word is RAZ for us. */
>> -	if (addr & 4)
>> -		return 0;
>> +	if (!(addr & 4))
>> +		ret = extract_bytes(READ_ONCE(irq->mpidr), addr & 7, len);
>>  
>> -	return extract_bytes(READ_ONCE(irq->mpidr), addr & 7, len);
>> +	vgic_put_irq(vcpu->kvm, irq);
>> +	return ret;
>>  }
>>  
>>  static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu,
>> @@ -96,15 +98,17 @@ static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu,
>>  				    unsigned long val)
>>  {
>>  	int intid = VGIC_ADDR_TO_INTID(addr, 64);
>> -	struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid);
>> -
>> -	if (!irq)
>> -		return;
>> +	struct vgic_irq *irq;
>>  
>>  	/* The upper word is WI for us since we don't implement Aff3. */
>>  	if (addr & 4)
>>  		return;
>>  
>> +	irq = vgic_get_irq(vcpu->kvm, NULL, intid);
>> +
>> +	if (!irq)
>> +		return;
>> +
>>  	spin_lock(&irq->irq_lock);
>>  
>>  	/* We only care about and preserve Aff0, Aff1 and Aff2. */
>> @@ -112,6 +116,7 @@ static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu,
>>  	irq->target_vcpu = kvm_mpidr_to_vcpu(vcpu->kvm, irq->mpidr);
>>  
>>  	spin_unlock(&irq->irq_lock);
>> +	vgic_put_irq(vcpu->kvm, irq);
>>  }
>>  
>>  static unsigned long vgic_mmio_read_v3r_typer(struct kvm_vcpu *vcpu,
>> @@ -445,5 +450,6 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg)
>>  		irq->pending = true;
>>  
>>  		vgic_queue_irq_unlock(vcpu->kvm, irq);
>> +		vgic_put_irq(vcpu->kvm, irq);
>>  	}
>>  }
>> diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
>> index 9f6fab7..5e79e01 100644
>> --- a/virt/kvm/arm/vgic/vgic-mmio.c
>> +++ b/virt/kvm/arm/vgic/vgic-mmio.c
>> @@ -56,6 +56,8 @@ unsigned long vgic_mmio_read_enable(struct kvm_vcpu *vcpu,
>>  
>>  		if (irq->enabled)
>>  			value |= (1U << i);
>> +
>> +		vgic_put_irq(vcpu->kvm, irq);
>>  	}
>>  
>>  	return value;
>> @@ -74,6 +76,8 @@ void vgic_mmio_write_senable(struct kvm_vcpu *vcpu,
>>  		spin_lock(&irq->irq_lock);
>>  		irq->enabled = true;
>>  		vgic_queue_irq_unlock(vcpu->kvm, irq);
>> +
>> +		vgic_put_irq(vcpu->kvm, irq);
>>  	}
>>  }
>>  
>> @@ -92,6 +96,7 @@ void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu,
>>  		irq->enabled = false;
>>  
>>  		spin_unlock(&irq->irq_lock);
>> +		vgic_put_irq(vcpu->kvm, irq);
>>  	}
>>  }
>>  
>> @@ -108,6 +113,8 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
>>  
>>  		if (irq->pending)
>>  			value |= (1U << i);
>> +
>> +		vgic_put_irq(vcpu->kvm, irq);
>>  	}
>>  
>>  	return value;
>> @@ -129,6 +136,7 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
>>  			irq->soft_pending = true;
>>  
>>  		vgic_queue_irq_unlock(vcpu->kvm, irq);
>> +		vgic_put_irq(vcpu->kvm, irq);
>>  	}
>>  }
>>  
>> @@ -152,6 +160,7 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
>>  		}
>>  
>>  		spin_unlock(&irq->irq_lock);
>> +		vgic_put_irq(vcpu->kvm, irq);
>>  	}
>>  }
>>  
>> @@ -168,6 +177,8 @@ unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
>>  
>>  		if (irq->active)
>>  			value |= (1U << i);
>> +
>> +		vgic_put_irq(vcpu->kvm, irq);
>>  	}
>>  
>>  	return value;
>> @@ -242,6 +253,7 @@ void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
>>  	for_each_set_bit(i, &val, len * 8) {
>>  		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>>  		vgic_mmio_change_active(vcpu, irq, false);
>> +		vgic_put_irq(vcpu->kvm, irq);
>>  	}
>>  	vgic_change_active_finish(vcpu, intid);
>>  }
>> @@ -257,6 +269,7 @@ void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
>>  	for_each_set_bit(i, &val, len * 8) {
>>  		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>>  		vgic_mmio_change_active(vcpu, irq, true);
>> +		vgic_put_irq(vcpu->kvm, irq);
>>  	}
>>  	vgic_change_active_finish(vcpu, intid);
>>  }
>> @@ -272,6 +285,8 @@ unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu,
>>  		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>>  
>>  		val |= (u64)irq->priority << (i * 8);
>> +
>> +		vgic_put_irq(vcpu->kvm, irq);
>>  	}
>>  
>>  	return val;
>> @@ -298,6 +313,8 @@ void vgic_mmio_write_priority(struct kvm_vcpu *vcpu,
>>  		/* Narrow the priority range to what we actually support */
>>  		irq->priority = (val >> (i * 8)) & GENMASK(7, 8 - VGIC_PRI_BITS);
>>  		spin_unlock(&irq->irq_lock);
>> +
>> +		vgic_put_irq(vcpu->kvm, irq);
>>  	}
>>  }
>>  
>> @@ -313,6 +330,8 @@ unsigned long vgic_mmio_read_config(struct kvm_vcpu *vcpu,
>>  
>>  		if (irq->config == VGIC_CONFIG_EDGE)
>>  			value |= (2U << (i * 2));
>> +
>> +		vgic_put_irq(vcpu->kvm, irq);
>>  	}
>>  
>>  	return value;
>> @@ -326,7 +345,7 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu,
>>  	int i;
>>  
>>  	for (i = 0; i < len * 4; i++) {
>> -		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>> +		struct vgic_irq *irq;
>>  
>>  		/*
>>  		 * The configuration cannot be changed for SGIs in general,
>> @@ -337,14 +356,18 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu,
>>  		if (intid + i < VGIC_NR_PRIVATE_IRQS)
>>  			continue;
>>  
>> +		irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>>  		spin_lock(&irq->irq_lock);
>> +
>>  		if (test_bit(i * 2 + 1, &val)) {
>>  			irq->config = VGIC_CONFIG_EDGE;
>>  		} else {
>>  			irq->config = VGIC_CONFIG_LEVEL;
>>  			irq->pending = irq->line_level | irq->soft_pending;
>>  		}
>> +
>>  		spin_unlock(&irq->irq_lock);
>> +		vgic_put_irq(vcpu->kvm, irq);
>>  	}
>>  }
>>  
>> diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
>> index 079bf67..0bf6709 100644
>> --- a/virt/kvm/arm/vgic/vgic-v2.c
>> +++ b/virt/kvm/arm/vgic/vgic-v2.c
>> @@ -124,6 +124,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
>>  		}
>>  
>>  		spin_unlock(&irq->irq_lock);
>> +		vgic_put_irq(vcpu->kvm, irq);
>>  	}
>>  }
>>  
>> diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
>> index e48a22e..f0ac064 100644
>> --- a/virt/kvm/arm/vgic/vgic-v3.c
>> +++ b/virt/kvm/arm/vgic/vgic-v3.c
>> @@ -113,6 +113,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
>>  		}
>>  
>>  		spin_unlock(&irq->irq_lock);
>> +		vgic_put_irq(vcpu->kvm, irq);
>>  	}
>>  }
>>  
>> diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
>> index 69b61ab..ae80894 100644
>> --- a/virt/kvm/arm/vgic/vgic.c
>> +++ b/virt/kvm/arm/vgic/vgic.c
>> @@ -48,13 +48,20 @@ struct vgic_global __section(.hyp.text) kvm_vgic_global_state;
>>  struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
>>  			      u32 intid)
>>  {
>> -	/* SGIs and PPIs */
>> -	if (intid <= VGIC_MAX_PRIVATE)
>> -		return &vcpu->arch.vgic_cpu.private_irqs[intid];
>> +	struct vgic_dist *dist = &kvm->arch.vgic;
>> +	struct vgic_irq *irq;
>>  
>> -	/* SPIs */
>> -	if (intid <= VGIC_MAX_SPI)
>> -		return &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS];
>> +	if (intid <= VGIC_MAX_PRIVATE) {        /* SGIs and PPIs */
>> +		irq = &vcpu->arch.vgic_cpu.private_irqs[intid];
>> +		kref_get(&irq->refcount);
>> +		return irq;
>> +	}
>> +
>> +	if (intid <= VGIC_MAX_SPI) {            /* SPIs */
>> +		irq = &dist->spis[intid - VGIC_NR_PRIVATE_IRQS];
>> +		kref_get(&irq->refcount);
>> +		return irq;
>> +	}
>>  
>>  	/* LPIs are not yet covered */
>>  	if (intid >= VGIC_MIN_LPI)
>> @@ -64,6 +71,17 @@ struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
>>  	return NULL;
>>  }
>>  
>> +/* The refcount should never drop to 0 at the moment. */
>> +static void vgic_irq_release(struct kref *ref)
>> +{
>> +	WARN_ON(1);
>> +}
>> +
>> +void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
>> +{
>> +	kref_put(&irq->refcount, vgic_irq_release);
>> +}
>> +
>>  /**
>>   * kvm_vgic_target_oracle - compute the target vcpu for an irq
>>   *
>> @@ -236,6 +254,7 @@ retry:
>>  		goto retry;
>>  	}
>>  
>> +	kref_get(&irq->refcount);
> 
> Could you use vgic_get_irq() instead? I know it is slightly overkill,
> but I can already tell that we'll need to add some tracing in both the
> put and get helpers in order to do some debugging. Having straight
> kref_get/put is going to make this tracing difficult, so let's not go there.

I'd rather not.
1) Putting the IRQ on the ap_list is the "other user" of the
refcounting, I don't want to mix that unnecessarily with the
vgic_get_irq() (as in: get the struct by the number) use case. That may
actually help tracing, since we can have separate tracepoints to
distinguish them.
2) This would violate the locking order, since we hold the irq_lock here
and possibly take the lpi_list_lock in vgic_get_irq().
I don't think we can or should drop the irq_lock and re-take it just for
this.

I am happy to revisit this though once we actually get the tracepoints.

Cheers,
Andre.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marc Zyngier July 8, 2016, 10:50 a.m. UTC | #4
On 08/07/16 11:28, Andre Przywara wrote:
> Hi,
> 
> On 07/07/16 16:00, Marc Zyngier wrote:
>> On 05/07/16 12:22, Andre Przywara wrote:
>>> In the moment our struct vgic_irq's are statically allocated at guest
>>> creation time. So getting a pointer to an IRQ structure is trivial and
>>> safe. LPIs are more dynamic, they can be mapped and unmapped at any time
>>> during the guest's _runtime_.
>>> In preparation for supporting LPIs we introduce reference counting for
>>> those structures using the kernel's kref infrastructure.
>>> Since private IRQs and SPIs are statically allocated, the refcount never
>>> drops to 0 at the moment, but we increase it when an IRQ gets onto a VCPU
>>> list and decrease it when it gets removed.
>>> This introduces vgic_put_irq(), which wraps kref_put and hides the
>>> release function from the callers.
>>>
>>> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
>>> ---
>>>  include/kvm/arm_vgic.h           |  1 +
>>>  virt/kvm/arm/vgic/vgic-init.c    |  2 ++
>>>  virt/kvm/arm/vgic/vgic-mmio-v2.c |  8 +++++++
>>>  virt/kvm/arm/vgic/vgic-mmio-v3.c | 20 +++++++++++------
>>>  virt/kvm/arm/vgic/vgic-mmio.c    | 25 ++++++++++++++++++++-
>>>  virt/kvm/arm/vgic/vgic-v2.c      |  1 +
>>>  virt/kvm/arm/vgic/vgic-v3.c      |  1 +
>>>  virt/kvm/arm/vgic/vgic.c         | 48 +++++++++++++++++++++++++++++++---------
>>>  virt/kvm/arm/vgic/vgic.h         |  1 +
>>>  9 files changed, 89 insertions(+), 18 deletions(-)
>>>
>>> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
>>> index 5142e2a..450b4da 100644
>>> --- a/include/kvm/arm_vgic.h
>>> +++ b/include/kvm/arm_vgic.h
>>> @@ -96,6 +96,7 @@ struct vgic_irq {
>>>  	bool active;			/* not used for LPIs */
>>>  	bool enabled;
>>>  	bool hw;			/* Tied to HW IRQ */
>>> +	struct kref refcount;		/* Used for LPIs */
>>>  	u32 hwintid;			/* HW INTID number */
>>>  	union {
>>>  		u8 targets;			/* GICv2 target VCPUs mask */
>>> diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
>>> index 90cae48..ac3c1a5 100644
>>> --- a/virt/kvm/arm/vgic/vgic-init.c
>>> +++ b/virt/kvm/arm/vgic/vgic-init.c
>>> @@ -177,6 +177,7 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
>>>  		spin_lock_init(&irq->irq_lock);
>>>  		irq->vcpu = NULL;
>>>  		irq->target_vcpu = vcpu0;
>>> +		kref_init(&irq->refcount);
>>>  		if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2)
>>>  			irq->targets = 0;
>>>  		else
>>> @@ -211,6 +212,7 @@ static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
>>>  		irq->vcpu = NULL;
>>>  		irq->target_vcpu = vcpu;
>>>  		irq->targets = 1U << vcpu->vcpu_id;
>>> +		kref_init(&irq->refcount);
>>>  		if (vgic_irq_is_sgi(i)) {
>>>  			/* SGIs */
>>>  			irq->enabled = 1;
>>> diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c
>>> index a213936..4152348 100644
>>> --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c
>>> +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c
>>> @@ -102,6 +102,7 @@ static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu,
>>>  		irq->source |= 1U << source_vcpu->vcpu_id;
>>>  
>>>  		vgic_queue_irq_unlock(source_vcpu->kvm, irq);
>>> +		vgic_put_irq(source_vcpu->kvm, irq);
>>>  	}
>>>  }
>>>  
>>> @@ -116,6 +117,8 @@ static unsigned long vgic_mmio_read_target(struct kvm_vcpu *vcpu,
>>>  		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>>>  
>>>  		val |= (u64)irq->targets << (i * 8);
>>> +
>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>  	}
>>>  
>>>  	return val;
>>> @@ -143,6 +146,7 @@ static void vgic_mmio_write_target(struct kvm_vcpu *vcpu,
>>>  		irq->target_vcpu = kvm_get_vcpu(vcpu->kvm, target);
>>>  
>>>  		spin_unlock(&irq->irq_lock);
>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>  	}
>>>  }
>>>  
>>> @@ -157,6 +161,8 @@ static unsigned long vgic_mmio_read_sgipend(struct kvm_vcpu *vcpu,
>>>  		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>>>  
>>>  		val |= (u64)irq->source << (i * 8);
>>> +
>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>  	}
>>>  	return val;
>>>  }
>>> @@ -178,6 +184,7 @@ static void vgic_mmio_write_sgipendc(struct kvm_vcpu *vcpu,
>>>  			irq->pending = false;
>>>  
>>>  		spin_unlock(&irq->irq_lock);
>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>  	}
>>>  }
>>>  
>>> @@ -201,6 +208,7 @@ static void vgic_mmio_write_sgipends(struct kvm_vcpu *vcpu,
>>>  		} else {
>>>  			spin_unlock(&irq->irq_lock);
>>>  		}
>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>  	}
>>>  }
>>>  
>>> diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
>>> index fc7b6c9..bfcafbd 100644
>>> --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
>>> +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
>>> @@ -80,15 +80,17 @@ static unsigned long vgic_mmio_read_irouter(struct kvm_vcpu *vcpu,
>>>  {
>>>  	int intid = VGIC_ADDR_TO_INTID(addr, 64);
>>>  	struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid);
>>> +	unsigned long ret = 0;
>>>  
>>>  	if (!irq)
>>>  		return 0;
>>>  
>>>  	/* The upper word is RAZ for us. */
>>> -	if (addr & 4)
>>> -		return 0;
>>> +	if (!(addr & 4))
>>> +		ret = extract_bytes(READ_ONCE(irq->mpidr), addr & 7, len);
>>>  
>>> -	return extract_bytes(READ_ONCE(irq->mpidr), addr & 7, len);
>>> +	vgic_put_irq(vcpu->kvm, irq);
>>> +	return ret;
>>>  }
>>>  
>>>  static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu,
>>> @@ -96,15 +98,17 @@ static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu,
>>>  				    unsigned long val)
>>>  {
>>>  	int intid = VGIC_ADDR_TO_INTID(addr, 64);
>>> -	struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid);
>>> -
>>> -	if (!irq)
>>> -		return;
>>> +	struct vgic_irq *irq;
>>>  
>>>  	/* The upper word is WI for us since we don't implement Aff3. */
>>>  	if (addr & 4)
>>>  		return;
>>>  
>>> +	irq = vgic_get_irq(vcpu->kvm, NULL, intid);
>>> +
>>> +	if (!irq)
>>> +		return;
>>> +
>>>  	spin_lock(&irq->irq_lock);
>>>  
>>>  	/* We only care about and preserve Aff0, Aff1 and Aff2. */
>>> @@ -112,6 +116,7 @@ static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu,
>>>  	irq->target_vcpu = kvm_mpidr_to_vcpu(vcpu->kvm, irq->mpidr);
>>>  
>>>  	spin_unlock(&irq->irq_lock);
>>> +	vgic_put_irq(vcpu->kvm, irq);
>>>  }
>>>  
>>>  static unsigned long vgic_mmio_read_v3r_typer(struct kvm_vcpu *vcpu,
>>> @@ -445,5 +450,6 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg)
>>>  		irq->pending = true;
>>>  
>>>  		vgic_queue_irq_unlock(vcpu->kvm, irq);
>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>  	}
>>>  }
>>> diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
>>> index 9f6fab7..5e79e01 100644
>>> --- a/virt/kvm/arm/vgic/vgic-mmio.c
>>> +++ b/virt/kvm/arm/vgic/vgic-mmio.c
>>> @@ -56,6 +56,8 @@ unsigned long vgic_mmio_read_enable(struct kvm_vcpu *vcpu,
>>>  
>>>  		if (irq->enabled)
>>>  			value |= (1U << i);
>>> +
>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>  	}
>>>  
>>>  	return value;
>>> @@ -74,6 +76,8 @@ void vgic_mmio_write_senable(struct kvm_vcpu *vcpu,
>>>  		spin_lock(&irq->irq_lock);
>>>  		irq->enabled = true;
>>>  		vgic_queue_irq_unlock(vcpu->kvm, irq);
>>> +
>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>  	}
>>>  }
>>>  
>>> @@ -92,6 +96,7 @@ void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu,
>>>  		irq->enabled = false;
>>>  
>>>  		spin_unlock(&irq->irq_lock);
>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>  	}
>>>  }
>>>  
>>> @@ -108,6 +113,8 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
>>>  
>>>  		if (irq->pending)
>>>  			value |= (1U << i);
>>> +
>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>  	}
>>>  
>>>  	return value;
>>> @@ -129,6 +136,7 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
>>>  			irq->soft_pending = true;
>>>  
>>>  		vgic_queue_irq_unlock(vcpu->kvm, irq);
>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>  	}
>>>  }
>>>  
>>> @@ -152,6 +160,7 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
>>>  		}
>>>  
>>>  		spin_unlock(&irq->irq_lock);
>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>  	}
>>>  }
>>>  
>>> @@ -168,6 +177,8 @@ unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
>>>  
>>>  		if (irq->active)
>>>  			value |= (1U << i);
>>> +
>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>  	}
>>>  
>>>  	return value;
>>> @@ -242,6 +253,7 @@ void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
>>>  	for_each_set_bit(i, &val, len * 8) {
>>>  		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>>>  		vgic_mmio_change_active(vcpu, irq, false);
>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>  	}
>>>  	vgic_change_active_finish(vcpu, intid);
>>>  }
>>> @@ -257,6 +269,7 @@ void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
>>>  	for_each_set_bit(i, &val, len * 8) {
>>>  		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>>>  		vgic_mmio_change_active(vcpu, irq, true);
>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>  	}
>>>  	vgic_change_active_finish(vcpu, intid);
>>>  }
>>> @@ -272,6 +285,8 @@ unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu,
>>>  		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>>>  
>>>  		val |= (u64)irq->priority << (i * 8);
>>> +
>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>  	}
>>>  
>>>  	return val;
>>> @@ -298,6 +313,8 @@ void vgic_mmio_write_priority(struct kvm_vcpu *vcpu,
>>>  		/* Narrow the priority range to what we actually support */
>>>  		irq->priority = (val >> (i * 8)) & GENMASK(7, 8 - VGIC_PRI_BITS);
>>>  		spin_unlock(&irq->irq_lock);
>>> +
>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>  	}
>>>  }
>>>  
>>> @@ -313,6 +330,8 @@ unsigned long vgic_mmio_read_config(struct kvm_vcpu *vcpu,
>>>  
>>>  		if (irq->config == VGIC_CONFIG_EDGE)
>>>  			value |= (2U << (i * 2));
>>> +
>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>  	}
>>>  
>>>  	return value;
>>> @@ -326,7 +345,7 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu,
>>>  	int i;
>>>  
>>>  	for (i = 0; i < len * 4; i++) {
>>> -		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>>> +		struct vgic_irq *irq;
>>>  
>>>  		/*
>>>  		 * The configuration cannot be changed for SGIs in general,
>>> @@ -337,14 +356,18 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu,
>>>  		if (intid + i < VGIC_NR_PRIVATE_IRQS)
>>>  			continue;
>>>  
>>> +		irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>>>  		spin_lock(&irq->irq_lock);
>>> +
>>>  		if (test_bit(i * 2 + 1, &val)) {
>>>  			irq->config = VGIC_CONFIG_EDGE;
>>>  		} else {
>>>  			irq->config = VGIC_CONFIG_LEVEL;
>>>  			irq->pending = irq->line_level | irq->soft_pending;
>>>  		}
>>> +
>>>  		spin_unlock(&irq->irq_lock);
>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>  	}
>>>  }
>>>  
>>> diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
>>> index 079bf67..0bf6709 100644
>>> --- a/virt/kvm/arm/vgic/vgic-v2.c
>>> +++ b/virt/kvm/arm/vgic/vgic-v2.c
>>> @@ -124,6 +124,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
>>>  		}
>>>  
>>>  		spin_unlock(&irq->irq_lock);
>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>  	}
>>>  }
>>>  
>>> diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
>>> index e48a22e..f0ac064 100644
>>> --- a/virt/kvm/arm/vgic/vgic-v3.c
>>> +++ b/virt/kvm/arm/vgic/vgic-v3.c
>>> @@ -113,6 +113,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
>>>  		}
>>>  
>>>  		spin_unlock(&irq->irq_lock);
>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>  	}
>>>  }
>>>  
>>> diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
>>> index 69b61ab..ae80894 100644
>>> --- a/virt/kvm/arm/vgic/vgic.c
>>> +++ b/virt/kvm/arm/vgic/vgic.c
>>> @@ -48,13 +48,20 @@ struct vgic_global __section(.hyp.text) kvm_vgic_global_state;
>>>  struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
>>>  			      u32 intid)
>>>  {
>>> -	/* SGIs and PPIs */
>>> -	if (intid <= VGIC_MAX_PRIVATE)
>>> -		return &vcpu->arch.vgic_cpu.private_irqs[intid];
>>> +	struct vgic_dist *dist = &kvm->arch.vgic;
>>> +	struct vgic_irq *irq;
>>>  
>>> -	/* SPIs */
>>> -	if (intid <= VGIC_MAX_SPI)
>>> -		return &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS];
>>> +	if (intid <= VGIC_MAX_PRIVATE) {        /* SGIs and PPIs */
>>> +		irq = &vcpu->arch.vgic_cpu.private_irqs[intid];
>>> +		kref_get(&irq->refcount);
>>> +		return irq;
>>> +	}
>>> +
>>> +	if (intid <= VGIC_MAX_SPI) {            /* SPIs */
>>> +		irq = &dist->spis[intid - VGIC_NR_PRIVATE_IRQS];
>>> +		kref_get(&irq->refcount);
>>> +		return irq;
>>> +	}
>>>  
>>>  	/* LPIs are not yet covered */
>>>  	if (intid >= VGIC_MIN_LPI)
>>> @@ -64,6 +71,17 @@ struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
>>>  	return NULL;
>>>  }
>>>  
>>> +/* The refcount should never drop to 0 at the moment. */
>>> +static void vgic_irq_release(struct kref *ref)
>>> +{
>>> +	WARN_ON(1);
>>> +}
>>> +
>>> +void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
>>> +{
>>> +	kref_put(&irq->refcount, vgic_irq_release);
>>> +}
>>> +
>>>  /**
>>>   * kvm_vgic_target_oracle - compute the target vcpu for an irq
>>>   *
>>> @@ -236,6 +254,7 @@ retry:
>>>  		goto retry;
>>>  	}
>>>  
>>> +	kref_get(&irq->refcount);
>>
>> Could you use vgic_get_irq() instead? I know it is slightly overkill,
>> but I can already tell that we'll need to add some tracing in both the
>> put and get helpers in order to do some debugging. Having straight
>> kref_get/put is going to make this tracing difficult, so let's not go there.
> 
> I'd rather not.
> 1) Putting the IRQ on the ap_list is the "other user" of the
> refcounting, I don't want to mix that unnecessarily with the
> vgic_get_irq() (as in: get the struct by the number) use case. That may
> actually help tracing, since we can have separate tracepoints to
> distinguish them.

And yet you end-up doing a vgic_put_irq() in the fold operation. Which
is wrong, by the way, as the interrupt is still in in ap_list. This
should be moved to the prune operation.

> 2) This would violate the locking order, since we hold the irq_lock here
> and possibly take the lpi_list_lock in vgic_get_irq().
> I don't think we can or should drop the irq_lock and re-take it just for
> this.

That's a much more convincing argument. And when you take the above into
account, you realize that your locking is not correct. You shouldn't be
dropping the refcount in fold, but in prune, meaning that you're holding
the ap_lock and the irq_lock, same as when you inserted the interrupt in
the list.

This is outlining an essential principle: if your locking/refcounting is
not symmetric, it is likely that you're doing something wrong, and that
should bother you really badly.

Thanks,

	M.
Andre Przywara July 8, 2016, 12:54 p.m. UTC | #5
On 08/07/16 11:50, Marc Zyngier wrote:
> On 08/07/16 11:28, Andre Przywara wrote:
>> Hi,
>>
>> On 07/07/16 16:00, Marc Zyngier wrote:
>>> On 05/07/16 12:22, Andre Przywara wrote:
>>>> In the moment our struct vgic_irq's are statically allocated at guest
>>>> creation time. So getting a pointer to an IRQ structure is trivial and
>>>> safe. LPIs are more dynamic, they can be mapped and unmapped at any time
>>>> during the guest's _runtime_.
>>>> In preparation for supporting LPIs we introduce reference counting for
>>>> those structures using the kernel's kref infrastructure.
>>>> Since private IRQs and SPIs are statically allocated, the refcount never
>>>> drops to 0 at the moment, but we increase it when an IRQ gets onto a VCPU
>>>> list and decrease it when it gets removed.
>>>> This introduces vgic_put_irq(), which wraps kref_put and hides the
>>>> release function from the callers.
>>>>
>>>> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
>>>> ---
>>>>  include/kvm/arm_vgic.h           |  1 +
>>>>  virt/kvm/arm/vgic/vgic-init.c    |  2 ++
>>>>  virt/kvm/arm/vgic/vgic-mmio-v2.c |  8 +++++++
>>>>  virt/kvm/arm/vgic/vgic-mmio-v3.c | 20 +++++++++++------
>>>>  virt/kvm/arm/vgic/vgic-mmio.c    | 25 ++++++++++++++++++++-
>>>>  virt/kvm/arm/vgic/vgic-v2.c      |  1 +
>>>>  virt/kvm/arm/vgic/vgic-v3.c      |  1 +
>>>>  virt/kvm/arm/vgic/vgic.c         | 48 +++++++++++++++++++++++++++++++---------
>>>>  virt/kvm/arm/vgic/vgic.h         |  1 +
>>>>  9 files changed, 89 insertions(+), 18 deletions(-)
>>>>
>>>> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
>>>> index 5142e2a..450b4da 100644
>>>> --- a/include/kvm/arm_vgic.h
>>>> +++ b/include/kvm/arm_vgic.h
>>>> @@ -96,6 +96,7 @@ struct vgic_irq {
>>>>  	bool active;			/* not used for LPIs */
>>>>  	bool enabled;
>>>>  	bool hw;			/* Tied to HW IRQ */
>>>> +	struct kref refcount;		/* Used for LPIs */
>>>>  	u32 hwintid;			/* HW INTID number */
>>>>  	union {
>>>>  		u8 targets;			/* GICv2 target VCPUs mask */
>>>> diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
>>>> index 90cae48..ac3c1a5 100644
>>>> --- a/virt/kvm/arm/vgic/vgic-init.c
>>>> +++ b/virt/kvm/arm/vgic/vgic-init.c
>>>> @@ -177,6 +177,7 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
>>>>  		spin_lock_init(&irq->irq_lock);
>>>>  		irq->vcpu = NULL;
>>>>  		irq->target_vcpu = vcpu0;
>>>> +		kref_init(&irq->refcount);
>>>>  		if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2)
>>>>  			irq->targets = 0;
>>>>  		else
>>>> @@ -211,6 +212,7 @@ static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
>>>>  		irq->vcpu = NULL;
>>>>  		irq->target_vcpu = vcpu;
>>>>  		irq->targets = 1U << vcpu->vcpu_id;
>>>> +		kref_init(&irq->refcount);
>>>>  		if (vgic_irq_is_sgi(i)) {
>>>>  			/* SGIs */
>>>>  			irq->enabled = 1;
>>>> diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c
>>>> index a213936..4152348 100644
>>>> --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c
>>>> +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c
>>>> @@ -102,6 +102,7 @@ static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu,
>>>>  		irq->source |= 1U << source_vcpu->vcpu_id;
>>>>  
>>>>  		vgic_queue_irq_unlock(source_vcpu->kvm, irq);
>>>> +		vgic_put_irq(source_vcpu->kvm, irq);
>>>>  	}
>>>>  }
>>>>  
>>>> @@ -116,6 +117,8 @@ static unsigned long vgic_mmio_read_target(struct kvm_vcpu *vcpu,
>>>>  		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>>>>  
>>>>  		val |= (u64)irq->targets << (i * 8);
>>>> +
>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>  	}
>>>>  
>>>>  	return val;
>>>> @@ -143,6 +146,7 @@ static void vgic_mmio_write_target(struct kvm_vcpu *vcpu,
>>>>  		irq->target_vcpu = kvm_get_vcpu(vcpu->kvm, target);
>>>>  
>>>>  		spin_unlock(&irq->irq_lock);
>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>  	}
>>>>  }
>>>>  
>>>> @@ -157,6 +161,8 @@ static unsigned long vgic_mmio_read_sgipend(struct kvm_vcpu *vcpu,
>>>>  		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>>>>  
>>>>  		val |= (u64)irq->source << (i * 8);
>>>> +
>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>  	}
>>>>  	return val;
>>>>  }
>>>> @@ -178,6 +184,7 @@ static void vgic_mmio_write_sgipendc(struct kvm_vcpu *vcpu,
>>>>  			irq->pending = false;
>>>>  
>>>>  		spin_unlock(&irq->irq_lock);
>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>  	}
>>>>  }
>>>>  
>>>> @@ -201,6 +208,7 @@ static void vgic_mmio_write_sgipends(struct kvm_vcpu *vcpu,
>>>>  		} else {
>>>>  			spin_unlock(&irq->irq_lock);
>>>>  		}
>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>  	}
>>>>  }
>>>>  
>>>> diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
>>>> index fc7b6c9..bfcafbd 100644
>>>> --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
>>>> +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
>>>> @@ -80,15 +80,17 @@ static unsigned long vgic_mmio_read_irouter(struct kvm_vcpu *vcpu,
>>>>  {
>>>>  	int intid = VGIC_ADDR_TO_INTID(addr, 64);
>>>>  	struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid);
>>>> +	unsigned long ret = 0;
>>>>  
>>>>  	if (!irq)
>>>>  		return 0;
>>>>  
>>>>  	/* The upper word is RAZ for us. */
>>>> -	if (addr & 4)
>>>> -		return 0;
>>>> +	if (!(addr & 4))
>>>> +		ret = extract_bytes(READ_ONCE(irq->mpidr), addr & 7, len);
>>>>  
>>>> -	return extract_bytes(READ_ONCE(irq->mpidr), addr & 7, len);
>>>> +	vgic_put_irq(vcpu->kvm, irq);
>>>> +	return ret;
>>>>  }
>>>>  
>>>>  static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu,
>>>> @@ -96,15 +98,17 @@ static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu,
>>>>  				    unsigned long val)
>>>>  {
>>>>  	int intid = VGIC_ADDR_TO_INTID(addr, 64);
>>>> -	struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid);
>>>> -
>>>> -	if (!irq)
>>>> -		return;
>>>> +	struct vgic_irq *irq;
>>>>  
>>>>  	/* The upper word is WI for us since we don't implement Aff3. */
>>>>  	if (addr & 4)
>>>>  		return;
>>>>  
>>>> +	irq = vgic_get_irq(vcpu->kvm, NULL, intid);
>>>> +
>>>> +	if (!irq)
>>>> +		return;
>>>> +
>>>>  	spin_lock(&irq->irq_lock);
>>>>  
>>>>  	/* We only care about and preserve Aff0, Aff1 and Aff2. */
>>>> @@ -112,6 +116,7 @@ static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu,
>>>>  	irq->target_vcpu = kvm_mpidr_to_vcpu(vcpu->kvm, irq->mpidr);
>>>>  
>>>>  	spin_unlock(&irq->irq_lock);
>>>> +	vgic_put_irq(vcpu->kvm, irq);
>>>>  }
>>>>  
>>>>  static unsigned long vgic_mmio_read_v3r_typer(struct kvm_vcpu *vcpu,
>>>> @@ -445,5 +450,6 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg)
>>>>  		irq->pending = true;
>>>>  
>>>>  		vgic_queue_irq_unlock(vcpu->kvm, irq);
>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>  	}
>>>>  }
>>>> diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
>>>> index 9f6fab7..5e79e01 100644
>>>> --- a/virt/kvm/arm/vgic/vgic-mmio.c
>>>> +++ b/virt/kvm/arm/vgic/vgic-mmio.c
>>>> @@ -56,6 +56,8 @@ unsigned long vgic_mmio_read_enable(struct kvm_vcpu *vcpu,
>>>>  
>>>>  		if (irq->enabled)
>>>>  			value |= (1U << i);
>>>> +
>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>  	}
>>>>  
>>>>  	return value;
>>>> @@ -74,6 +76,8 @@ void vgic_mmio_write_senable(struct kvm_vcpu *vcpu,
>>>>  		spin_lock(&irq->irq_lock);
>>>>  		irq->enabled = true;
>>>>  		vgic_queue_irq_unlock(vcpu->kvm, irq);
>>>> +
>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>  	}
>>>>  }
>>>>  
>>>> @@ -92,6 +96,7 @@ void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu,
>>>>  		irq->enabled = false;
>>>>  
>>>>  		spin_unlock(&irq->irq_lock);
>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>  	}
>>>>  }
>>>>  
>>>> @@ -108,6 +113,8 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
>>>>  
>>>>  		if (irq->pending)
>>>>  			value |= (1U << i);
>>>> +
>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>  	}
>>>>  
>>>>  	return value;
>>>> @@ -129,6 +136,7 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
>>>>  			irq->soft_pending = true;
>>>>  
>>>>  		vgic_queue_irq_unlock(vcpu->kvm, irq);
>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>  	}
>>>>  }
>>>>  
>>>> @@ -152,6 +160,7 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
>>>>  		}
>>>>  
>>>>  		spin_unlock(&irq->irq_lock);
>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>  	}
>>>>  }
>>>>  
>>>> @@ -168,6 +177,8 @@ unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
>>>>  
>>>>  		if (irq->active)
>>>>  			value |= (1U << i);
>>>> +
>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>  	}
>>>>  
>>>>  	return value;
>>>> @@ -242,6 +253,7 @@ void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
>>>>  	for_each_set_bit(i, &val, len * 8) {
>>>>  		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>>>>  		vgic_mmio_change_active(vcpu, irq, false);
>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>  	}
>>>>  	vgic_change_active_finish(vcpu, intid);
>>>>  }
>>>> @@ -257,6 +269,7 @@ void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
>>>>  	for_each_set_bit(i, &val, len * 8) {
>>>>  		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>>>>  		vgic_mmio_change_active(vcpu, irq, true);
>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>  	}
>>>>  	vgic_change_active_finish(vcpu, intid);
>>>>  }
>>>> @@ -272,6 +285,8 @@ unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu,
>>>>  		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>>>>  
>>>>  		val |= (u64)irq->priority << (i * 8);
>>>> +
>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>  	}
>>>>  
>>>>  	return val;
>>>> @@ -298,6 +313,8 @@ void vgic_mmio_write_priority(struct kvm_vcpu *vcpu,
>>>>  		/* Narrow the priority range to what we actually support */
>>>>  		irq->priority = (val >> (i * 8)) & GENMASK(7, 8 - VGIC_PRI_BITS);
>>>>  		spin_unlock(&irq->irq_lock);
>>>> +
>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>  	}
>>>>  }
>>>>  
>>>> @@ -313,6 +330,8 @@ unsigned long vgic_mmio_read_config(struct kvm_vcpu *vcpu,
>>>>  
>>>>  		if (irq->config == VGIC_CONFIG_EDGE)
>>>>  			value |= (2U << (i * 2));
>>>> +
>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>  	}
>>>>  
>>>>  	return value;
>>>> @@ -326,7 +345,7 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu,
>>>>  	int i;
>>>>  
>>>>  	for (i = 0; i < len * 4; i++) {
>>>> -		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>>>> +		struct vgic_irq *irq;
>>>>  
>>>>  		/*
>>>>  		 * The configuration cannot be changed for SGIs in general,
>>>> @@ -337,14 +356,18 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu,
>>>>  		if (intid + i < VGIC_NR_PRIVATE_IRQS)
>>>>  			continue;
>>>>  
>>>> +		irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>>>>  		spin_lock(&irq->irq_lock);
>>>> +
>>>>  		if (test_bit(i * 2 + 1, &val)) {
>>>>  			irq->config = VGIC_CONFIG_EDGE;
>>>>  		} else {
>>>>  			irq->config = VGIC_CONFIG_LEVEL;
>>>>  			irq->pending = irq->line_level | irq->soft_pending;
>>>>  		}
>>>> +
>>>>  		spin_unlock(&irq->irq_lock);
>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>  	}
>>>>  }
>>>>  
>>>> diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
>>>> index 079bf67..0bf6709 100644
>>>> --- a/virt/kvm/arm/vgic/vgic-v2.c
>>>> +++ b/virt/kvm/arm/vgic/vgic-v2.c
>>>> @@ -124,6 +124,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
>>>>  		}
>>>>  
>>>>  		spin_unlock(&irq->irq_lock);
>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>  	}
>>>>  }
>>>>  
>>>> diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
>>>> index e48a22e..f0ac064 100644
>>>> --- a/virt/kvm/arm/vgic/vgic-v3.c
>>>> +++ b/virt/kvm/arm/vgic/vgic-v3.c
>>>> @@ -113,6 +113,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
>>>>  		}
>>>>  
>>>>  		spin_unlock(&irq->irq_lock);
>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>  	}
>>>>  }
>>>>  
>>>> diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
>>>> index 69b61ab..ae80894 100644
>>>> --- a/virt/kvm/arm/vgic/vgic.c
>>>> +++ b/virt/kvm/arm/vgic/vgic.c
>>>> @@ -48,13 +48,20 @@ struct vgic_global __section(.hyp.text) kvm_vgic_global_state;
>>>>  struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
>>>>  			      u32 intid)
>>>>  {
>>>> -	/* SGIs and PPIs */
>>>> -	if (intid <= VGIC_MAX_PRIVATE)
>>>> -		return &vcpu->arch.vgic_cpu.private_irqs[intid];
>>>> +	struct vgic_dist *dist = &kvm->arch.vgic;
>>>> +	struct vgic_irq *irq;
>>>>  
>>>> -	/* SPIs */
>>>> -	if (intid <= VGIC_MAX_SPI)
>>>> -		return &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS];
>>>> +	if (intid <= VGIC_MAX_PRIVATE) {        /* SGIs and PPIs */
>>>> +		irq = &vcpu->arch.vgic_cpu.private_irqs[intid];
>>>> +		kref_get(&irq->refcount);
>>>> +		return irq;
>>>> +	}
>>>> +
>>>> +	if (intid <= VGIC_MAX_SPI) {            /* SPIs */
>>>> +		irq = &dist->spis[intid - VGIC_NR_PRIVATE_IRQS];
>>>> +		kref_get(&irq->refcount);
>>>> +		return irq;
>>>> +	}
>>>>  
>>>>  	/* LPIs are not yet covered */
>>>>  	if (intid >= VGIC_MIN_LPI)
>>>> @@ -64,6 +71,17 @@ struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
>>>>  	return NULL;
>>>>  }
>>>>  
>>>> +/* The refcount should never drop to 0 at the moment. */
>>>> +static void vgic_irq_release(struct kref *ref)
>>>> +{
>>>> +	WARN_ON(1);
>>>> +}
>>>> +
>>>> +void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
>>>> +{
>>>> +	kref_put(&irq->refcount, vgic_irq_release);
>>>> +}
>>>> +
>>>>  /**
>>>>   * kvm_vgic_target_oracle - compute the target vcpu for an irq
>>>>   *
>>>> @@ -236,6 +254,7 @@ retry:
>>>>  		goto retry;
>>>>  	}
>>>>  
>>>> +	kref_get(&irq->refcount);
>>>
>>> Could you use vgic_get_irq() instead? I know it is slightly overkill,
>>> but I can already tell that we'll need to add some tracing in both the
>>> put and get helpers in order to do some debugging. Having straight
>>> kref_get/put is going to make this tracing difficult, so let's not go there.
>>
>> I'd rather not.
>> 1) Putting the IRQ on the ap_list is the "other user" of the
>> refcounting, I don't want to mix that unnecessarily with the
>> vgic_get_irq() (as in: get the struct by the number) use case. That may
>> actually help tracing, since we can have separate tracepoints to
>> distinguish them.
> 
> And yet you end-up doing a vgic_put_irq() in the fold operation. Which
> is wrong, by the way, as the interrupt is still in in ap_list. This
> should be moved to the prune operation.
> 
>> 2) This would violate the locking order, since we hold the irq_lock here
>> and possibly take the lpi_list_lock in vgic_get_irq().
>> I don't think we can or should drop the irq_lock and re-take it just for
>> this.
> 
> That's a much more convincing argument. And when you take the above into
> account, you realize that your locking is not correct. You shouldn't be
> dropping the refcount in fold, but in prune, meaning that you're holding
> the ap_lock and the irq_lock, same as when you inserted the interrupt in
> the list.
> 
> This is outlining an essential principle: if your locking/refcounting is
> not symmetric, it is likely that you're doing something wrong, and that
> should bother you really badly.

Can you point me to the exact location where it's not symmetric?
I just looked at it again and can't find the issue.
I "put" it in v[23]_fold because we did a vgic_get_irq a few lines
before to translate the LR's intid into our struct vgic_irq pointer.
The vgic_get_irq() call isn't in this patch, because we used it already
before and this patch is just adding the respective puts.
The only asymmetry I could find is the expected one when it comes to and
goes from the ap_list.

Cheers,
Andre.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marc Zyngier July 8, 2016, 1:09 p.m. UTC | #6
On 08/07/16 13:54, André Przywara wrote:
> On 08/07/16 11:50, Marc Zyngier wrote:
>> On 08/07/16 11:28, Andre Przywara wrote:
>>> Hi,
>>>
>>> On 07/07/16 16:00, Marc Zyngier wrote:
>>>> On 05/07/16 12:22, Andre Przywara wrote:
>>>>> In the moment our struct vgic_irq's are statically allocated at guest
>>>>> creation time. So getting a pointer to an IRQ structure is trivial and
>>>>> safe. LPIs are more dynamic, they can be mapped and unmapped at any time
>>>>> during the guest's _runtime_.
>>>>> In preparation for supporting LPIs we introduce reference counting for
>>>>> those structures using the kernel's kref infrastructure.
>>>>> Since private IRQs and SPIs are statically allocated, the refcount never
>>>>> drops to 0 at the moment, but we increase it when an IRQ gets onto a VCPU
>>>>> list and decrease it when it gets removed.
>>>>> This introduces vgic_put_irq(), which wraps kref_put and hides the
>>>>> release function from the callers.
>>>>>
>>>>> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
>>>>> ---
>>>>>  include/kvm/arm_vgic.h           |  1 +
>>>>>  virt/kvm/arm/vgic/vgic-init.c    |  2 ++
>>>>>  virt/kvm/arm/vgic/vgic-mmio-v2.c |  8 +++++++
>>>>>  virt/kvm/arm/vgic/vgic-mmio-v3.c | 20 +++++++++++------
>>>>>  virt/kvm/arm/vgic/vgic-mmio.c    | 25 ++++++++++++++++++++-
>>>>>  virt/kvm/arm/vgic/vgic-v2.c      |  1 +
>>>>>  virt/kvm/arm/vgic/vgic-v3.c      |  1 +
>>>>>  virt/kvm/arm/vgic/vgic.c         | 48 +++++++++++++++++++++++++++++++---------
>>>>>  virt/kvm/arm/vgic/vgic.h         |  1 +
>>>>>  9 files changed, 89 insertions(+), 18 deletions(-)
>>>>>
>>>>> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
>>>>> index 5142e2a..450b4da 100644
>>>>> --- a/include/kvm/arm_vgic.h
>>>>> +++ b/include/kvm/arm_vgic.h
>>>>> @@ -96,6 +96,7 @@ struct vgic_irq {
>>>>>  	bool active;			/* not used for LPIs */
>>>>>  	bool enabled;
>>>>>  	bool hw;			/* Tied to HW IRQ */
>>>>> +	struct kref refcount;		/* Used for LPIs */
>>>>>  	u32 hwintid;			/* HW INTID number */
>>>>>  	union {
>>>>>  		u8 targets;			/* GICv2 target VCPUs mask */
>>>>> diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
>>>>> index 90cae48..ac3c1a5 100644
>>>>> --- a/virt/kvm/arm/vgic/vgic-init.c
>>>>> +++ b/virt/kvm/arm/vgic/vgic-init.c
>>>>> @@ -177,6 +177,7 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
>>>>>  		spin_lock_init(&irq->irq_lock);
>>>>>  		irq->vcpu = NULL;
>>>>>  		irq->target_vcpu = vcpu0;
>>>>> +		kref_init(&irq->refcount);
>>>>>  		if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2)
>>>>>  			irq->targets = 0;
>>>>>  		else
>>>>> @@ -211,6 +212,7 @@ static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
>>>>>  		irq->vcpu = NULL;
>>>>>  		irq->target_vcpu = vcpu;
>>>>>  		irq->targets = 1U << vcpu->vcpu_id;
>>>>> +		kref_init(&irq->refcount);
>>>>>  		if (vgic_irq_is_sgi(i)) {
>>>>>  			/* SGIs */
>>>>>  			irq->enabled = 1;
>>>>> diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c
>>>>> index a213936..4152348 100644
>>>>> --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c
>>>>> +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c
>>>>> @@ -102,6 +102,7 @@ static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu,
>>>>>  		irq->source |= 1U << source_vcpu->vcpu_id;
>>>>>  
>>>>>  		vgic_queue_irq_unlock(source_vcpu->kvm, irq);
>>>>> +		vgic_put_irq(source_vcpu->kvm, irq);
>>>>>  	}
>>>>>  }
>>>>>  
>>>>> @@ -116,6 +117,8 @@ static unsigned long vgic_mmio_read_target(struct kvm_vcpu *vcpu,
>>>>>  		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>>>>>  
>>>>>  		val |= (u64)irq->targets << (i * 8);
>>>>> +
>>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>>  	}
>>>>>  
>>>>>  	return val;
>>>>> @@ -143,6 +146,7 @@ static void vgic_mmio_write_target(struct kvm_vcpu *vcpu,
>>>>>  		irq->target_vcpu = kvm_get_vcpu(vcpu->kvm, target);
>>>>>  
>>>>>  		spin_unlock(&irq->irq_lock);
>>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>>  	}
>>>>>  }
>>>>>  
>>>>> @@ -157,6 +161,8 @@ static unsigned long vgic_mmio_read_sgipend(struct kvm_vcpu *vcpu,
>>>>>  		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>>>>>  
>>>>>  		val |= (u64)irq->source << (i * 8);
>>>>> +
>>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>>  	}
>>>>>  	return val;
>>>>>  }
>>>>> @@ -178,6 +184,7 @@ static void vgic_mmio_write_sgipendc(struct kvm_vcpu *vcpu,
>>>>>  			irq->pending = false;
>>>>>  
>>>>>  		spin_unlock(&irq->irq_lock);
>>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>>  	}
>>>>>  }
>>>>>  
>>>>> @@ -201,6 +208,7 @@ static void vgic_mmio_write_sgipends(struct kvm_vcpu *vcpu,
>>>>>  		} else {
>>>>>  			spin_unlock(&irq->irq_lock);
>>>>>  		}
>>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>>  	}
>>>>>  }
>>>>>  
>>>>> diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
>>>>> index fc7b6c9..bfcafbd 100644
>>>>> --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
>>>>> +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
>>>>> @@ -80,15 +80,17 @@ static unsigned long vgic_mmio_read_irouter(struct kvm_vcpu *vcpu,
>>>>>  {
>>>>>  	int intid = VGIC_ADDR_TO_INTID(addr, 64);
>>>>>  	struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid);
>>>>> +	unsigned long ret = 0;
>>>>>  
>>>>>  	if (!irq)
>>>>>  		return 0;
>>>>>  
>>>>>  	/* The upper word is RAZ for us. */
>>>>> -	if (addr & 4)
>>>>> -		return 0;
>>>>> +	if (!(addr & 4))
>>>>> +		ret = extract_bytes(READ_ONCE(irq->mpidr), addr & 7, len);
>>>>>  
>>>>> -	return extract_bytes(READ_ONCE(irq->mpidr), addr & 7, len);
>>>>> +	vgic_put_irq(vcpu->kvm, irq);
>>>>> +	return ret;
>>>>>  }
>>>>>  
>>>>>  static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu,
>>>>> @@ -96,15 +98,17 @@ static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu,
>>>>>  				    unsigned long val)
>>>>>  {
>>>>>  	int intid = VGIC_ADDR_TO_INTID(addr, 64);
>>>>> -	struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid);
>>>>> -
>>>>> -	if (!irq)
>>>>> -		return;
>>>>> +	struct vgic_irq *irq;
>>>>>  
>>>>>  	/* The upper word is WI for us since we don't implement Aff3. */
>>>>>  	if (addr & 4)
>>>>>  		return;
>>>>>  
>>>>> +	irq = vgic_get_irq(vcpu->kvm, NULL, intid);
>>>>> +
>>>>> +	if (!irq)
>>>>> +		return;
>>>>> +
>>>>>  	spin_lock(&irq->irq_lock);
>>>>>  
>>>>>  	/* We only care about and preserve Aff0, Aff1 and Aff2. */
>>>>> @@ -112,6 +116,7 @@ static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu,
>>>>>  	irq->target_vcpu = kvm_mpidr_to_vcpu(vcpu->kvm, irq->mpidr);
>>>>>  
>>>>>  	spin_unlock(&irq->irq_lock);
>>>>> +	vgic_put_irq(vcpu->kvm, irq);
>>>>>  }
>>>>>  
>>>>>  static unsigned long vgic_mmio_read_v3r_typer(struct kvm_vcpu *vcpu,
>>>>> @@ -445,5 +450,6 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg)
>>>>>  		irq->pending = true;
>>>>>  
>>>>>  		vgic_queue_irq_unlock(vcpu->kvm, irq);
>>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>>  	}
>>>>>  }
>>>>> diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
>>>>> index 9f6fab7..5e79e01 100644
>>>>> --- a/virt/kvm/arm/vgic/vgic-mmio.c
>>>>> +++ b/virt/kvm/arm/vgic/vgic-mmio.c
>>>>> @@ -56,6 +56,8 @@ unsigned long vgic_mmio_read_enable(struct kvm_vcpu *vcpu,
>>>>>  
>>>>>  		if (irq->enabled)
>>>>>  			value |= (1U << i);
>>>>> +
>>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>>  	}
>>>>>  
>>>>>  	return value;
>>>>> @@ -74,6 +76,8 @@ void vgic_mmio_write_senable(struct kvm_vcpu *vcpu,
>>>>>  		spin_lock(&irq->irq_lock);
>>>>>  		irq->enabled = true;
>>>>>  		vgic_queue_irq_unlock(vcpu->kvm, irq);
>>>>> +
>>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>>  	}
>>>>>  }
>>>>>  
>>>>> @@ -92,6 +96,7 @@ void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu,
>>>>>  		irq->enabled = false;
>>>>>  
>>>>>  		spin_unlock(&irq->irq_lock);
>>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>>  	}
>>>>>  }
>>>>>  
>>>>> @@ -108,6 +113,8 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
>>>>>  
>>>>>  		if (irq->pending)
>>>>>  			value |= (1U << i);
>>>>> +
>>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>>  	}
>>>>>  
>>>>>  	return value;
>>>>> @@ -129,6 +136,7 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
>>>>>  			irq->soft_pending = true;
>>>>>  
>>>>>  		vgic_queue_irq_unlock(vcpu->kvm, irq);
>>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>>  	}
>>>>>  }
>>>>>  
>>>>> @@ -152,6 +160,7 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
>>>>>  		}
>>>>>  
>>>>>  		spin_unlock(&irq->irq_lock);
>>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>>  	}
>>>>>  }
>>>>>  
>>>>> @@ -168,6 +177,8 @@ unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
>>>>>  
>>>>>  		if (irq->active)
>>>>>  			value |= (1U << i);
>>>>> +
>>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>>  	}
>>>>>  
>>>>>  	return value;
>>>>> @@ -242,6 +253,7 @@ void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
>>>>>  	for_each_set_bit(i, &val, len * 8) {
>>>>>  		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>>>>>  		vgic_mmio_change_active(vcpu, irq, false);
>>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>>  	}
>>>>>  	vgic_change_active_finish(vcpu, intid);
>>>>>  }
>>>>> @@ -257,6 +269,7 @@ void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
>>>>>  	for_each_set_bit(i, &val, len * 8) {
>>>>>  		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>>>>>  		vgic_mmio_change_active(vcpu, irq, true);
>>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>>  	}
>>>>>  	vgic_change_active_finish(vcpu, intid);
>>>>>  }
>>>>> @@ -272,6 +285,8 @@ unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu,
>>>>>  		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>>>>>  
>>>>>  		val |= (u64)irq->priority << (i * 8);
>>>>> +
>>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>>  	}
>>>>>  
>>>>>  	return val;
>>>>> @@ -298,6 +313,8 @@ void vgic_mmio_write_priority(struct kvm_vcpu *vcpu,
>>>>>  		/* Narrow the priority range to what we actually support */
>>>>>  		irq->priority = (val >> (i * 8)) & GENMASK(7, 8 - VGIC_PRI_BITS);
>>>>>  		spin_unlock(&irq->irq_lock);
>>>>> +
>>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>>  	}
>>>>>  }
>>>>>  
>>>>> @@ -313,6 +330,8 @@ unsigned long vgic_mmio_read_config(struct kvm_vcpu *vcpu,
>>>>>  
>>>>>  		if (irq->config == VGIC_CONFIG_EDGE)
>>>>>  			value |= (2U << (i * 2));
>>>>> +
>>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>>  	}
>>>>>  
>>>>>  	return value;
>>>>> @@ -326,7 +345,7 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu,
>>>>>  	int i;
>>>>>  
>>>>>  	for (i = 0; i < len * 4; i++) {
>>>>> -		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>>>>> +		struct vgic_irq *irq;
>>>>>  
>>>>>  		/*
>>>>>  		 * The configuration cannot be changed for SGIs in general,
>>>>> @@ -337,14 +356,18 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu,
>>>>>  		if (intid + i < VGIC_NR_PRIVATE_IRQS)
>>>>>  			continue;
>>>>>  
>>>>> +		irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>>>>>  		spin_lock(&irq->irq_lock);
>>>>> +
>>>>>  		if (test_bit(i * 2 + 1, &val)) {
>>>>>  			irq->config = VGIC_CONFIG_EDGE;
>>>>>  		} else {
>>>>>  			irq->config = VGIC_CONFIG_LEVEL;
>>>>>  			irq->pending = irq->line_level | irq->soft_pending;
>>>>>  		}
>>>>> +
>>>>>  		spin_unlock(&irq->irq_lock);
>>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>>  	}
>>>>>  }
>>>>>  
>>>>> diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
>>>>> index 079bf67..0bf6709 100644
>>>>> --- a/virt/kvm/arm/vgic/vgic-v2.c
>>>>> +++ b/virt/kvm/arm/vgic/vgic-v2.c
>>>>> @@ -124,6 +124,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
>>>>>  		}
>>>>>  
>>>>>  		spin_unlock(&irq->irq_lock);
>>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>>  	}
>>>>>  }
>>>>>  
>>>>> diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
>>>>> index e48a22e..f0ac064 100644
>>>>> --- a/virt/kvm/arm/vgic/vgic-v3.c
>>>>> +++ b/virt/kvm/arm/vgic/vgic-v3.c
>>>>> @@ -113,6 +113,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
>>>>>  		}
>>>>>  
>>>>>  		spin_unlock(&irq->irq_lock);
>>>>> +		vgic_put_irq(vcpu->kvm, irq);
>>>>>  	}
>>>>>  }
>>>>>  
>>>>> diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
>>>>> index 69b61ab..ae80894 100644
>>>>> --- a/virt/kvm/arm/vgic/vgic.c
>>>>> +++ b/virt/kvm/arm/vgic/vgic.c
>>>>> @@ -48,13 +48,20 @@ struct vgic_global __section(.hyp.text) kvm_vgic_global_state;
>>>>>  struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
>>>>>  			      u32 intid)
>>>>>  {
>>>>> -	/* SGIs and PPIs */
>>>>> -	if (intid <= VGIC_MAX_PRIVATE)
>>>>> -		return &vcpu->arch.vgic_cpu.private_irqs[intid];
>>>>> +	struct vgic_dist *dist = &kvm->arch.vgic;
>>>>> +	struct vgic_irq *irq;
>>>>>  
>>>>> -	/* SPIs */
>>>>> -	if (intid <= VGIC_MAX_SPI)
>>>>> -		return &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS];
>>>>> +	if (intid <= VGIC_MAX_PRIVATE) {        /* SGIs and PPIs */
>>>>> +		irq = &vcpu->arch.vgic_cpu.private_irqs[intid];
>>>>> +		kref_get(&irq->refcount);
>>>>> +		return irq;
>>>>> +	}
>>>>> +
>>>>> +	if (intid <= VGIC_MAX_SPI) {            /* SPIs */
>>>>> +		irq = &dist->spis[intid - VGIC_NR_PRIVATE_IRQS];
>>>>> +		kref_get(&irq->refcount);
>>>>> +		return irq;
>>>>> +	}
>>>>>  
>>>>>  	/* LPIs are not yet covered */
>>>>>  	if (intid >= VGIC_MIN_LPI)
>>>>> @@ -64,6 +71,17 @@ struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
>>>>>  	return NULL;
>>>>>  }
>>>>>  
>>>>> +/* The refcount should never drop to 0 at the moment. */
>>>>> +static void vgic_irq_release(struct kref *ref)
>>>>> +{
>>>>> +	WARN_ON(1);
>>>>> +}
>>>>> +
>>>>> +void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
>>>>> +{
>>>>> +	kref_put(&irq->refcount, vgic_irq_release);
>>>>> +}
>>>>> +
>>>>>  /**
>>>>>   * kvm_vgic_target_oracle - compute the target vcpu for an irq
>>>>>   *
>>>>> @@ -236,6 +254,7 @@ retry:
>>>>>  		goto retry;
>>>>>  	}
>>>>>  
>>>>> +	kref_get(&irq->refcount);
>>>>
>>>> Could you use vgic_get_irq() instead? I know it is slightly overkill,
>>>> but I can already tell that we'll need to add some tracing in both the
>>>> put and get helpers in order to do some debugging. Having straight
>>>> kref_get/put is going to make this tracing difficult, so let's not go there.
>>>
>>> I'd rather not.
>>> 1) Putting the IRQ on the ap_list is the "other user" of the
>>> refcounting, I don't want to mix that unnecessarily with the
>>> vgic_get_irq() (as in: get the struct by the number) use case. That may
>>> actually help tracing, since we can have separate tracepoints to
>>> distinguish them.
>>
>> And yet you end-up doing a vgic_put_irq() in the fold operation. Which
>> is wrong, by the way, as the interrupt is still in in ap_list. This
>> should be moved to the prune operation.
>>
>>> 2) This would violate the locking order, since we hold the irq_lock here
>>> and possibly take the lpi_list_lock in vgic_get_irq().
>>> I don't think we can or should drop the irq_lock and re-take it just for
>>> this.
>>
>> That's a much more convincing argument. And when you take the above into
>> account, you realize that your locking is not correct. You shouldn't be
>> dropping the refcount in fold, but in prune, meaning that you're holding
>> the ap_lock and the irq_lock, same as when you inserted the interrupt in
>> the list.
>>
>> This is outlining an essential principle: if your locking/refcounting is
>> not symmetric, it is likely that you're doing something wrong, and that
>> should bother you really badly.
> 
> Can you point me to the exact location where it's not symmetric?
> I just looked at it again and can't find the issue.
> I "put" it in v[23]_fold because we did a vgic_get_irq a few lines
> before to translate the LR's intid into our struct vgic_irq pointer.

Right. I misread that one, apologies.

> The vgic_get_irq() call isn't in this patch, because we used it already
> before and this patch is just adding the respective puts.
> The only asymmetry I could find is the expected one when it comes to and
> goes from the ap_list.

So I assume this is the pendent of the kref_get call?

@@ -386,6 +412,7 @@ retry:
 			list_del(&irq->ap_list);
 			irq->vcpu = NULL;
 			spin_unlock(&irq->irq_lock);
+			vgic_put_irq(vcpu->kvm, irq);
 			continue;

If that's the case, please add a comment, because this is really hard to
find out which vgic_put_irq() balances with a kref_get() and not a
vgic_get_irq().

Thanks,

	M.
Andre Przywara July 8, 2016, 1:14 p.m. UTC | #7
On 08/07/16 14:09, Marc Zyngier wrote:
> On 08/07/16 13:54, André Przywara wrote:
>> On 08/07/16 11:50, Marc Zyngier wrote:
>>> On 08/07/16 11:28, Andre Przywara wrote:
>>>> Hi,
>>>>
>>>> On 07/07/16 16:00, Marc Zyngier wrote:
>>>>> On 05/07/16 12:22, Andre Przywara wrote:

....

>>>>>> @@ -236,6 +254,7 @@ retry:
>>>>>>  		goto retry;
>>>>>>  	}
>>>>>>  
>>>>>> +	kref_get(&irq->refcount);
>>>>>
>>>>> Could you use vgic_get_irq() instead? I know it is slightly overkill,
>>>>> but I can already tell that we'll need to add some tracing in both the
>>>>> put and get helpers in order to do some debugging. Having straight
>>>>> kref_get/put is going to make this tracing difficult, so let's not go there.
>>>>
>>>> I'd rather not.
>>>> 1) Putting the IRQ on the ap_list is the "other user" of the
>>>> refcounting, I don't want to mix that unnecessarily with the
>>>> vgic_get_irq() (as in: get the struct by the number) use case. That may
>>>> actually help tracing, since we can have separate tracepoints to
>>>> distinguish them.
>>>
>>> And yet you end-up doing a vgic_put_irq() in the fold operation. Which
>>> is wrong, by the way, as the interrupt is still in in ap_list. This
>>> should be moved to the prune operation.
>>>
>>>> 2) This would violate the locking order, since we hold the irq_lock here
>>>> and possibly take the lpi_list_lock in vgic_get_irq().
>>>> I don't think we can or should drop the irq_lock and re-take it just for
>>>> this.
>>>
>>> That's a much more convincing argument. And when you take the above into
>>> account, you realize that your locking is not correct. You shouldn't be
>>> dropping the refcount in fold, but in prune, meaning that you're holding
>>> the ap_lock and the irq_lock, same as when you inserted the interrupt in
>>> the list.
>>>
>>> This is outlining an essential principle: if your locking/refcounting is
>>> not symmetric, it is likely that you're doing something wrong, and that
>>> should bother you really badly.
>>
>> Can you point me to the exact location where it's not symmetric?
>> I just looked at it again and can't find the issue.
>> I "put" it in v[23]_fold because we did a vgic_get_irq a few lines
>> before to translate the LR's intid into our struct vgic_irq pointer.
> 
> Right. I misread that one, apologies.
> 
>> The vgic_get_irq() call isn't in this patch, because we used it already
>> before and this patch is just adding the respective puts.
>> The only asymmetry I could find is the expected one when it comes to and
>> goes from the ap_list.
> 
> So I assume this is the pendent of the kref_get call?

Yes.

> 
> @@ -386,6 +412,7 @@ retry:
>  			list_del(&irq->ap_list);
>  			irq->vcpu = NULL;
>  			spin_unlock(&irq->irq_lock);
> +			vgic_put_irq(vcpu->kvm, irq);
>  			continue;
> 
> If that's the case, please add a comment, because this is really hard to
> find out which vgic_put_irq() balances with a kref_get() and not a
> vgic_get_irq().

Yes, that's what I thought as well just _after_ having hit the Send
button ...

I think much of the confusion stems from the fact that we used
vgic_get_irq() before, only that it wasn't a "get" in the refcounting
get-put sense.

Cheers,
Andre.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 5142e2a..450b4da 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -96,6 +96,7 @@  struct vgic_irq {
 	bool active;			/* not used for LPIs */
 	bool enabled;
 	bool hw;			/* Tied to HW IRQ */
+	struct kref refcount;		/* Used for LPIs */
 	u32 hwintid;			/* HW INTID number */
 	union {
 		u8 targets;			/* GICv2 target VCPUs mask */
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index 90cae48..ac3c1a5 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -177,6 +177,7 @@  static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
 		spin_lock_init(&irq->irq_lock);
 		irq->vcpu = NULL;
 		irq->target_vcpu = vcpu0;
+		kref_init(&irq->refcount);
 		if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2)
 			irq->targets = 0;
 		else
@@ -211,6 +212,7 @@  static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 		irq->vcpu = NULL;
 		irq->target_vcpu = vcpu;
 		irq->targets = 1U << vcpu->vcpu_id;
+		kref_init(&irq->refcount);
 		if (vgic_irq_is_sgi(i)) {
 			/* SGIs */
 			irq->enabled = 1;
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c
index a213936..4152348 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v2.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c
@@ -102,6 +102,7 @@  static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu,
 		irq->source |= 1U << source_vcpu->vcpu_id;
 
 		vgic_queue_irq_unlock(source_vcpu->kvm, irq);
+		vgic_put_irq(source_vcpu->kvm, irq);
 	}
 }
 
@@ -116,6 +117,8 @@  static unsigned long vgic_mmio_read_target(struct kvm_vcpu *vcpu,
 		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
 
 		val |= (u64)irq->targets << (i * 8);
+
+		vgic_put_irq(vcpu->kvm, irq);
 	}
 
 	return val;
@@ -143,6 +146,7 @@  static void vgic_mmio_write_target(struct kvm_vcpu *vcpu,
 		irq->target_vcpu = kvm_get_vcpu(vcpu->kvm, target);
 
 		spin_unlock(&irq->irq_lock);
+		vgic_put_irq(vcpu->kvm, irq);
 	}
 }
 
@@ -157,6 +161,8 @@  static unsigned long vgic_mmio_read_sgipend(struct kvm_vcpu *vcpu,
 		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
 
 		val |= (u64)irq->source << (i * 8);
+
+		vgic_put_irq(vcpu->kvm, irq);
 	}
 	return val;
 }
@@ -178,6 +184,7 @@  static void vgic_mmio_write_sgipendc(struct kvm_vcpu *vcpu,
 			irq->pending = false;
 
 		spin_unlock(&irq->irq_lock);
+		vgic_put_irq(vcpu->kvm, irq);
 	}
 }
 
@@ -201,6 +208,7 @@  static void vgic_mmio_write_sgipends(struct kvm_vcpu *vcpu,
 		} else {
 			spin_unlock(&irq->irq_lock);
 		}
+		vgic_put_irq(vcpu->kvm, irq);
 	}
 }
 
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index fc7b6c9..bfcafbd 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -80,15 +80,17 @@  static unsigned long vgic_mmio_read_irouter(struct kvm_vcpu *vcpu,
 {
 	int intid = VGIC_ADDR_TO_INTID(addr, 64);
 	struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid);
+	unsigned long ret = 0;
 
 	if (!irq)
 		return 0;
 
 	/* The upper word is RAZ for us. */
-	if (addr & 4)
-		return 0;
+	if (!(addr & 4))
+		ret = extract_bytes(READ_ONCE(irq->mpidr), addr & 7, len);
 
-	return extract_bytes(READ_ONCE(irq->mpidr), addr & 7, len);
+	vgic_put_irq(vcpu->kvm, irq);
+	return ret;
 }
 
 static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu,
@@ -96,15 +98,17 @@  static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu,
 				    unsigned long val)
 {
 	int intid = VGIC_ADDR_TO_INTID(addr, 64);
-	struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid);
-
-	if (!irq)
-		return;
+	struct vgic_irq *irq;
 
 	/* The upper word is WI for us since we don't implement Aff3. */
 	if (addr & 4)
 		return;
 
+	irq = vgic_get_irq(vcpu->kvm, NULL, intid);
+
+	if (!irq)
+		return;
+
 	spin_lock(&irq->irq_lock);
 
 	/* We only care about and preserve Aff0, Aff1 and Aff2. */
@@ -112,6 +116,7 @@  static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu,
 	irq->target_vcpu = kvm_mpidr_to_vcpu(vcpu->kvm, irq->mpidr);
 
 	spin_unlock(&irq->irq_lock);
+	vgic_put_irq(vcpu->kvm, irq);
 }
 
 static unsigned long vgic_mmio_read_v3r_typer(struct kvm_vcpu *vcpu,
@@ -445,5 +450,6 @@  void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg)
 		irq->pending = true;
 
 		vgic_queue_irq_unlock(vcpu->kvm, irq);
+		vgic_put_irq(vcpu->kvm, irq);
 	}
 }
diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
index 9f6fab7..5e79e01 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.c
+++ b/virt/kvm/arm/vgic/vgic-mmio.c
@@ -56,6 +56,8 @@  unsigned long vgic_mmio_read_enable(struct kvm_vcpu *vcpu,
 
 		if (irq->enabled)
 			value |= (1U << i);
+
+		vgic_put_irq(vcpu->kvm, irq);
 	}
 
 	return value;
@@ -74,6 +76,8 @@  void vgic_mmio_write_senable(struct kvm_vcpu *vcpu,
 		spin_lock(&irq->irq_lock);
 		irq->enabled = true;
 		vgic_queue_irq_unlock(vcpu->kvm, irq);
+
+		vgic_put_irq(vcpu->kvm, irq);
 	}
 }
 
@@ -92,6 +96,7 @@  void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu,
 		irq->enabled = false;
 
 		spin_unlock(&irq->irq_lock);
+		vgic_put_irq(vcpu->kvm, irq);
 	}
 }
 
@@ -108,6 +113,8 @@  unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
 
 		if (irq->pending)
 			value |= (1U << i);
+
+		vgic_put_irq(vcpu->kvm, irq);
 	}
 
 	return value;
@@ -129,6 +136,7 @@  void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
 			irq->soft_pending = true;
 
 		vgic_queue_irq_unlock(vcpu->kvm, irq);
+		vgic_put_irq(vcpu->kvm, irq);
 	}
 }
 
@@ -152,6 +160,7 @@  void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
 		}
 
 		spin_unlock(&irq->irq_lock);
+		vgic_put_irq(vcpu->kvm, irq);
 	}
 }
 
@@ -168,6 +177,8 @@  unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
 
 		if (irq->active)
 			value |= (1U << i);
+
+		vgic_put_irq(vcpu->kvm, irq);
 	}
 
 	return value;
@@ -242,6 +253,7 @@  void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
 	for_each_set_bit(i, &val, len * 8) {
 		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
 		vgic_mmio_change_active(vcpu, irq, false);
+		vgic_put_irq(vcpu->kvm, irq);
 	}
 	vgic_change_active_finish(vcpu, intid);
 }
@@ -257,6 +269,7 @@  void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
 	for_each_set_bit(i, &val, len * 8) {
 		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
 		vgic_mmio_change_active(vcpu, irq, true);
+		vgic_put_irq(vcpu->kvm, irq);
 	}
 	vgic_change_active_finish(vcpu, intid);
 }
@@ -272,6 +285,8 @@  unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu,
 		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
 
 		val |= (u64)irq->priority << (i * 8);
+
+		vgic_put_irq(vcpu->kvm, irq);
 	}
 
 	return val;
@@ -298,6 +313,8 @@  void vgic_mmio_write_priority(struct kvm_vcpu *vcpu,
 		/* Narrow the priority range to what we actually support */
 		irq->priority = (val >> (i * 8)) & GENMASK(7, 8 - VGIC_PRI_BITS);
 		spin_unlock(&irq->irq_lock);
+
+		vgic_put_irq(vcpu->kvm, irq);
 	}
 }
 
@@ -313,6 +330,8 @@  unsigned long vgic_mmio_read_config(struct kvm_vcpu *vcpu,
 
 		if (irq->config == VGIC_CONFIG_EDGE)
 			value |= (2U << (i * 2));
+
+		vgic_put_irq(vcpu->kvm, irq);
 	}
 
 	return value;
@@ -326,7 +345,7 @@  void vgic_mmio_write_config(struct kvm_vcpu *vcpu,
 	int i;
 
 	for (i = 0; i < len * 4; i++) {
-		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+		struct vgic_irq *irq;
 
 		/*
 		 * The configuration cannot be changed for SGIs in general,
@@ -337,14 +356,18 @@  void vgic_mmio_write_config(struct kvm_vcpu *vcpu,
 		if (intid + i < VGIC_NR_PRIVATE_IRQS)
 			continue;
 
+		irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
 		spin_lock(&irq->irq_lock);
+
 		if (test_bit(i * 2 + 1, &val)) {
 			irq->config = VGIC_CONFIG_EDGE;
 		} else {
 			irq->config = VGIC_CONFIG_LEVEL;
 			irq->pending = irq->line_level | irq->soft_pending;
 		}
+
 		spin_unlock(&irq->irq_lock);
+		vgic_put_irq(vcpu->kvm, irq);
 	}
 }
 
diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
index 079bf67..0bf6709 100644
--- a/virt/kvm/arm/vgic/vgic-v2.c
+++ b/virt/kvm/arm/vgic/vgic-v2.c
@@ -124,6 +124,7 @@  void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
 		}
 
 		spin_unlock(&irq->irq_lock);
+		vgic_put_irq(vcpu->kvm, irq);
 	}
 }
 
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index e48a22e..f0ac064 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -113,6 +113,7 @@  void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
 		}
 
 		spin_unlock(&irq->irq_lock);
+		vgic_put_irq(vcpu->kvm, irq);
 	}
 }
 
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index 69b61ab..ae80894 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -48,13 +48,20 @@  struct vgic_global __section(.hyp.text) kvm_vgic_global_state;
 struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
 			      u32 intid)
 {
-	/* SGIs and PPIs */
-	if (intid <= VGIC_MAX_PRIVATE)
-		return &vcpu->arch.vgic_cpu.private_irqs[intid];
+	struct vgic_dist *dist = &kvm->arch.vgic;
+	struct vgic_irq *irq;
 
-	/* SPIs */
-	if (intid <= VGIC_MAX_SPI)
-		return &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS];
+	if (intid <= VGIC_MAX_PRIVATE) {        /* SGIs and PPIs */
+		irq = &vcpu->arch.vgic_cpu.private_irqs[intid];
+		kref_get(&irq->refcount);
+		return irq;
+	}
+
+	if (intid <= VGIC_MAX_SPI) {            /* SPIs */
+		irq = &dist->spis[intid - VGIC_NR_PRIVATE_IRQS];
+		kref_get(&irq->refcount);
+		return irq;
+	}
 
 	/* LPIs are not yet covered */
 	if (intid >= VGIC_MIN_LPI)
@@ -64,6 +71,17 @@  struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
 	return NULL;
 }
 
+/* The refcount should never drop to 0 at the moment. */
+static void vgic_irq_release(struct kref *ref)
+{
+	WARN_ON(1);
+}
+
+void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
+{
+	kref_put(&irq->refcount, vgic_irq_release);
+}
+
 /**
  * kvm_vgic_target_oracle - compute the target vcpu for an irq
  *
@@ -236,6 +254,7 @@  retry:
 		goto retry;
 	}
 
+	kref_get(&irq->refcount);
 	list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head);
 	irq->vcpu = vcpu;
 
@@ -269,14 +288,17 @@  static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 	if (!irq)
 		return -EINVAL;
 
-	if (irq->hw != mapped_irq)
+	if (irq->hw != mapped_irq) {
+		vgic_put_irq(kvm, irq);
 		return -EINVAL;
+	}
 
 	spin_lock(&irq->irq_lock);
 
 	if (!vgic_validate_injection(irq, level)) {
 		/* Nothing to see here, move along... */
 		spin_unlock(&irq->irq_lock);
+		vgic_put_irq(kvm, irq);
 		return 0;
 	}
 
@@ -288,6 +310,7 @@  static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 	}
 
 	vgic_queue_irq_unlock(kvm, irq);
+	vgic_put_irq(kvm, irq);
 
 	return 0;
 }
@@ -330,25 +353,28 @@  int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq)
 	irq->hwintid = phys_irq;
 
 	spin_unlock(&irq->irq_lock);
+	vgic_put_irq(vcpu->kvm, irq);
 
 	return 0;
 }
 
 int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq)
 {
-	struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq);
-
-	BUG_ON(!irq);
+	struct vgic_irq *irq;
 
 	if (!vgic_initialized(vcpu->kvm))
 		return -EAGAIN;
 
+	irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq);
+	BUG_ON(!irq);
+
 	spin_lock(&irq->irq_lock);
 
 	irq->hw = false;
 	irq->hwintid = 0;
 
 	spin_unlock(&irq->irq_lock);
+	vgic_put_irq(vcpu->kvm, irq);
 
 	return 0;
 }
@@ -386,6 +412,7 @@  retry:
 			list_del(&irq->ap_list);
 			irq->vcpu = NULL;
 			spin_unlock(&irq->irq_lock);
+			vgic_put_irq(vcpu->kvm, irq);
 			continue;
 		}
 
@@ -614,6 +641,7 @@  bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq)
 	spin_lock(&irq->irq_lock);
 	map_is_active = irq->hw && irq->active;
 	spin_unlock(&irq->irq_lock);
+	vgic_put_irq(vcpu->kvm, irq);
 
 	return map_is_active;
 }
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index c752152..5b79c34 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -38,6 +38,7 @@  struct vgic_vmcr {
 
 struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
 			      u32 intid);
+void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq);
 bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq);
 void vgic_kick_vcpus(struct kvm *kvm);