[1/8] KVM: arm/arm64: vgic: Add LPI translation cache definition

Message ID 20190606165455.162478-2-marc.zyngier@arm.com
State New, archived
Series KVM: arm/arm64: vgic: ITS translation cache

Commit Message

Marc Zyngier June 6, 2019, 4:54 p.m. UTC
Add the basic data structure that expresses an MSI to LPI
translation as well as the allocation/release hooks.

The size of the cache is arbitrarily defined as 4*nr_vcpus.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/kvm/arm_vgic.h        | 10 ++++++++++
 virt/kvm/arm/vgic/vgic-init.c | 34 ++++++++++++++++++++++++++++++++++
 virt/kvm/arm/vgic/vgic-its.c  |  2 ++
 virt/kvm/arm/vgic/vgic.h      |  3 +++
 4 files changed, 49 insertions(+)

Comments

Ali Saidi June 7, 2019, 3:47 a.m. UTC | #1
On 6/6/19, 11:55 AM, "Marc Zyngier" <marc.zyngier@arm.com> wrote:

    Add the basic data structure that expresses an MSI to LPI
    translation as well as the allocation/release hooks.
    
    The size of the cache is arbitrarily defined as 4*nr_vcpus.

A cache size of 8/vCPU should result in cache hits in most cases and 16/vCPU will pretty much always result in a cache hit.

Ali
Marc Zyngier June 7, 2019, 7:38 a.m. UTC | #2
On Fri, 07 Jun 2019 04:47:29 +0100,
"Saidi, Ali" <alisaidi@amazon.com> wrote:
> 
> 
> On 6/6/19, 11:55 AM, "Marc Zyngier" <marc.zyngier@arm.com> wrote:
> 
>     Add the basic data structure that expresses an MSI to LPI
>     translation as well as the allocation/release hooks.
>     
>     The size of the cache is arbitrarily defined as 4*nr_vcpus.
> 
> A cache size of 8/vCPU should result in cache hits in most cases and
> 16/vCPU will pretty much always result in a cache hit.

What is this interesting observation based on? On the face of it, this
is just as random as what I have already.

Thanks,

	M.
Julien Thierry June 7, 2019, 8:12 a.m. UTC | #3
Hi Marc,

On 06/06/2019 17:54, Marc Zyngier wrote:
> Add the basic data structure that expresses an MSI to LPI
> translation as well as the allocation/release hooks.
> 
> The size of the cache is arbitrarily defined as 4*nr_vcpus.
> 

Since this is arbitrary and people might want to try different sizes,
could the number of (per-vCPU) ITS translation cache entries be passed
as a kernel parameter?
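
Something as simple as a module parameter would do for experimenting
(a sketch only, not part of the patch; the parameter name is made up,
and 4 matches the current default):

static unsigned int lpi_cache_per_vcpu = 4;
module_param(lpi_cache_per_vcpu, uint, 0444);

#define LPI_CACHE_SIZE(kvm)	\
	(atomic_read(&(kvm)->online_vcpus) * lpi_cache_per_vcpu)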

> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> ---
>  include/kvm/arm_vgic.h        | 10 ++++++++++
>  virt/kvm/arm/vgic/vgic-init.c | 34 ++++++++++++++++++++++++++++++++++
>  virt/kvm/arm/vgic/vgic-its.c  |  2 ++
>  virt/kvm/arm/vgic/vgic.h      |  3 +++
>  4 files changed, 49 insertions(+)
> 
> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> index c36c86f1ec9a..5a0d6b07c5ef 100644
> --- a/include/kvm/arm_vgic.h
> +++ b/include/kvm/arm_vgic.h
> @@ -173,6 +173,14 @@ struct vgic_io_device {
>  	struct kvm_io_device dev;
>  };
>  
> +struct vgic_translation_cache_entry {
> +	struct list_head	entry;
> +	phys_addr_t		db;
> +	u32			devid;
> +	u32			eventid;
> +	struct vgic_irq		*irq;
> +};
> +
>  struct vgic_its {
>  	/* The base address of the ITS control register frame */
>  	gpa_t			vgic_its_base;
> @@ -260,6 +268,8 @@ struct vgic_dist {
>  	struct list_head	lpi_list_head;
>  	int			lpi_list_count;
>  
> +	struct list_head	lpi_translation_cache;
> +
>  	/* used by vgic-debug */
>  	struct vgic_state_iter *iter;
>  
> diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
> index 3bdb31eaed64..25ae25694a28 100644
> --- a/virt/kvm/arm/vgic/vgic-init.c
> +++ b/virt/kvm/arm/vgic/vgic-init.c
> @@ -64,6 +64,7 @@ void kvm_vgic_early_init(struct kvm *kvm)
>  	struct vgic_dist *dist = &kvm->arch.vgic;
>  
>  	INIT_LIST_HEAD(&dist->lpi_list_head);
> +	INIT_LIST_HEAD(&dist->lpi_translation_cache);
>  	raw_spin_lock_init(&dist->lpi_list_lock);
>  }
>  
> @@ -260,6 +261,27 @@ static void kvm_vgic_vcpu_enable(struct kvm_vcpu *vcpu)
>  		vgic_v3_enable(vcpu);
>  }
>  
> +void vgic_lpi_translation_cache_init(struct kvm *kvm)
> +{
> +	struct vgic_dist *dist = &kvm->arch.vgic;
> +	int i;
> +
> +	if (!list_empty(&dist->lpi_translation_cache))
> +		return;
> +
> +	for (i = 0; i < LPI_CACHE_SIZE(kvm); i++) {
> +		struct vgic_translation_cache_entry *cte;
> +
> +		/* An allocation failure is not fatal */
> +		cte = kzalloc(sizeof(*cte), GFP_KERNEL);
> +		if (WARN_ON(!cte))
> +			break;
> +
> +		INIT_LIST_HEAD(&cte->entry);
> +		list_add(&cte->entry, &dist->lpi_translation_cache);
> +	}
> +}
> +
>  /*
>   * vgic_init: allocates and initializes dist and vcpu data structures
>   * depending on two dimensioning parameters:
> @@ -305,6 +327,7 @@ int vgic_init(struct kvm *kvm)
>  	}
>  
>  	if (vgic_has_its(kvm)) {
> +		vgic_lpi_translation_cache_init(kvm);
>  		ret = vgic_v4_init(kvm);
>  		if (ret)
>  			goto out;
> @@ -346,6 +369,17 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)
>  		INIT_LIST_HEAD(&dist->rd_regions);
>  	}
>  
> +	if (vgic_has_its(kvm)) {
> +		struct vgic_translation_cache_entry *cte, *tmp;
> +
> +		list_for_each_entry_safe(cte, tmp,
> +					 &dist->lpi_translation_cache, entry) {
> +			list_del(&cte->entry);
> +			kfree(cte);
> +		}
> +		INIT_LIST_HEAD(&dist->lpi_translation_cache);

I would expect that removing all entries from a list would leave that
list as a "clean" empty list. Is INIT_LIST_HEAD() really needed here?
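
(For reference, the splice at the heart of list_del() is roughly:

	static inline void __list_del(struct list_head *prev,
				      struct list_head *next)
	{
		next->prev = prev;
		prev->next = next;
	}

so once the last entry has been removed, head->next == head->prev ==
head, which is exactly the state INIT_LIST_HEAD() sets up.)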

> +	}
> +
>  	if (vgic_supports_direct_msis(kvm))
>  		vgic_v4_teardown(kvm);
>  }
> diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
> index 44ceaccb18cf..5758504fd934 100644
> --- a/virt/kvm/arm/vgic/vgic-its.c
> +++ b/virt/kvm/arm/vgic/vgic-its.c
> @@ -1696,6 +1696,8 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
>  			kfree(its);
>  			return ret;
>  		}
> +
> +		vgic_lpi_translation_cache_init(dev->kvm);

I'm not sure I understand why we need to call that here. Isn't the
single call in vgic_init() enough? Are there cases where the other call
might come too late? (I guess I might discover that in the rest of the
series.)

>  	}
>  
>  	mutex_init(&its->its_lock);
> diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
> index abeeffabc456..a58e1b263dca 100644
> --- a/virt/kvm/arm/vgic/vgic.h
> +++ b/virt/kvm/arm/vgic/vgic.h
> @@ -316,6 +316,9 @@ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr);
>  int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
>  			 u32 devid, u32 eventid, struct vgic_irq **irq);
>  struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi);
> +void vgic_lpi_translation_cache_init(struct kvm *kvm);
> +
> +#define LPI_CACHE_SIZE(kvm)	(atomic_read(&(kvm)->online_vcpus) * 4)
>  
>  bool vgic_supports_direct_msis(struct kvm *kvm);
>  int vgic_v4_init(struct kvm *kvm);
> 

Cheers,
Marc Zyngier June 7, 2019, 8:38 a.m. UTC | #4
Hi Julien,

On 07/06/2019 09:12, Julien Thierry wrote:
> Hi Marc,
> 
> On 06/06/2019 17:54, Marc Zyngier wrote:
>> Add the basic data structure that expresses an MSI to LPI
>> translation as well as the allocation/release hooks.
>>
>> The size of the cache is arbitrarily defined as 4*nr_vcpus.
>>
> 
> Since this is arbitrary and people might want to try different sizes,
> could the number of (per-vCPU) ITS translation cache entries be passed
> as a kernel parameter?

I'm not overly keen on the kernel parameter, but I'm open to this being
an optional parameter at vITS creation time, for particularly creative
use cases... What I'd really want is a way to dynamically dimension it,
but I can't really think of a way.

> 
>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
>> ---
>>  include/kvm/arm_vgic.h        | 10 ++++++++++
>>  virt/kvm/arm/vgic/vgic-init.c | 34 ++++++++++++++++++++++++++++++++++
>>  virt/kvm/arm/vgic/vgic-its.c  |  2 ++
>>  virt/kvm/arm/vgic/vgic.h      |  3 +++
>>  4 files changed, 49 insertions(+)
>>
>> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
>> index c36c86f1ec9a..5a0d6b07c5ef 100644
>> --- a/include/kvm/arm_vgic.h
>> +++ b/include/kvm/arm_vgic.h
>> @@ -173,6 +173,14 @@ struct vgic_io_device {
>>  	struct kvm_io_device dev;
>>  };
>>  
>> +struct vgic_translation_cache_entry {
>> +	struct list_head	entry;
>> +	phys_addr_t		db;
>> +	u32			devid;
>> +	u32			eventid;
>> +	struct vgic_irq		*irq;
>> +};
>> +
>>  struct vgic_its {
>>  	/* The base address of the ITS control register frame */
>>  	gpa_t			vgic_its_base;
>> @@ -260,6 +268,8 @@ struct vgic_dist {
>>  	struct list_head	lpi_list_head;
>>  	int			lpi_list_count;
>>  
>> +	struct list_head	lpi_translation_cache;
>> +
>>  	/* used by vgic-debug */
>>  	struct vgic_state_iter *iter;
>>  
>> diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
>> index 3bdb31eaed64..25ae25694a28 100644
>> --- a/virt/kvm/arm/vgic/vgic-init.c
>> +++ b/virt/kvm/arm/vgic/vgic-init.c
>> @@ -64,6 +64,7 @@ void kvm_vgic_early_init(struct kvm *kvm)
>>  	struct vgic_dist *dist = &kvm->arch.vgic;
>>  
>>  	INIT_LIST_HEAD(&dist->lpi_list_head);
>> +	INIT_LIST_HEAD(&dist->lpi_translation_cache);
>>  	raw_spin_lock_init(&dist->lpi_list_lock);
>>  }
>>  
>> @@ -260,6 +261,27 @@ static void kvm_vgic_vcpu_enable(struct kvm_vcpu *vcpu)
>>  		vgic_v3_enable(vcpu);
>>  }
>>  
>> +void vgic_lpi_translation_cache_init(struct kvm *kvm)
>> +{
>> +	struct vgic_dist *dist = &kvm->arch.vgic;
>> +	int i;
>> +
>> +	if (!list_empty(&dist->lpi_translation_cache))
>> +		return;
>> +
>> +	for (i = 0; i < LPI_CACHE_SIZE(kvm); i++) {
>> +		struct vgic_translation_cache_entry *cte;
>> +
>> +		/* An allocation failure is not fatal */
>> +		cte = kzalloc(sizeof(*cte), GFP_KERNEL);
>> +		if (WARN_ON(!cte))
>> +			break;
>> +
>> +		INIT_LIST_HEAD(&cte->entry);
>> +		list_add(&cte->entry, &dist->lpi_translation_cache);
>> +	}
>> +}
>> +
>>  /*
>>   * vgic_init: allocates and initializes dist and vcpu data structures
>>   * depending on two dimensioning parameters:
>> @@ -305,6 +327,7 @@ int vgic_init(struct kvm *kvm)
>>  	}
>>  
>>  	if (vgic_has_its(kvm)) {
>> +		vgic_lpi_translation_cache_init(kvm);
>>  		ret = vgic_v4_init(kvm);
>>  		if (ret)
>>  			goto out;
>> @@ -346,6 +369,17 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)
>>  		INIT_LIST_HEAD(&dist->rd_regions);
>>  	}
>>  
>> +	if (vgic_has_its(kvm)) {
>> +		struct vgic_translation_cache_entry *cte, *tmp;
>> +
>> +		list_for_each_entry_safe(cte, tmp,
>> +					 &dist->lpi_translation_cache, entry) {
>> +			list_del(&cte->entry);
>> +			kfree(cte);
>> +		}
>> +		INIT_LIST_HEAD(&dist->lpi_translation_cache);
> 
> I would expect that removing all entries from a list would leave that
> list as a "clean" empty list. Is INIT_LIST_HEAD() really needed here?

You're right, that's a leftover from an earlier debugging session.

> 
>> +	}
>> +
>>  	if (vgic_supports_direct_msis(kvm))
>>  		vgic_v4_teardown(kvm);
>>  }
>> diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
>> index 44ceaccb18cf..5758504fd934 100644
>> --- a/virt/kvm/arm/vgic/vgic-its.c
>> +++ b/virt/kvm/arm/vgic/vgic-its.c
>> @@ -1696,6 +1696,8 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
>>  			kfree(its);
>>  			return ret;
>>  		}
>> +
>> +		vgic_lpi_translation_cache_init(dev->kvm);
> 
> I'm not sure I understand why we need to call that here. Isn't the
> single call in vgic_init() enough? Are there cases where the other call
> might come too late? (I guess I might discover that in the rest of the
> series.)

That's because you're allowed to create the ITS after having initialized
the vgic itself (this is guarded with a vgic_initialized() check).
Overall, we follow the same pattern as the GICv4 init. Yes, the API is a
mess.

Thanks,

	M.
Eric Auger June 7, 2019, 12:09 p.m. UTC | #5
Hi Marc,

On 6/6/19 6:54 PM, Marc Zyngier wrote:
> Add the basic data structure that expresses an MSI to LPI
> translation as well as the allocation/release hooks.
> 
> The size of the cache is arbitrarily defined as 4*nr_vcpus.
> 
> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> ---
>  include/kvm/arm_vgic.h        | 10 ++++++++++
>  virt/kvm/arm/vgic/vgic-init.c | 34 ++++++++++++++++++++++++++++++++++
>  virt/kvm/arm/vgic/vgic-its.c  |  2 ++
>  virt/kvm/arm/vgic/vgic.h      |  3 +++
>  4 files changed, 49 insertions(+)
> 
> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> index c36c86f1ec9a..5a0d6b07c5ef 100644
> --- a/include/kvm/arm_vgic.h
> +++ b/include/kvm/arm_vgic.h
> @@ -173,6 +173,14 @@ struct vgic_io_device {
>  	struct kvm_io_device dev;
>  };
>  
> +struct vgic_translation_cache_entry {
> +	struct list_head	entry;
> +	phys_addr_t		db;
It is not obvious to me why you need the db field. Isn't the LPI
uniquely identified by the devid and eventid? If I recall correctly,
the architecture theoretically allows handling LPIs even without an ITS.
> +	u32			devid;
> +	u32			eventid;
> +	struct vgic_irq		*irq;
> +};
> +
>  struct vgic_its {
>  	/* The base address of the ITS control register frame */
>  	gpa_t			vgic_its_base;
> @@ -260,6 +268,8 @@ struct vgic_dist {
>  	struct list_head	lpi_list_head;
>  	int			lpi_list_count;
>  
> +	struct list_head	lpi_translation_cache;
> +
>  	/* used by vgic-debug */
>  	struct vgic_state_iter *iter;
>  
> diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
> index 3bdb31eaed64..25ae25694a28 100644
> --- a/virt/kvm/arm/vgic/vgic-init.c
> +++ b/virt/kvm/arm/vgic/vgic-init.c
> @@ -64,6 +64,7 @@ void kvm_vgic_early_init(struct kvm *kvm)
>  	struct vgic_dist *dist = &kvm->arch.vgic;
>  
>  	INIT_LIST_HEAD(&dist->lpi_list_head);
> +	INIT_LIST_HEAD(&dist->lpi_translation_cache);
>  	raw_spin_lock_init(&dist->lpi_list_lock);
>  }
>  
> @@ -260,6 +261,27 @@ static void kvm_vgic_vcpu_enable(struct kvm_vcpu *vcpu)
>  		vgic_v3_enable(vcpu);
>  }
>  
> +void vgic_lpi_translation_cache_init(struct kvm *kvm)
> +{
> +	struct vgic_dist *dist = &kvm->arch.vgic;
> +	int i;
> +
> +	if (!list_empty(&dist->lpi_translation_cache))
> +		return;
> +
> +	for (i = 0; i < LPI_CACHE_SIZE(kvm); i++) {
> +		struct vgic_translation_cache_entry *cte;
> +
> +		/* An allocation failure is not fatal */
> +		cte = kzalloc(sizeof(*cte), GFP_KERNEL);
> +		if (WARN_ON(!cte))
> +			break;
> +
> +		INIT_LIST_HEAD(&cte->entry);
> +		list_add(&cte->entry, &dist->lpi_translation_cache);
> +	}
> +}
> +
>  /*
>   * vgic_init: allocates and initializes dist and vcpu data structures
>   * depending on two dimensioning parameters:
> @@ -305,6 +327,7 @@ int vgic_init(struct kvm *kvm)
>  	}
>  
>  	if (vgic_has_its(kvm)) {
> +		vgic_lpi_translation_cache_init(kvm);
>  		ret = vgic_v4_init(kvm);
>  		if (ret)
>  			goto out;
> @@ -346,6 +369,17 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)
>  		INIT_LIST_HEAD(&dist->rd_regions);
>  	}
>  
> +	if (vgic_has_its(kvm)) {
> +		struct vgic_translation_cache_entry *cte, *tmp;
> +
> +		list_for_each_entry_safe(cte, tmp,
> +					 &dist->lpi_translation_cache, entry) {
> +			list_del(&cte->entry);
> +			kfree(cte);
> +		}
> +		INIT_LIST_HEAD(&dist->lpi_translation_cache);
> +	}
> +
>  	if (vgic_supports_direct_msis(kvm))
>  		vgic_v4_teardown(kvm);
>  }
> diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
> index 44ceaccb18cf..5758504fd934 100644
> --- a/virt/kvm/arm/vgic/vgic-its.c
> +++ b/virt/kvm/arm/vgic/vgic-its.c
> @@ -1696,6 +1696,8 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
>  			kfree(its);
>  			return ret;
>  		}
> +
> +		vgic_lpi_translation_cache_init(dev->kvm);
>  	}
>  
>  	mutex_init(&its->its_lock);
> diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
> index abeeffabc456..a58e1b263dca 100644
> --- a/virt/kvm/arm/vgic/vgic.h
> +++ b/virt/kvm/arm/vgic/vgic.h
> @@ -316,6 +316,9 @@ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr);
>  int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
>  			 u32 devid, u32 eventid, struct vgic_irq **irq);
>  struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi);
> +void vgic_lpi_translation_cache_init(struct kvm *kvm);
> +
> +#define LPI_CACHE_SIZE(kvm)	(atomic_read(&(kvm)->online_vcpus) * 4)
Couldn't the cache be a function of the number of allocated LPIs? We
could realloc the list accordingly. I don't see why it depends on the
number of vcpus rather than on the number of assigned devices/MSIs.

Thanks

Eric

>  
>  bool vgic_supports_direct_msis(struct kvm *kvm);
>  int vgic_v4_init(struct kvm *kvm);
>
Marc Zyngier June 7, 2019, 12:44 p.m. UTC | #6
Hi Eric,

On 07/06/2019 13:09, Auger Eric wrote:
> Hi Marc,
> 
> On 6/6/19 6:54 PM, Marc Zyngier wrote:
>> Add the basic data structure that expresses an MSI to LPI
>> translation as well as the allocation/release hooks.
>>
>> The size of the cache is arbitrarily defined as 4*nr_vcpus.
>>
>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
>> ---
>>  include/kvm/arm_vgic.h        | 10 ++++++++++
>>  virt/kvm/arm/vgic/vgic-init.c | 34 ++++++++++++++++++++++++++++++++++
>>  virt/kvm/arm/vgic/vgic-its.c  |  2 ++
>>  virt/kvm/arm/vgic/vgic.h      |  3 +++
>>  4 files changed, 49 insertions(+)
>>
>> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
>> index c36c86f1ec9a..5a0d6b07c5ef 100644
>> --- a/include/kvm/arm_vgic.h
>> +++ b/include/kvm/arm_vgic.h
>> @@ -173,6 +173,14 @@ struct vgic_io_device {
>>  	struct kvm_io_device dev;
>>  };
>>  
>> +struct vgic_translation_cache_entry {
>> +	struct list_head	entry;
>> +	phys_addr_t		db;
> It is not obvious to me why you need the db field. Isn't the LPI
> uniquely identified by the devid and eventid? If I recall correctly,
> the architecture theoretically allows handling LPIs even without an ITS.

Only having DID+EID is unfortunately not enough, and the translation has
to be per ITS. Think of a system with two ITSs, and a PCI device in
front of each of the ITSs. There is no reason why the two devices would
have different IDs, as they belong to different PCI hierarchies.

So the cache must take the source ITS into account. The alternative
would be to keep a separate cache per ITS, but that would lead to more
overhead on the fast path, having to look up the ITS first.

As for LPIs without ITS, that wouldn't need a cache at all.
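
For illustration, a lookup keyed on all three fields could look like
this (a sketch only; this patch just adds the data structures, and the
helper name is made up):

static struct vgic_irq *__vgic_its_check_cache(struct vgic_dist *dist,
					       phys_addr_t db,
					       u32 devid, u32 eventid)
{
	struct vgic_translation_cache_entry *cte;

	/* Assumes the caller holds dist->lpi_list_lock. */
	list_for_each_entry(cte, &dist->lpi_translation_cache, entry) {
		/*
		 * Matching on the doorbell address is what tells two
		 * ITSs apart when their devices share a DeviceID.
		 */
		if (cte->db == db && cte->devid == devid &&
		    cte->eventid == eventid)
			return cte->irq;
	}

	return NULL;
}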

>> +	u32			devid;
>> +	u32			eventid;
>> +	struct vgic_irq		*irq;
>> +};
>> +
>>  struct vgic_its {
>>  	/* The base address of the ITS control register frame */
>>  	gpa_t			vgic_its_base;
>> @@ -260,6 +268,8 @@ struct vgic_dist {
>>  	struct list_head	lpi_list_head;
>>  	int			lpi_list_count;
>>  
>> +	struct list_head	lpi_translation_cache;
>> +
>>  	/* used by vgic-debug */
>>  	struct vgic_state_iter *iter;
>>  
>> diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
>> index 3bdb31eaed64..25ae25694a28 100644
>> --- a/virt/kvm/arm/vgic/vgic-init.c
>> +++ b/virt/kvm/arm/vgic/vgic-init.c
>> @@ -64,6 +64,7 @@ void kvm_vgic_early_init(struct kvm *kvm)
>>  	struct vgic_dist *dist = &kvm->arch.vgic;
>>  
>>  	INIT_LIST_HEAD(&dist->lpi_list_head);
>> +	INIT_LIST_HEAD(&dist->lpi_translation_cache);
>>  	raw_spin_lock_init(&dist->lpi_list_lock);
>>  }
>>  
>> @@ -260,6 +261,27 @@ static void kvm_vgic_vcpu_enable(struct kvm_vcpu *vcpu)
>>  		vgic_v3_enable(vcpu);
>>  }
>>  
>> +void vgic_lpi_translation_cache_init(struct kvm *kvm)
>> +{
>> +	struct vgic_dist *dist = &kvm->arch.vgic;
>> +	int i;
>> +
>> +	if (!list_empty(&dist->lpi_translation_cache))
>> +		return;
>> +
>> +	for (i = 0; i < LPI_CACHE_SIZE(kvm); i++) {
>> +		struct vgic_translation_cache_entry *cte;
>> +
>> +		/* An allocation failure is not fatal */
>> +		cte = kzalloc(sizeof(*cte), GFP_KERNEL);
>> +		if (WARN_ON(!cte))
>> +			break;
>> +
>> +		INIT_LIST_HEAD(&cte->entry);
>> +		list_add(&cte->entry, &dist->lpi_translation_cache);
>> +	}
>> +}
>> +
>>  /*
>>   * vgic_init: allocates and initializes dist and vcpu data structures
>>   * depending on two dimensioning parameters:
>> @@ -305,6 +327,7 @@ int vgic_init(struct kvm *kvm)
>>  	}
>>  
>>  	if (vgic_has_its(kvm)) {
>> +		vgic_lpi_translation_cache_init(kvm);
>>  		ret = vgic_v4_init(kvm);
>>  		if (ret)
>>  			goto out;
>> @@ -346,6 +369,17 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)
>>  		INIT_LIST_HEAD(&dist->rd_regions);
>>  	}
>>  
>> +	if (vgic_has_its(kvm)) {
>> +		struct vgic_translation_cache_entry *cte, *tmp;
>> +
>> +		list_for_each_entry_safe(cte, tmp,
>> +					 &dist->lpi_translation_cache, entry) {
>> +			list_del(&cte->entry);
>> +			kfree(cte);
>> +		}
>> +		INIT_LIST_HEAD(&dist->lpi_translation_cache);
>> +	}
>> +
>>  	if (vgic_supports_direct_msis(kvm))
>>  		vgic_v4_teardown(kvm);
>>  }
>> diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
>> index 44ceaccb18cf..5758504fd934 100644
>> --- a/virt/kvm/arm/vgic/vgic-its.c
>> +++ b/virt/kvm/arm/vgic/vgic-its.c
>> @@ -1696,6 +1696,8 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
>>  			kfree(its);
>>  			return ret;
>>  		}
>> +
>> +		vgic_lpi_translation_cache_init(dev->kvm);
>>  	}
>>  
>>  	mutex_init(&its->its_lock);
>> diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
>> index abeeffabc456..a58e1b263dca 100644
>> --- a/virt/kvm/arm/vgic/vgic.h
>> +++ b/virt/kvm/arm/vgic/vgic.h
>> @@ -316,6 +316,9 @@ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr);
>>  int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
>>  			 u32 devid, u32 eventid, struct vgic_irq **irq);
>>  struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi);
>> +void vgic_lpi_translation_cache_init(struct kvm *kvm);
>> +
>> +#define LPI_CACHE_SIZE(kvm)	(atomic_read(&(kvm)->online_vcpus) * 4)
> Couldn't the cache be a function of the number of allocated LPIs? We
> could realloc the list accordingly. I don't see why it depends on the
> number of vcpus rather than on the number of assigned devices/MSIs.

How do you find out about the number of LPIs? That's really for the
guest to decide what it wants to do. Also, KVM itself doesn't have much
of a clue about the number of assigned devices or their MSI capability.
That's why I've suggested that userspace could be involved here.

So far, I've used the number of vcpus as MSIs are usually used to deal
with per-CPU queues. This allows the cache to scale with the number of
queues that the guest is expected to deal with. Ali's reply earlier seems
to indicate that this is a common pattern, but it is the multiplying
factor that is hard to express.

Thanks,

	M.
Eric Auger June 7, 2019, 2:15 p.m. UTC | #7
Hi Marc,
On 6/7/19 2:44 PM, Marc Zyngier wrote:
> Hi Eric,
> 
> On 07/06/2019 13:09, Auger Eric wrote:
>> Hi Marc,
>>
>> On 6/6/19 6:54 PM, Marc Zyngier wrote:
>>> Add the basic data structure that expresses an MSI to LPI
>>> translation as well as the allocation/release hooks.
>>>
>>> The size of the cache is arbitrarily defined as 4*nr_vcpus.
>>>
>>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
>>> ---
>>>  include/kvm/arm_vgic.h        | 10 ++++++++++
>>>  virt/kvm/arm/vgic/vgic-init.c | 34 ++++++++++++++++++++++++++++++++++
>>>  virt/kvm/arm/vgic/vgic-its.c  |  2 ++
>>>  virt/kvm/arm/vgic/vgic.h      |  3 +++
>>>  4 files changed, 49 insertions(+)
>>>
>>> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
>>> index c36c86f1ec9a..5a0d6b07c5ef 100644
>>> --- a/include/kvm/arm_vgic.h
>>> +++ b/include/kvm/arm_vgic.h
>>> @@ -173,6 +173,14 @@ struct vgic_io_device {
>>>  	struct kvm_io_device dev;
>>>  };
>>>  
>>> +struct vgic_translation_cache_entry {
>>> +	struct list_head	entry;
>>> +	phys_addr_t		db;
>> It is not obvious to me why you need the db field. Isn't the LPI
>> uniquely identified by the devid and eventid? If I recall correctly,
>> the architecture theoretically allows handling LPIs even without an ITS.
> 
> Only having DID+EID is unfortunately not enough, and the translation has
> to be per ITS. Think of a system with two ITSs, and a PCI device in
> front of each of the ITSs. There is no reason why the two devices would
> have different IDs, as they belong to different PCI hierarchies.
> 
> So the cache must take the source ITS into account. The alternative
> would be to keep a separate cache per ITS, but that would lead to more
> overhead on the fast path, having to look up the ITS first.

Yes, you're right. In the meantime I double-checked the IORT spec, and
the deviceid is only unique within an ITS group node. But there can be
several group nodes.

"ITS group nodes describe which ITS units are in the system. A node
allows grouping of more than one ITS, but all ITSs in the group must
share a common understanding of DeviceID values. That is, a given
DeviceID must represent the same device for all ITS units in the group."
> 
> As for LPIs without ITS, that wouldn't need a cache at all.
> 
>>> +	u32			devid;
>>> +	u32			eventid;
>>> +	struct vgic_irq		*irq;
>>> +};
>>> +
>>>  struct vgic_its {
>>>  	/* The base address of the ITS control register frame */
>>>  	gpa_t			vgic_its_base;
>>> @@ -260,6 +268,8 @@ struct vgic_dist {
>>>  	struct list_head	lpi_list_head;
>>>  	int			lpi_list_count;
>>>  
>>> +	struct list_head	lpi_translation_cache;
>>> +
>>>  	/* used by vgic-debug */
>>>  	struct vgic_state_iter *iter;
>>>  
>>> diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
>>> index 3bdb31eaed64..25ae25694a28 100644
>>> --- a/virt/kvm/arm/vgic/vgic-init.c
>>> +++ b/virt/kvm/arm/vgic/vgic-init.c
>>> @@ -64,6 +64,7 @@ void kvm_vgic_early_init(struct kvm *kvm)
>>>  	struct vgic_dist *dist = &kvm->arch.vgic;
>>>  
>>>  	INIT_LIST_HEAD(&dist->lpi_list_head);
>>> +	INIT_LIST_HEAD(&dist->lpi_translation_cache);
>>>  	raw_spin_lock_init(&dist->lpi_list_lock);
>>>  }
>>>  
>>> @@ -260,6 +261,27 @@ static void kvm_vgic_vcpu_enable(struct kvm_vcpu *vcpu)
>>>  		vgic_v3_enable(vcpu);
>>>  }
>>>  
>>> +void vgic_lpi_translation_cache_init(struct kvm *kvm)
>>> +{
>>> +	struct vgic_dist *dist = &kvm->arch.vgic;
>>> +	int i;
>>> +
>>> +	if (!list_empty(&dist->lpi_translation_cache))
>>> +		return;
>>> +
>>> +	for (i = 0; i < LPI_CACHE_SIZE(kvm); i++) {
>>> +		struct vgic_translation_cache_entry *cte;
>>> +
>>> +		/* An allocation failure is not fatal */
>>> +		cte = kzalloc(sizeof(*cte), GFP_KERNEL);
>>> +		if (WARN_ON(!cte))
>>> +			break;
>>> +
>>> +		INIT_LIST_HEAD(&cte->entry);
>>> +		list_add(&cte->entry, &dist->lpi_translation_cache);
>>> +	}
>>> +}
>>> +
>>>  /*
>>>   * vgic_init: allocates and initializes dist and vcpu data structures
>>>   * depending on two dimensioning parameters:
>>> @@ -305,6 +327,7 @@ int vgic_init(struct kvm *kvm)
>>>  	}
>>>  
>>>  	if (vgic_has_its(kvm)) {
>>> +		vgic_lpi_translation_cache_init(kvm);
>>>  		ret = vgic_v4_init(kvm);
>>>  		if (ret)
>>>  			goto out;
>>> @@ -346,6 +369,17 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)
>>>  		INIT_LIST_HEAD(&dist->rd_regions);
>>>  	}
>>>  
>>> +	if (vgic_has_its(kvm)) {
>>> +		struct vgic_translation_cache_entry *cte, *tmp;
>>> +
>>> +		list_for_each_entry_safe(cte, tmp,
>>> +					 &dist->lpi_translation_cache, entry) {
>>> +			list_del(&cte->entry);
>>> +			kfree(cte);
>>> +		}
>>> +		INIT_LIST_HEAD(&dist->lpi_translation_cache);
>>> +	}
>>> +
>>>  	if (vgic_supports_direct_msis(kvm))
>>>  		vgic_v4_teardown(kvm);
>>>  }
>>> diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
>>> index 44ceaccb18cf..5758504fd934 100644
>>> --- a/virt/kvm/arm/vgic/vgic-its.c
>>> +++ b/virt/kvm/arm/vgic/vgic-its.c
>>> @@ -1696,6 +1696,8 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
>>>  			kfree(its);
>>>  			return ret;
>>>  		}
>>> +
>>> +		vgic_lpi_translation_cache_init(dev->kvm);
>>>  	}
>>>  
>>>  	mutex_init(&its->its_lock);
>>> diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
>>> index abeeffabc456..a58e1b263dca 100644
>>> --- a/virt/kvm/arm/vgic/vgic.h
>>> +++ b/virt/kvm/arm/vgic/vgic.h
>>> @@ -316,6 +316,9 @@ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr);
>>>  int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
>>>  			 u32 devid, u32 eventid, struct vgic_irq **irq);
>>>  struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi);
>>> +void vgic_lpi_translation_cache_init(struct kvm *kvm);
>>> +
>>> +#define LPI_CACHE_SIZE(kvm)	(atomic_read(&(kvm)->online_vcpus) * 4)
>> Couldn't the cache be a function of the number of allocated LPIs? We
>> could realloc the list accordingly. I don't see why it depends on the
>> number of vcpus rather than on the number of assigned devices/MSIs.
> 
> How do you find out about the number of LPIs? That's really for the
> guest to decide what it wants to do. Also, KVM itself doesn't have much
> of a clue about the number of assigned devices or their MSI capability.
> That's why I've suggested that userspace could be involved here.

Can't we set up a heuristic based on dist->lpi_list_count, incremented
in vgic_add_lpi() on MAPI/MAPTI? Of course not all of them belong to
assigned devices. But currently the cache is used for all LPIs,
including those injected from user space (KVM_SIGNAL_MSI).
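
Something along these lines in vgic_add_lpi(), purely illustrative
(both the lpi_cache_size field and the grow helper are made up):

	/* Grow the cache as the guest maps more LPIs, up to a cap. */
	if (dist->lpi_cache_size < LPI_CACHE_MAX &&
	    dist->lpi_list_count > dist->lpi_cache_size)
		vgic_lpi_cache_grow(dist);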

Otherwise, there is an existing interface between KVM and VFIO that
could perhaps be leveraged to pass this info between the two?

Thanks

Eric
> 
> So far, I've used the number of vcpus as MSIs are usually used to deal
> with per-CPU queues. This allows the cache to scale with the number of
> queues that the guest is expected to deal with. Ali's reply earlier seems
> to indicate that this is a common pattern, but it is the multiplying
> factor that is hard to express.
> 
> Thanks,
> 
> 	M.
>
Marc Zyngier June 7, 2019, 3:04 p.m. UTC | #8
On 07/06/2019 15:15, Auger Eric wrote:
> Hi Marc,
> On 6/7/19 2:44 PM, Marc Zyngier wrote:
>> Hi Eric,
>>
>> On 07/06/2019 13:09, Auger Eric wrote:
>>> Hi Marc,

[...]

>>>> +#define LPI_CACHE_SIZE(kvm)	(atomic_read(&(kvm)->online_vcpus) * 4)
>>> Couldn't the cache be a function of the number of allocated LPIs? We
>>> could realloc the list accordingly. I don't see why it depends on the
>>> number of vcpus rather than on the number of assigned devices/MSIs.
>>
>> How do you find out about the number of LPIs? That's really for the
>> guest to decide what it wants to do. Also, KVM itself doesn't have much
>> of a clue about the number of assigned devices or their MSI capability.
>> That's why I've suggested that userspace could be involved here.
> 
> Can't we set up a heuristic based on dist->lpi_list_count, incremented
> in vgic_add_lpi() on MAPI/MAPTI? Of course not all of them belong to
> assigned devices. But currently the cache is used for all LPIs,
> including those injected from user space (KVM_SIGNAL_MSI).

I'm happy to grow the cache on MAPI, but that doesn't solve the real
problem: how do we cap it to a value that is "good enough"?

> Otherwise, there is an existing interface between KVM and VFIO that
> could perhaps be leveraged to pass this info between the two?

Same thing. The problem is in defining the limit. I guess only people
deploying real workloads can tell us what is a reasonable default, and
we can also make that a tuneable parameter...

	M.

Patch

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index c36c86f1ec9a..5a0d6b07c5ef 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -173,6 +173,14 @@  struct vgic_io_device {
 	struct kvm_io_device dev;
 };
 
+struct vgic_translation_cache_entry {
+	struct list_head	entry;
+	phys_addr_t		db;
+	u32			devid;
+	u32			eventid;
+	struct vgic_irq		*irq;
+};
+
 struct vgic_its {
 	/* The base address of the ITS control register frame */
 	gpa_t			vgic_its_base;
@@ -260,6 +268,8 @@  struct vgic_dist {
 	struct list_head	lpi_list_head;
 	int			lpi_list_count;
 
+	struct list_head	lpi_translation_cache;
+
 	/* used by vgic-debug */
 	struct vgic_state_iter *iter;
 
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index 3bdb31eaed64..25ae25694a28 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -64,6 +64,7 @@  void kvm_vgic_early_init(struct kvm *kvm)
 	struct vgic_dist *dist = &kvm->arch.vgic;
 
 	INIT_LIST_HEAD(&dist->lpi_list_head);
+	INIT_LIST_HEAD(&dist->lpi_translation_cache);
 	raw_spin_lock_init(&dist->lpi_list_lock);
 }
 
@@ -260,6 +261,27 @@  static void kvm_vgic_vcpu_enable(struct kvm_vcpu *vcpu)
 		vgic_v3_enable(vcpu);
 }
 
+void vgic_lpi_translation_cache_init(struct kvm *kvm)
+{
+	struct vgic_dist *dist = &kvm->arch.vgic;
+	int i;
+
+	if (!list_empty(&dist->lpi_translation_cache))
+		return;
+
+	for (i = 0; i < LPI_CACHE_SIZE(kvm); i++) {
+		struct vgic_translation_cache_entry *cte;
+
+		/* An allocation failure is not fatal */
+		cte = kzalloc(sizeof(*cte), GFP_KERNEL);
+		if (WARN_ON(!cte))
+			break;
+
+		INIT_LIST_HEAD(&cte->entry);
+		list_add(&cte->entry, &dist->lpi_translation_cache);
+	}
+}
+
 /*
  * vgic_init: allocates and initializes dist and vcpu data structures
  * depending on two dimensioning parameters:
@@ -305,6 +327,7 @@  int vgic_init(struct kvm *kvm)
 	}
 
 	if (vgic_has_its(kvm)) {
+		vgic_lpi_translation_cache_init(kvm);
 		ret = vgic_v4_init(kvm);
 		if (ret)
 			goto out;
@@ -346,6 +369,17 @@  static void kvm_vgic_dist_destroy(struct kvm *kvm)
 		INIT_LIST_HEAD(&dist->rd_regions);
 	}
 
+	if (vgic_has_its(kvm)) {
+		struct vgic_translation_cache_entry *cte, *tmp;
+
+		list_for_each_entry_safe(cte, tmp,
+					 &dist->lpi_translation_cache, entry) {
+			list_del(&cte->entry);
+			kfree(cte);
+		}
+		INIT_LIST_HEAD(&dist->lpi_translation_cache);
+	}
+
 	if (vgic_supports_direct_msis(kvm))
 		vgic_v4_teardown(kvm);
 }
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 44ceaccb18cf..5758504fd934 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -1696,6 +1696,8 @@  static int vgic_its_create(struct kvm_device *dev, u32 type)
 			kfree(its);
 			return ret;
 		}
+
+		vgic_lpi_translation_cache_init(dev->kvm);
 	}
 
 	mutex_init(&its->its_lock);
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index abeeffabc456..a58e1b263dca 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -316,6 +316,9 @@  int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr);
 int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
 			 u32 devid, u32 eventid, struct vgic_irq **irq);
 struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi);
+void vgic_lpi_translation_cache_init(struct kvm *kvm);
+
+#define LPI_CACHE_SIZE(kvm)	(atomic_read(&(kvm)->online_vcpus) * 4)
 
 bool vgic_supports_direct_msis(struct kvm *kvm);
 int vgic_v4_init(struct kvm *kvm);