diff mbox series

[v4,7/8] KVM: arm64: vgic-v3: Expose GICR_TYPER.Last for userspace

Message ID 20210401085238.477270-8-eric.auger@redhat.com (mailing list archive)
State New, archived
Headers show
Series KVM/ARM: Some vgic fixes and init sequence KVM selftests | expand

Commit Message

Eric Auger April 1, 2021, 8:52 a.m. UTC
Commit 23bde34771f1 ("KVM: arm64: vgic-v3: Drop the
reporting of GICR_TYPER.Last for userspace") temporarily fixed
a bug identified when attempting to access the GICR_TYPER
register before the redistributor region setting, but dropped
the support of the LAST bit.

Emulating the GICR_TYPER.Last bit still makes sense for
architecture compliance though. This patch restores its support
(if the redistributor region was set) while keeping the code safe.

We introduce a new helper, vgic_mmio_vcpu_rdist_is_last() which
computes whether a redistributor is the highest one of a series
of redistributor contributor pages.

The spec says "Indicates whether this Redistributor is the
highest-numbered Redistributor in a series of contiguous
Redistributor pages."

The code is a bit convulated since there is no guarantee
redistributors are added in a given reditributor region in
ascending order. In that case the current implementation was
wrong. Also redistributor regions can be contiguous
and registered in non increasing base address order.

So the index of redistributors are stored in an array within
the redistributor region structure.

With this new implementation we do not need to have a uaccess
read accessor anymore.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
 arch/arm64/kvm/vgic/vgic-init.c    |  7 +--
 arch/arm64/kvm/vgic/vgic-mmio-v3.c | 97 ++++++++++++++++++++----------
 arch/arm64/kvm/vgic/vgic.h         |  1 +
 include/kvm/arm_vgic.h             |  3 +
 4 files changed, 73 insertions(+), 35 deletions(-)

Comments

Marc Zyngier April 1, 2021, 1:42 p.m. UTC | #1
Hi Eric,

On Thu, 01 Apr 2021 09:52:37 +0100,
Eric Auger <eric.auger@redhat.com> wrote:
> 
> Commit 23bde34771f1 ("KVM: arm64: vgic-v3: Drop the
> reporting of GICR_TYPER.Last for userspace") temporarily fixed
> a bug identified when attempting to access the GICR_TYPER
> register before the redistributor region setting, but dropped
> the support of the LAST bit.
> 
> Emulating the GICR_TYPER.Last bit still makes sense for
> architecture compliance though. This patch restores its support
> (if the redistributor region was set) while keeping the code safe.
> 
> We introduce a new helper, vgic_mmio_vcpu_rdist_is_last() which
> computes whether a redistributor is the highest one of a series
> of redistributor contributor pages.
> 
> The spec says "Indicates whether this Redistributor is the
> highest-numbered Redistributor in a series of contiguous
> Redistributor pages."
> 
> The code is a bit convulated since there is no guarantee

nit: convoluted

> redistributors are added in a given reditributor region in
> ascending order. In that case the current implementation was
> wrong. Also redistributor regions can be contiguous
> and registered in non increasing base address order.
> 
> So the index of redistributors are stored in an array within
> the redistributor region structure.
> 
> With this new implementation we do not need to have a uaccess
> read accessor anymore.
> 
> Signed-off-by: Eric Auger <eric.auger@redhat.com>

This patch also hurt my head, a lot more than the first one.  See
below.

> ---
>  arch/arm64/kvm/vgic/vgic-init.c    |  7 +--
>  arch/arm64/kvm/vgic/vgic-mmio-v3.c | 97 ++++++++++++++++++++----------
>  arch/arm64/kvm/vgic/vgic.h         |  1 +
>  include/kvm/arm_vgic.h             |  3 +
>  4 files changed, 73 insertions(+), 35 deletions(-)
> 
> diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
> index cf6faa0aeddb2..61150c34c268c 100644
> --- a/arch/arm64/kvm/vgic/vgic-init.c
> +++ b/arch/arm64/kvm/vgic/vgic-init.c
> @@ -190,6 +190,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
>  	int i;
>  
>  	vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF;
> +	vgic_cpu->index = vcpu->vcpu_id;

Is it so that vgic_cpu->index is always equal to vcpu_id? If so, why
do we need another field? We can always get to the vcpu using a
container_of().

>  
>  	INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
>  	raw_spin_lock_init(&vgic_cpu->ap_list_lock);
> @@ -338,10 +339,8 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)
>  	dist->vgic_dist_base = VGIC_ADDR_UNDEF;
>  
>  	if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
> -		list_for_each_entry_safe(rdreg, next, &dist->rd_regions, list) {
> -			list_del(&rdreg->list);
> -			kfree(rdreg);
> -		}
> +		list_for_each_entry_safe(rdreg, next, &dist->rd_regions, list)
> +			vgic_v3_free_redist_region(rdreg);

Consider moving the introduction of vgic_v3_free_redist_region() into
a separate patch. On its own, that's a good readability improvement.

>  		INIT_LIST_HEAD(&dist->rd_regions);
>  	} else {
>  		dist->vgic_cpu_base = VGIC_ADDR_UNDEF;
> diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
> index 987e366c80008..f6a7eed1d6adb 100644
> --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
> +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
> @@ -251,45 +251,57 @@ static void vgic_mmio_write_v3r_ctlr(struct kvm_vcpu *vcpu,
>  		vgic_enable_lpis(vcpu);
>  }
>  
> +static bool vgic_mmio_vcpu_rdist_is_last(struct kvm_vcpu *vcpu)
> +{
> +	struct vgic_dist *vgic = &vcpu->kvm->arch.vgic;
> +	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
> +	struct vgic_redist_region *rdreg = vgic_cpu->rdreg;
> +
> +	if (!rdreg)
> +		return false;
> +
> +	if (rdreg->count && vgic_cpu->rdreg_index == (rdreg->count - 1)) {
> +		/* check whether there is no other contiguous rdist region */
> +		struct list_head *rd_regions = &vgic->rd_regions;
> +		struct vgic_redist_region *iter;
> +
> +		list_for_each_entry(iter, rd_regions, list) {
> +			if (iter->base == rdreg->base + rdreg->count * KVM_VGIC_V3_REDIST_SIZE &&
> +				iter->free_index > 0) {
> +			/* check the first rdist index of this region, if any */
> +				if (vgic_cpu->index < iter->rdist_indices[0])
> +					return false;

rdist_indices[] contains the vcpu_id of the vcpu associated with a
given RD in the region. At this stage, you have established that there
is another region that is contiguous with the one associated with our
vcpu. You also know that this adjacent region has a vcpu mapped in
(free_index isn't 0). Isn't that enough to declare that our vcpu isn't
last?  I definitely don't understand what the index comparison does
here.

It also seem to me that some of the complexity could be eliminated if
the regions were kept ordered at list insertion time.

> +			}
> +		}
> +	} else if (vgic_cpu->rdreg_index < rdreg->free_index - 1) {
> +		/* look at the index of next rdist */
> +		int next_rdist_index = rdreg->rdist_indices[vgic_cpu->rdreg_index + 1];
> +
> +		if (vgic_cpu->index < next_rdist_index)
> +			return false;

Same thing here. We are in the middle of the allocated part of a
region, which means we cannot be last. I still don't get the index
check.

> +	}
> +	return true;
> +}
> +
>  static unsigned long vgic_mmio_read_v3r_typer(struct kvm_vcpu *vcpu,
>  					      gpa_t addr, unsigned int len)
>  {
>  	unsigned long mpidr = kvm_vcpu_get_mpidr_aff(vcpu);
> -	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
> -	struct vgic_redist_region *rdreg = vgic_cpu->rdreg;
>  	int target_vcpu_id = vcpu->vcpu_id;
> -	gpa_t last_rdist_typer = rdreg->base + GICR_TYPER +
> -			(rdreg->free_index - 1) * KVM_VGIC_V3_REDIST_SIZE;
>  	u64 value;
>  
>  	value = (u64)(mpidr & GENMASK(23, 0)) << 32;
>  	value |= ((target_vcpu_id & 0xffff) << 8);
>  
> -	if (addr == last_rdist_typer)
> +	if (vgic_has_its(vcpu->kvm))
> +		value |= GICR_TYPER_PLPIS;
> +
> +	if (vgic_mmio_vcpu_rdist_is_last(vcpu))
>  		value |= GICR_TYPER_LAST;
> -	if (vgic_has_its(vcpu->kvm))
> -		value |= GICR_TYPER_PLPIS;
>  
>  	return extract_bytes(value, addr & 7, len);
>  }
>  
> -static unsigned long vgic_uaccess_read_v3r_typer(struct kvm_vcpu *vcpu,
> -						 gpa_t addr, unsigned int len)
> -{
> -	unsigned long mpidr = kvm_vcpu_get_mpidr_aff(vcpu);
> -	int target_vcpu_id = vcpu->vcpu_id;
> -	u64 value;
> -
> -	value = (u64)(mpidr & GENMASK(23, 0)) << 32;
> -	value |= ((target_vcpu_id & 0xffff) << 8);
> -
> -	if (vgic_has_its(vcpu->kvm))
> -		value |= GICR_TYPER_PLPIS;
> -
> -	/* reporting of the Last bit is not supported for userspace */
> -	return extract_bytes(value, addr & 7, len);
> -}
> -
>  static unsigned long vgic_mmio_read_v3r_iidr(struct kvm_vcpu *vcpu,
>  					     gpa_t addr, unsigned int len)
>  {
> @@ -612,7 +624,7 @@ static const struct vgic_register_region vgic_v3_rd_registers[] = {
>  		VGIC_ACCESS_32bit),
>  	REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_TYPER,
>  		vgic_mmio_read_v3r_typer, vgic_mmio_write_wi,
> -		vgic_uaccess_read_v3r_typer, vgic_mmio_uaccess_write_wi, 8,
> +		NULL, vgic_mmio_uaccess_write_wi, 8,
>  		VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
>  	REGISTER_DESC_WITH_LENGTH(GICR_WAKER,
>  		vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
> @@ -714,6 +726,16 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu)
>  		return -EINVAL;
>  
>  	vgic_cpu->rdreg = rdreg;
> +	vgic_cpu->rdreg_index = rdreg->free_index;
> +	if (!rdreg->count) {
> +		void *p = krealloc(rdreg->rdist_indices,
> +				   (vgic_cpu->rdreg_index + 1) * sizeof(u32),
> +				   GFP_KERNEL);
> +		if (!p)
> +			return -ENOMEM;
> +		rdreg->rdist_indices = p;
> +	}
> +	rdreg->rdist_indices[vgic_cpu->rdreg_index] = vgic_cpu->index;

I think I really have a problem with this array, which comes from me
not understanding the two checks I previously commented on.

If we stick to the definition of 'Last', all that matters is the
position of the RD in a region (rdreg_index) and potentially the
presence of another contiguous region with allocated RDs in it.

IIUC, the checks should read like this:

if (vcpu->rdreg_index < (vcpu->rdreg->free_index - 1))
	last = false;
else if (vcpu->rdreg_index == (vcpu->rdreg->free_index - 1) &&
	 adjacent_region(vcpu->rdreg)->free_index > 0)
	last = false;
else
	last = true;

So why do we need to track the vcpu_id associated to a region?

>
>  	rd_base = rdreg->base + rdreg->free_index * KVM_VGIC_V3_REDIST_SIZE;
>  
> @@ -768,7 +790,7 @@ static int vgic_register_all_redist_iodevs(struct kvm *kvm)
>  }
>  
>  /**
> - * vgic_v3_insert_redist_region - Insert a new redistributor region
> + * vgic_v3_alloc_redist_region - Allocate a new redistributor region
>   *
>   * Performs various checks before inserting the rdist region in the list.
>   * Those tests depend on whether the size of the rdist region is known
> @@ -782,8 +804,8 @@ static int vgic_register_all_redist_iodevs(struct kvm *kvm)
>   *
>   * Return 0 on success, < 0 otherwise
>   */
> -static int vgic_v3_insert_redist_region(struct kvm *kvm, uint32_t index,
> -					gpa_t base, uint32_t count)
> +static int vgic_v3_alloc_redist_region(struct kvm *kvm, uint32_t index,
> +				       gpa_t base, uint32_t count)
>  {
>  	struct vgic_dist *d = &kvm->arch.vgic;
>  	struct vgic_redist_region *rdreg;
> @@ -839,6 +861,13 @@ static int vgic_v3_insert_redist_region(struct kvm *kvm, uint32_t index,
>  	rdreg->count = count;
>  	rdreg->free_index = 0;
>  	rdreg->index = index;
> +	if (count) {
> +		rdreg->rdist_indices = kcalloc(count, sizeof(u32), GFP_KERNEL);
> +		if (!rdreg->rdist_indices) {
> +			ret = -ENOMEM;
> +			goto free;
> +		}
> +	}
>  
>  	list_add_tail(&rdreg->list, rd_regions);
>  	return 0;
> @@ -847,11 +876,18 @@ static int vgic_v3_insert_redist_region(struct kvm *kvm, uint32_t index,
>  	return ret;
>  }
>  
> +void vgic_v3_free_redist_region(struct vgic_redist_region *rdreg)
> +{
> +	list_del(&rdreg->list);
> +	kfree(rdreg->rdist_indices);
> +	kfree(rdreg);
> +}
> +
>  int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count)
>  {
>  	int ret;
>  
> -	ret = vgic_v3_insert_redist_region(kvm, index, addr, count);
> +	ret = vgic_v3_alloc_redist_region(kvm, index, addr, count);
>  	if (ret)
>  		return ret;
>  
> @@ -864,8 +900,7 @@ int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count)
>  		struct vgic_redist_region *rdreg;
>  
>  		rdreg = vgic_v3_rdist_region_from_index(kvm, index);
> -		list_del(&rdreg->list);
> -		kfree(rdreg);
> +		vgic_v3_free_redist_region(rdreg);
>  		return ret;
>  	}
>  
> diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
> index 64fcd75111108..bc418c2c12141 100644
> --- a/arch/arm64/kvm/vgic/vgic.h
> +++ b/arch/arm64/kvm/vgic/vgic.h
> @@ -293,6 +293,7 @@ vgic_v3_rd_region_size(struct kvm *kvm, struct vgic_redist_region *rdreg)
>  
>  struct vgic_redist_region *vgic_v3_rdist_region_from_index(struct kvm *kvm,
>  							   u32 index);
> +void vgic_v3_free_redist_region(struct vgic_redist_region *rdreg);
>  
>  bool vgic_v3_rdist_overlap(struct kvm *kvm, gpa_t base, size_t size);
>  
> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> index 3d74f1060bd18..9a3f060ac3547 100644
> --- a/include/kvm/arm_vgic.h
> +++ b/include/kvm/arm_vgic.h
> @@ -197,6 +197,7 @@ struct vgic_redist_region {
>  	gpa_t base;
>  	u32 count; /* number of redistributors or 0 if single region */
>  	u32 free_index; /* index of the next free redistributor */
> +	int *rdist_indices; /* indices of the redistributors */

You are treating it as an array of u32 when allocating it. Please
choose one type or the other.

>  	struct list_head list;
>  };
>  
> @@ -322,6 +323,8 @@ struct vgic_cpu {
>  	 */
>  	struct vgic_io_device	rd_iodev;
>  	struct vgic_redist_region *rdreg;
> +	u32 rdreg_index;
> +	int index; /* vcpu index */
>  
>  	/* Contains the attributes and gpa of the LPI pending tables. */
>  	u64 pendbaser;

Thanks,

	M.
Eric Auger April 1, 2021, 5:03 p.m. UTC | #2
Hi Marc,

On 4/1/21 3:42 PM, Marc Zyngier wrote:
> Hi Eric,
> 
> On Thu, 01 Apr 2021 09:52:37 +0100,
> Eric Auger <eric.auger@redhat.com> wrote:
>>
>> Commit 23bde34771f1 ("KVM: arm64: vgic-v3: Drop the
>> reporting of GICR_TYPER.Last for userspace") temporarily fixed
>> a bug identified when attempting to access the GICR_TYPER
>> register before the redistributor region setting, but dropped
>> the support of the LAST bit.
>>
>> Emulating the GICR_TYPER.Last bit still makes sense for
>> architecture compliance though. This patch restores its support
>> (if the redistributor region was set) while keeping the code safe.
>>
>> We introduce a new helper, vgic_mmio_vcpu_rdist_is_last() which
>> computes whether a redistributor is the highest one of a series
>> of redistributor contributor pages.
>>
>> The spec says "Indicates whether this Redistributor is the
>> highest-numbered Redistributor in a series of contiguous
>> Redistributor pages."
>>
>> The code is a bit convulated since there is no guarantee
> 
> nit: convoluted
> 
>> redistributors are added in a given reditributor region in
>> ascending order. In that case the current implementation was
>> wrong. Also redistributor regions can be contiguous
>> and registered in non increasing base address order.
>>
>> So the index of redistributors are stored in an array within
>> the redistributor region structure.
>>
>> With this new implementation we do not need to have a uaccess
>> read accessor anymore.
>>
>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
> 
> This patch also hurt my head, a lot more than the first one.  See
> below.
> 
>> ---
>>  arch/arm64/kvm/vgic/vgic-init.c    |  7 +--
>>  arch/arm64/kvm/vgic/vgic-mmio-v3.c | 97 ++++++++++++++++++++----------
>>  arch/arm64/kvm/vgic/vgic.h         |  1 +
>>  include/kvm/arm_vgic.h             |  3 +
>>  4 files changed, 73 insertions(+), 35 deletions(-)
>>
>> diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
>> index cf6faa0aeddb2..61150c34c268c 100644
>> --- a/arch/arm64/kvm/vgic/vgic-init.c
>> +++ b/arch/arm64/kvm/vgic/vgic-init.c
>> @@ -190,6 +190,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
>>  	int i;
>>  
>>  	vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF;
>> +	vgic_cpu->index = vcpu->vcpu_id;
> 
> Is it so that vgic_cpu->index is always equal to vcpu_id? If so, why
> do we need another field? We can always get to the vcpu using a
> container_of().
> 
>>  
>>  	INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
>>  	raw_spin_lock_init(&vgic_cpu->ap_list_lock);
>> @@ -338,10 +339,8 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)
>>  	dist->vgic_dist_base = VGIC_ADDR_UNDEF;
>>  
>>  	if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
>> -		list_for_each_entry_safe(rdreg, next, &dist->rd_regions, list) {
>> -			list_del(&rdreg->list);
>> -			kfree(rdreg);
>> -		}
>> +		list_for_each_entry_safe(rdreg, next, &dist->rd_regions, list)
>> +			vgic_v3_free_redist_region(rdreg);
> 
> Consider moving the introduction of vgic_v3_free_redist_region() into
> a separate patch. On its own, that's a good readability improvement.
> 
>>  		INIT_LIST_HEAD(&dist->rd_regions);
>>  	} else {
>>  		dist->vgic_cpu_base = VGIC_ADDR_UNDEF;
>> diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
>> index 987e366c80008..f6a7eed1d6adb 100644
>> --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
>> +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
>> @@ -251,45 +251,57 @@ static void vgic_mmio_write_v3r_ctlr(struct kvm_vcpu *vcpu,
>>  		vgic_enable_lpis(vcpu);
>>  }
>>  
>> +static bool vgic_mmio_vcpu_rdist_is_last(struct kvm_vcpu *vcpu)
>> +{
>> +	struct vgic_dist *vgic = &vcpu->kvm->arch.vgic;
>> +	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
>> +	struct vgic_redist_region *rdreg = vgic_cpu->rdreg;
>> +
>> +	if (!rdreg)
>> +		return false;
>> +
>> +	if (rdreg->count && vgic_cpu->rdreg_index == (rdreg->count - 1)) {
>> +		/* check whether there is no other contiguous rdist region */
>> +		struct list_head *rd_regions = &vgic->rd_regions;
>> +		struct vgic_redist_region *iter;
>> +
>> +		list_for_each_entry(iter, rd_regions, list) {
>> +			if (iter->base == rdreg->base + rdreg->count * KVM_VGIC_V3_REDIST_SIZE &&
>> +				iter->free_index > 0) {
>> +			/* check the first rdist index of this region, if any */
>> +				if (vgic_cpu->index < iter->rdist_indices[0])
>> +					return false;
> 
> rdist_indices[] contains the vcpu_id of the vcpu associated with a
> given RD in the region. At this stage, you have established that there
> is another region that is contiguous with the one associated with our
> vcpu. You also know that this adjacent region has a vcpu mapped in
> (free_index isn't 0). Isn't that enough to declare that our vcpu isn't
> last?  I definitely don't understand what the index comparison does
> here.
Assume the following case:
2 RDIST region
region #0 contains rdist 1, 2, 4
region #1, adjacent to #0 contains rdist 3

Spec days:
"Indicates whether this Redistributor is the
highest-numbered Redistributor in a series of contiguous
Redistributor pages."

To me 4 is last and 3 is last too.


> 
> It also seem to me that some of the complexity could be eliminated if
> the regions were kept ordered at list insertion time.
yes
> 
>> +			}
>> +		}
>> +	} else if (vgic_cpu->rdreg_index < rdreg->free_index - 1) {
>> +		/* look at the index of next rdist */
>> +		int next_rdist_index = rdreg->rdist_indices[vgic_cpu->rdreg_index + 1];
>> +
>> +		if (vgic_cpu->index < next_rdist_index)
>> +			return false;
> 
> Same thing here. We are in the middle of the allocated part of a
> region, which means we cannot be last. I still don't get the index
> check.
Because within a region, nothing hinders rdist from being allocated in
non ascending order. I exercise those cases in the kvmselftests

one single RDIST region with the following rdists allocated there:
1, 3, 2

3 and 2 are "last", right? Or did I miss something. Yes that's totally
not natural to do that kind of allocation but the API allows to do that.


> 
>> +	}
>> +	return true;
>> +}
>> +
>>  static unsigned long vgic_mmio_read_v3r_typer(struct kvm_vcpu *vcpu,
>>  					      gpa_t addr, unsigned int len)
>>  {
>>  	unsigned long mpidr = kvm_vcpu_get_mpidr_aff(vcpu);
>> -	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
>> -	struct vgic_redist_region *rdreg = vgic_cpu->rdreg;
>>  	int target_vcpu_id = vcpu->vcpu_id;
>> -	gpa_t last_rdist_typer = rdreg->base + GICR_TYPER +
>> -			(rdreg->free_index - 1) * KVM_VGIC_V3_REDIST_SIZE;
>>  	u64 value;
>>  
>>  	value = (u64)(mpidr & GENMASK(23, 0)) << 32;
>>  	value |= ((target_vcpu_id & 0xffff) << 8);
>>  
>> -	if (addr == last_rdist_typer)
>> +	if (vgic_has_its(vcpu->kvm))
>> +		value |= GICR_TYPER_PLPIS;
>> +
>> +	if (vgic_mmio_vcpu_rdist_is_last(vcpu))
>>  		value |= GICR_TYPER_LAST;
>> -	if (vgic_has_its(vcpu->kvm))
>> -		value |= GICR_TYPER_PLPIS;
>>  
>>  	return extract_bytes(value, addr & 7, len);
>>  }
>>  
>> -static unsigned long vgic_uaccess_read_v3r_typer(struct kvm_vcpu *vcpu,
>> -						 gpa_t addr, unsigned int len)
>> -{
>> -	unsigned long mpidr = kvm_vcpu_get_mpidr_aff(vcpu);
>> -	int target_vcpu_id = vcpu->vcpu_id;
>> -	u64 value;
>> -
>> -	value = (u64)(mpidr & GENMASK(23, 0)) << 32;
>> -	value |= ((target_vcpu_id & 0xffff) << 8);
>> -
>> -	if (vgic_has_its(vcpu->kvm))
>> -		value |= GICR_TYPER_PLPIS;
>> -
>> -	/* reporting of the Last bit is not supported for userspace */
>> -	return extract_bytes(value, addr & 7, len);
>> -}
>> -
>>  static unsigned long vgic_mmio_read_v3r_iidr(struct kvm_vcpu *vcpu,
>>  					     gpa_t addr, unsigned int len)
>>  {
>> @@ -612,7 +624,7 @@ static const struct vgic_register_region vgic_v3_rd_registers[] = {
>>  		VGIC_ACCESS_32bit),
>>  	REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_TYPER,
>>  		vgic_mmio_read_v3r_typer, vgic_mmio_write_wi,
>> -		vgic_uaccess_read_v3r_typer, vgic_mmio_uaccess_write_wi, 8,
>> +		NULL, vgic_mmio_uaccess_write_wi, 8,
>>  		VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
>>  	REGISTER_DESC_WITH_LENGTH(GICR_WAKER,
>>  		vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
>> @@ -714,6 +726,16 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu)
>>  		return -EINVAL;
>>  
>>  	vgic_cpu->rdreg = rdreg;
>> +	vgic_cpu->rdreg_index = rdreg->free_index;
>> +	if (!rdreg->count) {
>> +		void *p = krealloc(rdreg->rdist_indices,
>> +				   (vgic_cpu->rdreg_index + 1) * sizeof(u32),
>> +				   GFP_KERNEL);
>> +		if (!p)
>> +			return -ENOMEM;
>> +		rdreg->rdist_indices = p;
>> +	}
>> +	rdreg->rdist_indices[vgic_cpu->rdreg_index] = vgic_cpu->index;
> 
> I think I really have a problem with this array, which comes from me
> not understanding the two checks I previously commented on.

I hope the above clarified the array need.
> 
> If we stick to the definition of 'Last', all that matters is the
> position of the RD in a region (rdreg_index) and potentially the
> presence of another contiguous region with allocated RDs in it.

> 
> IIUC, the checks should read like this:
> 
> if (vcpu->rdreg_index < (vcpu->rdreg->free_index - 1))
> 	last = false;
> else if (vcpu->rdreg_index == (vcpu->rdreg->free_index - 1) &&
> 	 adjacent_region(vcpu->rdreg)->free_index > 0)
> 	last = false;
> else
> 	last = true;
> 
> So why do we need to track the vcpu_id associated to a region?
because the redistributors within a region can be in random order.
That's why I need to store their number.

Does that make more sense?

Thanks

Eric
> 
>>
>>  	rd_base = rdreg->base + rdreg->free_index * KVM_VGIC_V3_REDIST_SIZE;
>>  
>> @@ -768,7 +790,7 @@ static int vgic_register_all_redist_iodevs(struct kvm *kvm)
>>  }
>>  
>>  /**
>> - * vgic_v3_insert_redist_region - Insert a new redistributor region
>> + * vgic_v3_alloc_redist_region - Allocate a new redistributor region
>>   *
>>   * Performs various checks before inserting the rdist region in the list.
>>   * Those tests depend on whether the size of the rdist region is known
>> @@ -782,8 +804,8 @@ static int vgic_register_all_redist_iodevs(struct kvm *kvm)
>>   *
>>   * Return 0 on success, < 0 otherwise
>>   */
>> -static int vgic_v3_insert_redist_region(struct kvm *kvm, uint32_t index,
>> -					gpa_t base, uint32_t count)
>> +static int vgic_v3_alloc_redist_region(struct kvm *kvm, uint32_t index,
>> +				       gpa_t base, uint32_t count)
>>  {
>>  	struct vgic_dist *d = &kvm->arch.vgic;
>>  	struct vgic_redist_region *rdreg;
>> @@ -839,6 +861,13 @@ static int vgic_v3_insert_redist_region(struct kvm *kvm, uint32_t index,
>>  	rdreg->count = count;
>>  	rdreg->free_index = 0;
>>  	rdreg->index = index;
>> +	if (count) {
>> +		rdreg->rdist_indices = kcalloc(count, sizeof(u32), GFP_KERNEL);
>> +		if (!rdreg->rdist_indices) {
>> +			ret = -ENOMEM;
>> +			goto free;
>> +		}
>> +	}
>>  
>>  	list_add_tail(&rdreg->list, rd_regions);
>>  	return 0;
>> @@ -847,11 +876,18 @@ static int vgic_v3_insert_redist_region(struct kvm *kvm, uint32_t index,
>>  	return ret;
>>  }
>>  
>> +void vgic_v3_free_redist_region(struct vgic_redist_region *rdreg)
>> +{
>> +	list_del(&rdreg->list);
>> +	kfree(rdreg->rdist_indices);
>> +	kfree(rdreg);
>> +}
>> +
>>  int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count)
>>  {
>>  	int ret;
>>  
>> -	ret = vgic_v3_insert_redist_region(kvm, index, addr, count);
>> +	ret = vgic_v3_alloc_redist_region(kvm, index, addr, count);
>>  	if (ret)
>>  		return ret;
>>  
>> @@ -864,8 +900,7 @@ int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count)
>>  		struct vgic_redist_region *rdreg;
>>  
>>  		rdreg = vgic_v3_rdist_region_from_index(kvm, index);
>> -		list_del(&rdreg->list);
>> -		kfree(rdreg);
>> +		vgic_v3_free_redist_region(rdreg);
>>  		return ret;
>>  	}
>>  
>> diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
>> index 64fcd75111108..bc418c2c12141 100644
>> --- a/arch/arm64/kvm/vgic/vgic.h
>> +++ b/arch/arm64/kvm/vgic/vgic.h
>> @@ -293,6 +293,7 @@ vgic_v3_rd_region_size(struct kvm *kvm, struct vgic_redist_region *rdreg)
>>  
>>  struct vgic_redist_region *vgic_v3_rdist_region_from_index(struct kvm *kvm,
>>  							   u32 index);
>> +void vgic_v3_free_redist_region(struct vgic_redist_region *rdreg);
>>  
>>  bool vgic_v3_rdist_overlap(struct kvm *kvm, gpa_t base, size_t size);
>>  
>> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
>> index 3d74f1060bd18..9a3f060ac3547 100644
>> --- a/include/kvm/arm_vgic.h
>> +++ b/include/kvm/arm_vgic.h
>> @@ -197,6 +197,7 @@ struct vgic_redist_region {
>>  	gpa_t base;
>>  	u32 count; /* number of redistributors or 0 if single region */
>>  	u32 free_index; /* index of the next free redistributor */
>> +	int *rdist_indices; /* indices of the redistributors */
> 
> You are treating it as an array of u32 when allocating it. Please
> choose one type or the other.
> 
>>  	struct list_head list;
>>  };
>>  
>> @@ -322,6 +323,8 @@ struct vgic_cpu {
>>  	 */
>>  	struct vgic_io_device	rd_iodev;
>>  	struct vgic_redist_region *rdreg;
>> +	u32 rdreg_index;
>> +	int index; /* vcpu index */
>>  
>>  	/* Contains the attributes and gpa of the LPI pending tables. */
>>  	u64 pendbaser;
> 
> Thanks,
> 
> 	M.
>
Marc Zyngier April 1, 2021, 5:30 p.m. UTC | #3
On Thu, 01 Apr 2021 18:03:25 +0100,
Auger Eric <eric.auger@redhat.com> wrote:
> 
> Hi Marc,
> 
> On 4/1/21 3:42 PM, Marc Zyngier wrote:
> > Hi Eric,
> > 
> > On Thu, 01 Apr 2021 09:52:37 +0100,
> > Eric Auger <eric.auger@redhat.com> wrote:
> >>
> >> Commit 23bde34771f1 ("KVM: arm64: vgic-v3: Drop the
> >> reporting of GICR_TYPER.Last for userspace") temporarily fixed
> >> a bug identified when attempting to access the GICR_TYPER
> >> register before the redistributor region setting, but dropped
> >> the support of the LAST bit.
> >>
> >> Emulating the GICR_TYPER.Last bit still makes sense for
> >> architecture compliance though. This patch restores its support
> >> (if the redistributor region was set) while keeping the code safe.
> >>
> >> We introduce a new helper, vgic_mmio_vcpu_rdist_is_last() which
> >> computes whether a redistributor is the highest one of a series
> >> of redistributor contributor pages.
> >>
> >> The spec says "Indicates whether this Redistributor is the
> >> highest-numbered Redistributor in a series of contiguous
> >> Redistributor pages."
> >>
> >> The code is a bit convulated since there is no guarantee
> > 
> > nit: convoluted
> > 
> >> redistributors are added in a given reditributor region in
> >> ascending order. In that case the current implementation was
> >> wrong. Also redistributor regions can be contiguous
> >> and registered in non increasing base address order.
> >>
> >> So the index of redistributors are stored in an array within
> >> the redistributor region structure.
> >>
> >> With this new implementation we do not need to have a uaccess
> >> read accessor anymore.
> >>
> >> Signed-off-by: Eric Auger <eric.auger@redhat.com>
> > 
> > This patch also hurt my head, a lot more than the first one.  See
> > below.
> > 
> >> ---
> >>  arch/arm64/kvm/vgic/vgic-init.c    |  7 +--
> >>  arch/arm64/kvm/vgic/vgic-mmio-v3.c | 97 ++++++++++++++++++++----------
> >>  arch/arm64/kvm/vgic/vgic.h         |  1 +
> >>  include/kvm/arm_vgic.h             |  3 +
> >>  4 files changed, 73 insertions(+), 35 deletions(-)
> >>
> >> diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
> >> index cf6faa0aeddb2..61150c34c268c 100644
> >> --- a/arch/arm64/kvm/vgic/vgic-init.c
> >> +++ b/arch/arm64/kvm/vgic/vgic-init.c
> >> @@ -190,6 +190,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
> >>  	int i;
> >>  
> >>  	vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF;
> >> +	vgic_cpu->index = vcpu->vcpu_id;
> > 
> > Is it so that vgic_cpu->index is always equal to vcpu_id? If so, why
> > do we need another field? We can always get to the vcpu using a
> > container_of().
> > 
> >>  
> >>  	INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
> >>  	raw_spin_lock_init(&vgic_cpu->ap_list_lock);
> >> @@ -338,10 +339,8 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)
> >>  	dist->vgic_dist_base = VGIC_ADDR_UNDEF;
> >>  
> >>  	if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
> >> -		list_for_each_entry_safe(rdreg, next, &dist->rd_regions, list) {
> >> -			list_del(&rdreg->list);
> >> -			kfree(rdreg);
> >> -		}
> >> +		list_for_each_entry_safe(rdreg, next, &dist->rd_regions, list)
> >> +			vgic_v3_free_redist_region(rdreg);
> > 
> > Consider moving the introduction of vgic_v3_free_redist_region() into
> > a separate patch. On its own, that's a good readability improvement.
> > 
> >>  		INIT_LIST_HEAD(&dist->rd_regions);
> >>  	} else {
> >>  		dist->vgic_cpu_base = VGIC_ADDR_UNDEF;
> >> diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
> >> index 987e366c80008..f6a7eed1d6adb 100644
> >> --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
> >> +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
> >> @@ -251,45 +251,57 @@ static void vgic_mmio_write_v3r_ctlr(struct kvm_vcpu *vcpu,
> >>  		vgic_enable_lpis(vcpu);
> >>  }
> >>  
> >> +static bool vgic_mmio_vcpu_rdist_is_last(struct kvm_vcpu *vcpu)
> >> +{
> >> +	struct vgic_dist *vgic = &vcpu->kvm->arch.vgic;
> >> +	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
> >> +	struct vgic_redist_region *rdreg = vgic_cpu->rdreg;
> >> +
> >> +	if (!rdreg)
> >> +		return false;
> >> +
> >> +	if (rdreg->count && vgic_cpu->rdreg_index == (rdreg->count - 1)) {
> >> +		/* check whether there is no other contiguous rdist region */
> >> +		struct list_head *rd_regions = &vgic->rd_regions;
> >> +		struct vgic_redist_region *iter;
> >> +
> >> +		list_for_each_entry(iter, rd_regions, list) {
> >> +			if (iter->base == rdreg->base + rdreg->count * KVM_VGIC_V3_REDIST_SIZE &&
> >> +				iter->free_index > 0) {
> >> +			/* check the first rdist index of this region, if any */
> >> +				if (vgic_cpu->index < iter->rdist_indices[0])
> >> +					return false;
> > 
> > rdist_indices[] contains the vcpu_id of the vcpu associated with a
> > given RD in the region. At this stage, you have established that there
> > is another region that is contiguous with the one associated with our
> > vcpu. You also know that this adjacent region has a vcpu mapped in
> > (free_index isn't 0). Isn't that enough to declare that our vcpu isn't
> > last?  I definitely don't understand what the index comparison does
> > here.
> Assume the following case:
> 2 RDIST region
> region #0 contains rdist 1, 2, 4
> region #1, adjacent to #0 contains rdist 3
> 
> Spec days:
> "Indicates whether this Redistributor is the
> highest-numbered Redistributor in a series of contiguous
> Redistributor pages."
> 
> To me 4 is last and 3 is last too.

No, only 3 is last, assuming that region 0 is full. I think the
phrasing in the spec is just really bad. What this describes is that
at the end of a set of contiguous set of RDs, that last RD has Last
set. If two regions are contiguous, that's undistinguishable from a
single, larger region.

There is no such thing as a "redistributor number" anyway. The closest
thing there is would be "processor number", but that has nothing to do
with the RD itself.

> 
> 
> > 
> > It also seem to me that some of the complexity could be eliminated if
> > the regions were kept ordered at list insertion time.
> yes
> > 
> >> +			}
> >> +		}
> >> +	} else if (vgic_cpu->rdreg_index < rdreg->free_index - 1) {
> >> +		/* look at the index of next rdist */
> >> +		int next_rdist_index = rdreg->rdist_indices[vgic_cpu->rdreg_index + 1];
> >> +
> >> +		if (vgic_cpu->index < next_rdist_index)
> >> +			return false;
> > 
> > Same thing here. We are in the middle of the allocated part of a
> > region, which means we cannot be last. I still don't get the index
> > check.
> Because within a region, nothing hinders rdist from being allocated in
> non ascending order. I exercise those cases in the kvmselftests
> 
> one single RDIST region with the following rdists allocated there:
> 1, 3, 2
> 
> 3 and 2 are "last", right? Or did I miss something. Yes that's totally
> not natural to do that kind of allocation but the API allows to do that.

No, only 2 is last. I think you got tripped by the bizarre language in
the spec, and the behaviour of this Last bit is much simpler than what
you ended up with.

Thanks,

	M.
Eric Auger April 1, 2021, 7:16 p.m. UTC | #4
Hi Marc,

On 4/1/21 7:30 PM, Marc Zyngier wrote:
> On Thu, 01 Apr 2021 18:03:25 +0100,
> Auger Eric <eric.auger@redhat.com> wrote:
>>
>> Hi Marc,
>>
>> On 4/1/21 3:42 PM, Marc Zyngier wrote:
>>> Hi Eric,
>>>
>>> On Thu, 01 Apr 2021 09:52:37 +0100,
>>> Eric Auger <eric.auger@redhat.com> wrote:
>>>>
>>>> Commit 23bde34771f1 ("KVM: arm64: vgic-v3: Drop the
>>>> reporting of GICR_TYPER.Last for userspace") temporarily fixed
>>>> a bug identified when attempting to access the GICR_TYPER
>>>> register before the redistributor region setting, but dropped
>>>> the support of the LAST bit.
>>>>
>>>> Emulating the GICR_TYPER.Last bit still makes sense for
>>>> architecture compliance though. This patch restores its support
>>>> (if the redistributor region was set) while keeping the code safe.
>>>>
>>>> We introduce a new helper, vgic_mmio_vcpu_rdist_is_last() which
>>>> computes whether a redistributor is the highest one of a series
>>>> of redistributor contributor pages.
>>>>
>>>> The spec says "Indicates whether this Redistributor is the
>>>> highest-numbered Redistributor in a series of contiguous
>>>> Redistributor pages."
>>>>
>>>> The code is a bit convulated since there is no guarantee
>>>
>>> nit: convoluted
>>>
>>>> redistributors are added in a given reditributor region in
>>>> ascending order. In that case the current implementation was
>>>> wrong. Also redistributor regions can be contiguous
>>>> and registered in non increasing base address order.
>>>>
>>>> So the index of redistributors are stored in an array within
>>>> the redistributor region structure.
>>>>
>>>> With this new implementation we do not need to have a uaccess
>>>> read accessor anymore.
>>>>
>>>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>>>
>>> This patch also hurt my head, a lot more than the first one.  See
>>> below.
>>>
>>>> ---
>>>>  arch/arm64/kvm/vgic/vgic-init.c    |  7 +--
>>>>  arch/arm64/kvm/vgic/vgic-mmio-v3.c | 97 ++++++++++++++++++++----------
>>>>  arch/arm64/kvm/vgic/vgic.h         |  1 +
>>>>  include/kvm/arm_vgic.h             |  3 +
>>>>  4 files changed, 73 insertions(+), 35 deletions(-)
>>>>
>>>> diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
>>>> index cf6faa0aeddb2..61150c34c268c 100644
>>>> --- a/arch/arm64/kvm/vgic/vgic-init.c
>>>> +++ b/arch/arm64/kvm/vgic/vgic-init.c
>>>> @@ -190,6 +190,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
>>>>  	int i;
>>>>  
>>>>  	vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF;
>>>> +	vgic_cpu->index = vcpu->vcpu_id;
>>>
>>> Is it so that vgic_cpu->index is always equal to vcpu_id? If so, why
>>> do we need another field? We can always get to the vcpu using a
>>> container_of().
>>>
>>>>  
>>>>  	INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
>>>>  	raw_spin_lock_init(&vgic_cpu->ap_list_lock);
>>>> @@ -338,10 +339,8 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)
>>>>  	dist->vgic_dist_base = VGIC_ADDR_UNDEF;
>>>>  
>>>>  	if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
>>>> -		list_for_each_entry_safe(rdreg, next, &dist->rd_regions, list) {
>>>> -			list_del(&rdreg->list);
>>>> -			kfree(rdreg);
>>>> -		}
>>>> +		list_for_each_entry_safe(rdreg, next, &dist->rd_regions, list)
>>>> +			vgic_v3_free_redist_region(rdreg);
>>>
>>> Consider moving the introduction of vgic_v3_free_redist_region() into
>>> a separate patch. On its own, that's a good readability improvement.
>>>
>>>>  		INIT_LIST_HEAD(&dist->rd_regions);
>>>>  	} else {
>>>>  		dist->vgic_cpu_base = VGIC_ADDR_UNDEF;
>>>> diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
>>>> index 987e366c80008..f6a7eed1d6adb 100644
>>>> --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
>>>> +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
>>>> @@ -251,45 +251,57 @@ static void vgic_mmio_write_v3r_ctlr(struct kvm_vcpu *vcpu,
>>>>  		vgic_enable_lpis(vcpu);
>>>>  }
>>>>  
>>>> +static bool vgic_mmio_vcpu_rdist_is_last(struct kvm_vcpu *vcpu)
>>>> +{
>>>> +	struct vgic_dist *vgic = &vcpu->kvm->arch.vgic;
>>>> +	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
>>>> +	struct vgic_redist_region *rdreg = vgic_cpu->rdreg;
>>>> +
>>>> +	if (!rdreg)
>>>> +		return false;
>>>> +
>>>> +	if (rdreg->count && vgic_cpu->rdreg_index == (rdreg->count - 1)) {
>>>> +		/* check whether there is no other contiguous rdist region */
>>>> +		struct list_head *rd_regions = &vgic->rd_regions;
>>>> +		struct vgic_redist_region *iter;
>>>> +
>>>> +		list_for_each_entry(iter, rd_regions, list) {
>>>> +			if (iter->base == rdreg->base + rdreg->count * KVM_VGIC_V3_REDIST_SIZE &&
>>>> +				iter->free_index > 0) {
>>>> +			/* check the first rdist index of this region, if any */
>>>> +				if (vgic_cpu->index < iter->rdist_indices[0])
>>>> +					return false;
>>>
>>> rdist_indices[] contains the vcpu_id of the vcpu associated with a
>>> given RD in the region. At this stage, you have established that there
>>> is another region that is contiguous with the one associated with our
>>> vcpu. You also know that this adjacent region has a vcpu mapped in
>>> (free_index isn't 0). Isn't that enough to declare that our vcpu isn't
>>> last?  I definitely don't understand what the index comparison does
>>> here.
>> Assume the following case:
>> 2 RDIST region
>> region #0 contains rdist 1, 2, 4
>> region #1, adjacent to #0 contains rdist 3
>>
>> Spec days:
>> "Indicates whether this Redistributor is the
>> highest-numbered Redistributor in a series of contiguous
>> Redistributor pages."
>>
>> To me 4 is last and 3 is last too.
> 
> No, only 3 is last, assuming that region 0 is full. I think the
> phrasing in the spec is just really bad. What this describes is that
> at the end of a set of contiguous set of RDs, that last RD has Last
> set. If two regions are contiguous, that's undistinguishable from a
> single, larger region.
> 
> There is no such thing as a "redistributor number" anyway. The closest
> thing there is would be "processor number", but that has nothing to do
> with the RD itself.

Hum OK. That's a different understanding of the spec wording indeed. For
me redistributor number was the index of the vcpu.

But well, you're understanding is definitively simpler to implement and
also matches what was implemented for single RDIST region.

> 
>>
>>
>>>
>>> It also seem to me that some of the complexity could be eliminated if
>>> the regions were kept ordered at list insertion time.
>> yes
>>>
>>>> +			}
>>>> +		}
>>>> +	} else if (vgic_cpu->rdreg_index < rdreg->free_index - 1) {
>>>> +		/* look at the index of next rdist */
>>>> +		int next_rdist_index = rdreg->rdist_indices[vgic_cpu->rdreg_index + 1];
>>>> +
>>>> +		if (vgic_cpu->index < next_rdist_index)
>>>> +			return false;
>>>
>>> Same thing here. We are in the middle of the allocated part of a
>>> region, which means we cannot be last. I still don't get the index
>>> check.
>> Because within a region, nothing hinders rdist from being allocated in
>> non ascending order. I exercise those cases in the kvmselftests
>>
>> one single RDIST region with the following rdists allocated there:
>> 1, 3, 2
>>
>> 3 and 2 are "last", right? Or did I miss something. Yes that's totally
>> not natural to do that kind of allocation but the API allows to do that.
> 
> No, only 2 is last. I think you got tripped by the bizarre language in
> the spec, and the behaviour of this Last bit is much simpler than what
> you ended up with.


OK, I will respin according to your suggestion then.

Thanks

Eric
> 
> Thanks,
> 
> 	M.
>
Marc Zyngier April 2, 2021, 9:32 a.m. UTC | #5
Hi Eric,

On Thu, 01 Apr 2021 20:16:53 +0100,
Auger Eric <eric.auger@redhat.com> wrote:
> 
> Hi Marc,
> 
> On 4/1/21 7:30 PM, Marc Zyngier wrote:
> > On Thu, 01 Apr 2021 18:03:25 +0100,
> > Auger Eric <eric.auger@redhat.com> wrote:
> >>
> >> Hi Marc,
> >>
> >> On 4/1/21 3:42 PM, Marc Zyngier wrote:
> >>> Hi Eric,
> >>>
> >>> On Thu, 01 Apr 2021 09:52:37 +0100,
> >>> Eric Auger <eric.auger@redhat.com> wrote:
> >>>>
> >>>> Commit 23bde34771f1 ("KVM: arm64: vgic-v3: Drop the
> >>>> reporting of GICR_TYPER.Last for userspace") temporarily fixed
> >>>> a bug identified when attempting to access the GICR_TYPER
> >>>> register before the redistributor region setting, but dropped
> >>>> the support of the LAST bit.
> >>>>
> >>>> Emulating the GICR_TYPER.Last bit still makes sense for
> >>>> architecture compliance though. This patch restores its support
> >>>> (if the redistributor region was set) while keeping the code safe.
> >>>>
> >>>> We introduce a new helper, vgic_mmio_vcpu_rdist_is_last() which
> >>>> computes whether a redistributor is the highest one of a series
> >>>> of redistributor contributor pages.
> >>>>
> >>>> The spec says "Indicates whether this Redistributor is the
> >>>> highest-numbered Redistributor in a series of contiguous
> >>>> Redistributor pages."
> >>>>
> >>>> The code is a bit convulated since there is no guarantee
> >>>
> >>> nit: convoluted
> >>>
> >>>> redistributors are added in a given reditributor region in
> >>>> ascending order. In that case the current implementation was
> >>>> wrong. Also redistributor regions can be contiguous
> >>>> and registered in non increasing base address order.
> >>>>
> >>>> So the index of redistributors are stored in an array within
> >>>> the redistributor region structure.
> >>>>
> >>>> With this new implementation we do not need to have a uaccess
> >>>> read accessor anymore.
> >>>>
> >>>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
> >>>
> >>> This patch also hurt my head, a lot more than the first one.  See
> >>> below.
> >>>
> >>>> ---
> >>>>  arch/arm64/kvm/vgic/vgic-init.c    |  7 +--
> >>>>  arch/arm64/kvm/vgic/vgic-mmio-v3.c | 97 ++++++++++++++++++++----------
> >>>>  arch/arm64/kvm/vgic/vgic.h         |  1 +
> >>>>  include/kvm/arm_vgic.h             |  3 +
> >>>>  4 files changed, 73 insertions(+), 35 deletions(-)
> >>>>
> >>>> diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
> >>>> index cf6faa0aeddb2..61150c34c268c 100644
> >>>> --- a/arch/arm64/kvm/vgic/vgic-init.c
> >>>> +++ b/arch/arm64/kvm/vgic/vgic-init.c
> >>>> @@ -190,6 +190,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
> >>>>  	int i;
> >>>>  
> >>>>  	vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF;
> >>>> +	vgic_cpu->index = vcpu->vcpu_id;
> >>>
> >>> Is it so that vgic_cpu->index is always equal to vcpu_id? If so, why
> >>> do we need another field? We can always get to the vcpu using a
> >>> container_of().
> >>>
> >>>>  
> >>>>  	INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
> >>>>  	raw_spin_lock_init(&vgic_cpu->ap_list_lock);
> >>>> @@ -338,10 +339,8 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)
> >>>>  	dist->vgic_dist_base = VGIC_ADDR_UNDEF;
> >>>>  
> >>>>  	if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
> >>>> -		list_for_each_entry_safe(rdreg, next, &dist->rd_regions, list) {
> >>>> -			list_del(&rdreg->list);
> >>>> -			kfree(rdreg);
> >>>> -		}
> >>>> +		list_for_each_entry_safe(rdreg, next, &dist->rd_regions, list)
> >>>> +			vgic_v3_free_redist_region(rdreg);
> >>>
> >>> Consider moving the introduction of vgic_v3_free_redist_region() into
> >>> a separate patch. On its own, that's a good readability improvement.
> >>>
> >>>>  		INIT_LIST_HEAD(&dist->rd_regions);
> >>>>  	} else {
> >>>>  		dist->vgic_cpu_base = VGIC_ADDR_UNDEF;
> >>>> diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
> >>>> index 987e366c80008..f6a7eed1d6adb 100644
> >>>> --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
> >>>> +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
> >>>> @@ -251,45 +251,57 @@ static void vgic_mmio_write_v3r_ctlr(struct kvm_vcpu *vcpu,
> >>>>  		vgic_enable_lpis(vcpu);
> >>>>  }
> >>>>  
> >>>> +static bool vgic_mmio_vcpu_rdist_is_last(struct kvm_vcpu *vcpu)
> >>>> +{
> >>>> +	struct vgic_dist *vgic = &vcpu->kvm->arch.vgic;
> >>>> +	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
> >>>> +	struct vgic_redist_region *rdreg = vgic_cpu->rdreg;
> >>>> +
> >>>> +	if (!rdreg)
> >>>> +		return false;
> >>>> +
> >>>> +	if (rdreg->count && vgic_cpu->rdreg_index == (rdreg->count - 1)) {
> >>>> +		/* check whether there is no other contiguous rdist region */
> >>>> +		struct list_head *rd_regions = &vgic->rd_regions;
> >>>> +		struct vgic_redist_region *iter;
> >>>> +
> >>>> +		list_for_each_entry(iter, rd_regions, list) {
> >>>> +			if (iter->base == rdreg->base + rdreg->count * KVM_VGIC_V3_REDIST_SIZE &&
> >>>> +				iter->free_index > 0) {
> >>>> +			/* check the first rdist index of this region, if any */
> >>>> +				if (vgic_cpu->index < iter->rdist_indices[0])
> >>>> +					return false;
> >>>
> >>> rdist_indices[] contains the vcpu_id of the vcpu associated with a
> >>> given RD in the region. At this stage, you have established that there
> >>> is another region that is contiguous with the one associated with our
> >>> vcpu. You also know that this adjacent region has a vcpu mapped in
> >>> (free_index isn't 0). Isn't that enough to declare that our vcpu isn't
> >>> last?  I definitely don't understand what the index comparison does
> >>> here.
> >> Assume the following case:
> >> 2 RDIST region
> >> region #0 contains rdist 1, 2, 4
> >> region #1, adjacent to #0 contains rdist 3
> >>
> >> Spec days:
> >> "Indicates whether this Redistributor is the
> >> highest-numbered Redistributor in a series of contiguous
> >> Redistributor pages."
> >>
> >> To me 4 is last and 3 is last too.
> > 
> > No, only 3 is last, assuming that region 0 is full. I think the
> > phrasing in the spec is just really bad. What this describes is that
> > at the end of a set of contiguous set of RDs, that last RD has Last
> > set. If two regions are contiguous, that's undistinguishable from a
> > single, larger region.
> > 
> > There is no such thing as a "redistributor number" anyway. The closest
> > thing there is would be "processor number", but that has nothing to do
> > with the RD itself.
> 
> Hum OK. That's a different understanding of the spec wording indeed. For
> me redistributor number was the index of the vcpu.

I think that's the source of the confusion. There really is nothing
like a redistributor number. There is a processor number when
GICR_TYPER.PTA=0 (that the guest uses as the target CPU when moving a
LPI), but that's it. The layout is totally dumb, and the last frame in
a contiguous sequence of frames is, well, last. The content of the
frames doesn't matter in the least.

> But well, you're understanding is definitively simpler to implement and
> also matches what was implemented for single RDIST region.

That's a key insight. There is no reason why the RD layout would defer
between a single region and multiple regions.

Think of it from a HW perspective. You design a SoC that has
"clusters" of CPUs, and you lay down a bunch of RDs, one set per
cluster. Each set has a "Last" RD frame, and that's all there is to
it.

I'll try and see if ARM people are willing to clarify the spec (for
which an update is long overdue).

Thanks,

	M.
diff mbox series

Patch

diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index cf6faa0aeddb2..61150c34c268c 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -190,6 +190,7 @@  int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 	int i;
 
 	vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF;
+	vgic_cpu->index = vcpu->vcpu_id;
 
 	INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
 	raw_spin_lock_init(&vgic_cpu->ap_list_lock);
@@ -338,10 +339,8 @@  static void kvm_vgic_dist_destroy(struct kvm *kvm)
 	dist->vgic_dist_base = VGIC_ADDR_UNDEF;
 
 	if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
-		list_for_each_entry_safe(rdreg, next, &dist->rd_regions, list) {
-			list_del(&rdreg->list);
-			kfree(rdreg);
-		}
+		list_for_each_entry_safe(rdreg, next, &dist->rd_regions, list)
+			vgic_v3_free_redist_region(rdreg);
 		INIT_LIST_HEAD(&dist->rd_regions);
 	} else {
 		dist->vgic_cpu_base = VGIC_ADDR_UNDEF;
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
index 987e366c80008..f6a7eed1d6adb 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
@@ -251,45 +251,57 @@  static void vgic_mmio_write_v3r_ctlr(struct kvm_vcpu *vcpu,
 		vgic_enable_lpis(vcpu);
 }
 
+static bool vgic_mmio_vcpu_rdist_is_last(struct kvm_vcpu *vcpu)
+{
+	struct vgic_dist *vgic = &vcpu->kvm->arch.vgic;
+	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+	struct vgic_redist_region *rdreg = vgic_cpu->rdreg;
+
+	if (!rdreg)
+		return false;
+
+	if (rdreg->count && vgic_cpu->rdreg_index == (rdreg->count - 1)) {
+		/* check whether there is no other contiguous rdist region */
+		struct list_head *rd_regions = &vgic->rd_regions;
+		struct vgic_redist_region *iter;
+
+		list_for_each_entry(iter, rd_regions, list) {
+			if (iter->base == rdreg->base + rdreg->count * KVM_VGIC_V3_REDIST_SIZE &&
+				iter->free_index > 0) {
+			/* check the first rdist index of this region, if any */
+				if (vgic_cpu->index < iter->rdist_indices[0])
+					return false;
+			}
+		}
+	} else if (vgic_cpu->rdreg_index < rdreg->free_index - 1) {
+		/* look at the index of next rdist */
+		int next_rdist_index = rdreg->rdist_indices[vgic_cpu->rdreg_index + 1];
+
+		if (vgic_cpu->index < next_rdist_index)
+			return false;
+	}
+	return true;
+}
+
 static unsigned long vgic_mmio_read_v3r_typer(struct kvm_vcpu *vcpu,
 					      gpa_t addr, unsigned int len)
 {
 	unsigned long mpidr = kvm_vcpu_get_mpidr_aff(vcpu);
-	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-	struct vgic_redist_region *rdreg = vgic_cpu->rdreg;
 	int target_vcpu_id = vcpu->vcpu_id;
-	gpa_t last_rdist_typer = rdreg->base + GICR_TYPER +
-			(rdreg->free_index - 1) * KVM_VGIC_V3_REDIST_SIZE;
 	u64 value;
 
 	value = (u64)(mpidr & GENMASK(23, 0)) << 32;
 	value |= ((target_vcpu_id & 0xffff) << 8);
 
-	if (addr == last_rdist_typer)
+	if (vgic_has_its(vcpu->kvm))
+		value |= GICR_TYPER_PLPIS;
+
+	if (vgic_mmio_vcpu_rdist_is_last(vcpu))
 		value |= GICR_TYPER_LAST;
-	if (vgic_has_its(vcpu->kvm))
-		value |= GICR_TYPER_PLPIS;
 
 	return extract_bytes(value, addr & 7, len);
 }
 
-static unsigned long vgic_uaccess_read_v3r_typer(struct kvm_vcpu *vcpu,
-						 gpa_t addr, unsigned int len)
-{
-	unsigned long mpidr = kvm_vcpu_get_mpidr_aff(vcpu);
-	int target_vcpu_id = vcpu->vcpu_id;
-	u64 value;
-
-	value = (u64)(mpidr & GENMASK(23, 0)) << 32;
-	value |= ((target_vcpu_id & 0xffff) << 8);
-
-	if (vgic_has_its(vcpu->kvm))
-		value |= GICR_TYPER_PLPIS;
-
-	/* reporting of the Last bit is not supported for userspace */
-	return extract_bytes(value, addr & 7, len);
-}
-
 static unsigned long vgic_mmio_read_v3r_iidr(struct kvm_vcpu *vcpu,
 					     gpa_t addr, unsigned int len)
 {
@@ -612,7 +624,7 @@  static const struct vgic_register_region vgic_v3_rd_registers[] = {
 		VGIC_ACCESS_32bit),
 	REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_TYPER,
 		vgic_mmio_read_v3r_typer, vgic_mmio_write_wi,
-		vgic_uaccess_read_v3r_typer, vgic_mmio_uaccess_write_wi, 8,
+		NULL, vgic_mmio_uaccess_write_wi, 8,
 		VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
 	REGISTER_DESC_WITH_LENGTH(GICR_WAKER,
 		vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
@@ -714,6 +726,16 @@  int vgic_register_redist_iodev(struct kvm_vcpu *vcpu)
 		return -EINVAL;
 
 	vgic_cpu->rdreg = rdreg;
+	vgic_cpu->rdreg_index = rdreg->free_index;
+	if (!rdreg->count) {
+		void *p = krealloc(rdreg->rdist_indices,
+				   (vgic_cpu->rdreg_index + 1) * sizeof(u32),
+				   GFP_KERNEL);
+		if (!p)
+			return -ENOMEM;
+		rdreg->rdist_indices = p;
+	}
+	rdreg->rdist_indices[vgic_cpu->rdreg_index] = vgic_cpu->index;
 
 	rd_base = rdreg->base + rdreg->free_index * KVM_VGIC_V3_REDIST_SIZE;
 
@@ -768,7 +790,7 @@  static int vgic_register_all_redist_iodevs(struct kvm *kvm)
 }
 
 /**
- * vgic_v3_insert_redist_region - Insert a new redistributor region
+ * vgic_v3_alloc_redist_region - Allocate a new redistributor region
  *
  * Performs various checks before inserting the rdist region in the list.
  * Those tests depend on whether the size of the rdist region is known
@@ -782,8 +804,8 @@  static int vgic_register_all_redist_iodevs(struct kvm *kvm)
  *
  * Return 0 on success, < 0 otherwise
  */
-static int vgic_v3_insert_redist_region(struct kvm *kvm, uint32_t index,
-					gpa_t base, uint32_t count)
+static int vgic_v3_alloc_redist_region(struct kvm *kvm, uint32_t index,
+				       gpa_t base, uint32_t count)
 {
 	struct vgic_dist *d = &kvm->arch.vgic;
 	struct vgic_redist_region *rdreg;
@@ -839,6 +861,13 @@  static int vgic_v3_insert_redist_region(struct kvm *kvm, uint32_t index,
 	rdreg->count = count;
 	rdreg->free_index = 0;
 	rdreg->index = index;
+	if (count) {
+		rdreg->rdist_indices = kcalloc(count, sizeof(u32), GFP_KERNEL);
+		if (!rdreg->rdist_indices) {
+			ret = -ENOMEM;
+			goto free;
+		}
+	}
 
 	list_add_tail(&rdreg->list, rd_regions);
 	return 0;
@@ -847,11 +876,18 @@  static int vgic_v3_insert_redist_region(struct kvm *kvm, uint32_t index,
 	return ret;
 }
 
+void vgic_v3_free_redist_region(struct vgic_redist_region *rdreg)
+{
+	list_del(&rdreg->list);
+	kfree(rdreg->rdist_indices);
+	kfree(rdreg);
+}
+
 int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count)
 {
 	int ret;
 
-	ret = vgic_v3_insert_redist_region(kvm, index, addr, count);
+	ret = vgic_v3_alloc_redist_region(kvm, index, addr, count);
 	if (ret)
 		return ret;
 
@@ -864,8 +900,7 @@  int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count)
 		struct vgic_redist_region *rdreg;
 
 		rdreg = vgic_v3_rdist_region_from_index(kvm, index);
-		list_del(&rdreg->list);
-		kfree(rdreg);
+		vgic_v3_free_redist_region(rdreg);
 		return ret;
 	}
 
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index 64fcd75111108..bc418c2c12141 100644
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -293,6 +293,7 @@  vgic_v3_rd_region_size(struct kvm *kvm, struct vgic_redist_region *rdreg)
 
 struct vgic_redist_region *vgic_v3_rdist_region_from_index(struct kvm *kvm,
 							   u32 index);
+void vgic_v3_free_redist_region(struct vgic_redist_region *rdreg);
 
 bool vgic_v3_rdist_overlap(struct kvm *kvm, gpa_t base, size_t size);
 
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 3d74f1060bd18..9a3f060ac3547 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -197,6 +197,7 @@  struct vgic_redist_region {
 	gpa_t base;
 	u32 count; /* number of redistributors or 0 if single region */
 	u32 free_index; /* index of the next free redistributor */
+	int *rdist_indices; /* indices of the redistributors */
 	struct list_head list;
 };
 
@@ -322,6 +323,8 @@  struct vgic_cpu {
 	 */
 	struct vgic_io_device	rd_iodev;
 	struct vgic_redist_region *rdreg;
+	u32 rdreg_index;
+	int index; /* vcpu index */
 
 	/* Contains the attributes and gpa of the LPI pending tables. */
 	u64 pendbaser;