diff mbox series

KVM: arm/arm64: vgic-its: Add error handling in vgic_its_cache_translation

Message ID 20241128134534.361144-1-keisuke.nishimura@inria.fr (mailing list archive)
State New
Headers show
Series KVM: arm/arm64: vgic-its: Add error handling in vgic_its_cache_translation | expand

Commit Message

Keisuke Nishimura Nov. 28, 2024, 1:45 p.m. UTC
The xa_store() may fail because there is no guarantee that the cache_key
index is already used in its->translation_cache. This fix (1) resolves
the kref inconsistency on failure and (2) returns the error code.

Fixes: 8201d1028caa ("KVM: arm64: vgic-its: Maintain a translation cache per ITS")
Signed-off-by: Keisuke Nishimura <keisuke.nishimura@inria.fr>
---
 arch/arm64/kvm/vgic/vgic-its.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

Comments

Marc Zyngier Nov. 28, 2024, 4:54 p.m. UTC | #1
On Thu, 28 Nov 2024 13:45:34 +0000,
Keisuke Nishimura <keisuke.nishimura@inria.fr> wrote:
> 
> The xa_store() may fail because there is no guarantee that the cache_key
> index is already used in its->translation_cache. This fix (1) resolves
> the kref inconsistency on failure and (2) returns the error code.

Please describe the failure mode. Under which circumstances can this
fail?

> 
> Fixes: 8201d1028caa ("KVM: arm64: vgic-its: Maintain a translation cache per ITS")
> Signed-off-by: Keisuke Nishimura <keisuke.nishimura@inria.fr>
> ---
>  arch/arm64/kvm/vgic/vgic-its.c | 16 ++++++++++++----
>  1 file changed, 12 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
> index 198296933e7e..8f423857b7d2 100644
> --- a/arch/arm64/kvm/vgic/vgic-its.c
> +++ b/arch/arm64/kvm/vgic/vgic-its.c
> @@ -555,7 +555,7 @@ static struct vgic_irq *vgic_its_check_cache(struct kvm *kvm, phys_addr_t db,
>  	return irq;
>  }
>  
> -static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its,
> +static int vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its,
>  				       u32 devid, u32 eventid,
>  				       struct vgic_irq *irq)
>  {
> @@ -564,7 +564,11 @@ static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its,
>  
>  	/* Do not cache a directly injected interrupt */
>  	if (irq->hw)
> -		return;
> +		return 0;
> +
> +	old = xa_store(&its->translation_cache, cache_key, irq, GFP_KERNEL_ACCOUNT);
> +	if (xa_is_err(old))
> +		return xa_err(old);

Accessing the translation cache before the assert that checks the ITS
lock is on its own pretty dodgy...

Thanks,

	M.
Oliver Upton Nov. 28, 2024, 5:55 p.m. UTC | #2
On Thu, Nov 28, 2024 at 02:45:34PM +0100, Keisuke Nishimura wrote:
> The xa_store() may fail because there is no guarantee that the cache_key
> index is already used in its->translation_cache. This fix (1) resolves
> the kref inconsistency on failure and (2) returns the error code.

xa_store() doesn't fail if an entry is already present at the specified
index. It returns the old entry, which is why we have a vgic_put_irq()
on the "error" path.

Genuine error handling definitely is missing here, but that would only
happen if the xarray library failed to allocate (-ENOMEM) or if the
xarray itself is broken beyond repair (-EINVAL).

> Fixes: 8201d1028caa ("KVM: arm64: vgic-its: Maintain a translation cache per ITS")
> Signed-off-by: Keisuke Nishimura <keisuke.nishimura@inria.fr>
> ---
>  arch/arm64/kvm/vgic/vgic-its.c | 16 ++++++++++++----
>  1 file changed, 12 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
> index 198296933e7e..8f423857b7d2 100644
> --- a/arch/arm64/kvm/vgic/vgic-its.c
> +++ b/arch/arm64/kvm/vgic/vgic-its.c
> @@ -555,7 +555,7 @@ static struct vgic_irq *vgic_its_check_cache(struct kvm *kvm, phys_addr_t db,
>  	return irq;
>  }
>  
> -static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its,
> +static int vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its,
>  				       u32 devid, u32 eventid,
>  				       struct vgic_irq *irq)

This was deliberately made a void return. The entire translation cache
is opportunistic and not required for functional correctness. Nothing
breaks if we fail to insert an entry for, say, a failed memory
allocation.

It would be extremely helpful if you could share the steps to reproduce
the error you observe.
Keisuke Nishimura Nov. 28, 2024, 7:04 p.m. UTC | #3
Hello Marc and Oliver,

Thank you for the replies.

On 28/11/2024 18:55, Oliver Upton wrote:
> On Thu, Nov 28, 2024 at 02:45:34PM +0100, Keisuke Nishimura wrote:
>> The xa_store() may fail because there is no guarantee that the cache_key
>> index is already used in its->translation_cache. This fix (1) resolves
>> the kref inconsistency on failure and (2) returns the error code.
> 
> xa_store() doesn't fail if an entry is already present at the specified
> index. It returns the old entry, which is why we have a vgic_put_irq()
> on the "error" path.

Sure. But to my understanding, this could be the first store to 
its->translation_cache with cache_key, and that is why old can be NULL? I am not 
very familiar with this code, so please correct me if I am wrong.

> 
> Genuine error handling definitely is missing here, but that would only
> happen if the xarray library failed to allocate (-ENOMEM) or if the
> xarray itself is broken beyond repair (-EINVAL).
> 
>> Fixes: 8201d1028caa ("KVM: arm64: vgic-its: Maintain a translation cache per ITS")
>> Signed-off-by: Keisuke Nishimura <keisuke.nishimura@inria.fr>
>> ---
>>   arch/arm64/kvm/vgic/vgic-its.c | 16 ++++++++++++----
>>   1 file changed, 12 insertions(+), 4 deletions(-)
>>
>> diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
>> index 198296933e7e..8f423857b7d2 100644
>> --- a/arch/arm64/kvm/vgic/vgic-its.c
>> +++ b/arch/arm64/kvm/vgic/vgic-its.c
>> @@ -555,7 +555,7 @@ static struct vgic_irq *vgic_its_check_cache(struct kvm *kvm, phys_addr_t db,
>>   	return irq;
>>   }
>>   
>> -static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its,
>> +static int vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its,
>>   				       u32 devid, u32 eventid,
>>   				       struct vgic_irq *irq)
> 
> This was deliberately made a void return. The entire translation cache
> is opportunistic and not required for functional correctness. Nothing
> breaks if we fail to insert an entry for, say, a failed memory
> allocation.

If my point above is correct, in the next version, we can ensure no memory error 
here by calling xa_reserve() in advance. Or, we can ignore the error, and just 
make the kref consistent.

> 
> It would be extremely helpful if you could share the steps to reproduce
> the error you observe.
> 

I encountered this when I studied the usage of xa_*. After some investigation to 
confirm that the call of xa_store() might be the first store with cache_key, I 
sent this patch. (I apologize if I missed anything.)

Thank you,
Keisuke
diff mbox series

Patch

diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index 198296933e7e..8f423857b7d2 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -555,7 +555,7 @@  static struct vgic_irq *vgic_its_check_cache(struct kvm *kvm, phys_addr_t db,
 	return irq;
 }
 
-static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its,
+static int vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its,
 				       u32 devid, u32 eventid,
 				       struct vgic_irq *irq)
 {
@@ -564,7 +564,11 @@  static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its,
 
 	/* Do not cache a directly injected interrupt */
 	if (irq->hw)
-		return;
+		return 0;
+
+	old = xa_store(&its->translation_cache, cache_key, irq, GFP_KERNEL_ACCOUNT);
+	if (xa_is_err(old))
+		return xa_err(old);
 
 	/*
 	 * The irq refcount is guaranteed to be nonzero while holding the
@@ -578,9 +582,10 @@  static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its,
 	 * translation behind our back, ensure we don't leak a
 	 * reference if that is the case.
 	 */
-	old = xa_store(&its->translation_cache, cache_key, irq, GFP_KERNEL_ACCOUNT);
 	if (old)
 		vgic_put_irq(kvm, old);
+
+	return 0;
 }
 
 static void vgic_its_invalidate_cache(struct vgic_its *its)
@@ -618,6 +623,7 @@  int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
 {
 	struct kvm_vcpu *vcpu;
 	struct its_ite *ite;
+	int ret;
 
 	if (!its->enabled)
 		return -EBUSY;
@@ -633,7 +639,9 @@  int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
 	if (!vgic_lpis_enabled(vcpu))
 		return -EBUSY;
 
-	vgic_its_cache_translation(kvm, its, devid, eventid, ite->irq);
+	ret = vgic_its_cache_translation(kvm, its, devid, eventid, ite->irq);
+	if (ret)
+		return ret;
 
 	*irq = ite->irq;
 	return 0;