diff mbox series

[RFC,v1,2/4] KVM: arm64: GICv4.1: Try to save hw pending state in save_pending_tables

Message ID 20201123065410.1915-3-lushenming@huawei.com (mailing list archive)
State New, archived
Headers show
Series KVM: arm64: Add VLPI migration support on GICv4.1 | expand

Commit Message

Shenming Lu Nov. 23, 2020, 6:54 a.m. UTC
After pausing all vCPUs and devices capable of interrupting, in order
to save the information of all interrupts, besides flushing the pending
states in kvm’s vgic, we also try to flush the states of VLPIs in the
virtual pending tables into guest RAM, but we need to have GICv4.1 and
safely unmap the vPEs first.

Signed-off-by: Shenming Lu <lushenming@huawei.com>
---
 arch/arm64/kvm/vgic/vgic-v3.c | 62 +++++++++++++++++++++++++++++++----
 1 file changed, 56 insertions(+), 6 deletions(-)

Comments

Marc Zyngier Nov. 23, 2020, 9:18 a.m. UTC | #1
On 2020-11-23 06:54, Shenming Lu wrote:
> After pausing all vCPUs and devices capable of interrupting, in order
         ^^^^^^^^^^^^^^^^^
See my comment below about this.

> to save the information of all interrupts, besides flushing the pending
> states in kvm’s vgic, we also try to flush the states of VLPIs in the
> virtual pending tables into guest RAM, but we need to have GICv4.1 and
> safely unmap the vPEs first.
> 
> Signed-off-by: Shenming Lu <lushenming@huawei.com>
> ---
>  arch/arm64/kvm/vgic/vgic-v3.c | 62 +++++++++++++++++++++++++++++++----
>  1 file changed, 56 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/arm64/kvm/vgic/vgic-v3.c 
> b/arch/arm64/kvm/vgic/vgic-v3.c
> index 9cdf39a94a63..e1b3aa4b2b12 100644
> --- a/arch/arm64/kvm/vgic/vgic-v3.c
> +++ b/arch/arm64/kvm/vgic/vgic-v3.c
> @@ -1,6 +1,8 @@
>  // SPDX-License-Identifier: GPL-2.0-only
> 
>  #include <linux/irqchip/arm-gic-v3.h>
> +#include <linux/irq.h>
> +#include <linux/irqdomain.h>
>  #include <linux/kvm.h>
>  #include <linux/kvm_host.h>
>  #include <kvm/arm_vgic.h>
> @@ -356,6 +358,39 @@ int vgic_v3_lpi_sync_pending_status(struct kvm
> *kvm, struct vgic_irq *irq)
>  	return 0;
>  }
> 
> +/*
> + * With GICv4.1, we can get the VLPI's pending state after unmapping
> + * the vPE. The deactivation of the doorbell interrupt will trigger
> + * the unmapping of the associated vPE.
> + */
> +static void get_vlpi_state_pre(struct vgic_dist *dist)
> +{
> +	struct irq_desc *desc;
> +	int i;
> +
> +	if (!kvm_vgic_global_state.has_gicv4_1)
> +		return;
> +
> +	for (i = 0; i < dist->its_vm.nr_vpes; i++) {
> +		desc = irq_to_desc(dist->its_vm.vpes[i]->irq);
> +		irq_domain_deactivate_irq(irq_desc_get_irq_data(desc));
> +	}
> +}
> +
> +static void get_vlpi_state_post(struct vgic_dist *dist)

nit: the naming feels a bit... odd. Pre/post what?

> +{
> +	struct irq_desc *desc;
> +	int i;
> +
> +	if (!kvm_vgic_global_state.has_gicv4_1)
> +		return;
> +
> +	for (i = 0; i < dist->its_vm.nr_vpes; i++) {
> +		desc = irq_to_desc(dist->its_vm.vpes[i]->irq);
> +		irq_domain_activate_irq(irq_desc_get_irq_data(desc), false);
> +	}
> +}
> +
>  /**
>   * vgic_v3_save_pending_tables - Save the pending tables into guest 
> RAM
>   * kvm lock and all vcpu lock must be held
> @@ -365,14 +400,17 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
>  	struct vgic_dist *dist = &kvm->arch.vgic;
>  	struct vgic_irq *irq;
>  	gpa_t last_ptr = ~(gpa_t)0;
> -	int ret;
> +	int ret = 0;
>  	u8 val;
> 
> +	get_vlpi_state_pre(dist);
> +
>  	list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
>  		int byte_offset, bit_nr;
>  		struct kvm_vcpu *vcpu;
>  		gpa_t pendbase, ptr;
>  		bool stored;
> +		bool is_pending = irq->pending_latch;
> 
>  		vcpu = irq->target_vcpu;
>  		if (!vcpu)
> @@ -387,24 +425,36 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
>  		if (ptr != last_ptr) {
>  			ret = kvm_read_guest_lock(kvm, ptr, &val, 1);
>  			if (ret)
> -				return ret;
> +				goto out;
>  			last_ptr = ptr;
>  		}
> 
>  		stored = val & (1U << bit_nr);
> -		if (stored == irq->pending_latch)
> +
> +		/* also flush hw pending state */

This comment looks out of place, as we aren't flushing anything.

> +		if (irq->hw) {
> +			WARN_RATELIMIT(irq_get_irqchip_state(irq->host_irq,
> +						IRQCHIP_STATE_PENDING, &is_pending),
> +				       "IRQ %d", irq->host_irq);

Isn't this going to warn like mad on a GICv4.0 system where this, by 
definition,
will generate an error?

> +		}
> +
> +		if (stored == is_pending)
>  			continue;
> 
> -		if (irq->pending_latch)
> +		if (is_pending)
>  			val |= 1 << bit_nr;
>  		else
>  			val &= ~(1 << bit_nr);
> 
>  		ret = kvm_write_guest_lock(kvm, ptr, &val, 1);
>  		if (ret)
> -			return ret;
> +			goto out;
>  	}
> -	return 0;
> +
> +out:
> +	get_vlpi_state_post(dist);

This bit worries me: you have unmapped the VPEs, so any interrupt that 
has been
generated during that phase is now forever lost (the GIC doesn't have 
ownership
of the pending tables).

Do you really expect the VM to be restartable from that point? I don't 
see how
this is possible.

> +
> +	return ret;
>  }
> 
>  /**

Thanks,

         M.
Shenming Lu Nov. 24, 2020, 7:40 a.m. UTC | #2
On 2020/11/23 17:18, Marc Zyngier wrote:
> On 2020-11-23 06:54, Shenming Lu wrote:
>> After pausing all vCPUs and devices capable of interrupting, in order
>         ^^^^^^^^^^^^^^^^^
> See my comment below about this.
> 
>> to save the information of all interrupts, besides flushing the pending
>> states in kvm’s vgic, we also try to flush the states of VLPIs in the
>> virtual pending tables into guest RAM, but we need to have GICv4.1 and
>> safely unmap the vPEs first.
>>
>> Signed-off-by: Shenming Lu <lushenming@huawei.com>
>> ---
>>  arch/arm64/kvm/vgic/vgic-v3.c | 62 +++++++++++++++++++++++++++++++----
>>  1 file changed, 56 insertions(+), 6 deletions(-)
>>
>> diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
>> index 9cdf39a94a63..e1b3aa4b2b12 100644
>> --- a/arch/arm64/kvm/vgic/vgic-v3.c
>> +++ b/arch/arm64/kvm/vgic/vgic-v3.c
>> @@ -1,6 +1,8 @@
>>  // SPDX-License-Identifier: GPL-2.0-only
>>
>>  #include <linux/irqchip/arm-gic-v3.h>
>> +#include <linux/irq.h>
>> +#include <linux/irqdomain.h>
>>  #include <linux/kvm.h>
>>  #include <linux/kvm_host.h>
>>  #include <kvm/arm_vgic.h>
>> @@ -356,6 +358,39 @@ int vgic_v3_lpi_sync_pending_status(struct kvm
>> *kvm, struct vgic_irq *irq)
>>      return 0;
>>  }
>>
>> +/*
>> + * With GICv4.1, we can get the VLPI's pending state after unmapping
>> + * the vPE. The deactivation of the doorbell interrupt will trigger
>> + * the unmapping of the associated vPE.
>> + */
>> +static void get_vlpi_state_pre(struct vgic_dist *dist)
>> +{
>> +    struct irq_desc *desc;
>> +    int i;
>> +
>> +    if (!kvm_vgic_global_state.has_gicv4_1)
>> +        return;
>> +
>> +    for (i = 0; i < dist->its_vm.nr_vpes; i++) {
>> +        desc = irq_to_desc(dist->its_vm.vpes[i]->irq);
>> +        irq_domain_deactivate_irq(irq_desc_get_irq_data(desc));
>> +    }
>> +}
>> +
>> +static void get_vlpi_state_post(struct vgic_dist *dist)
> 
> nit: the naming feels a bit... odd. Pre/post what?

My understanding is that the unmapping is a preparation for get_vlpi_state...
Maybe just call it unmap/map_all_vpes?

> 
>> +{
>> +    struct irq_desc *desc;
>> +    int i;
>> +
>> +    if (!kvm_vgic_global_state.has_gicv4_1)
>> +        return;
>> +
>> +    for (i = 0; i < dist->its_vm.nr_vpes; i++) {
>> +        desc = irq_to_desc(dist->its_vm.vpes[i]->irq);
>> +        irq_domain_activate_irq(irq_desc_get_irq_data(desc), false);
>> +    }
>> +}
>> +
>>  /**
>>   * vgic_v3_save_pending_tables - Save the pending tables into guest RAM
>>   * kvm lock and all vcpu lock must be held
>> @@ -365,14 +400,17 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
>>      struct vgic_dist *dist = &kvm->arch.vgic;
>>      struct vgic_irq *irq;
>>      gpa_t last_ptr = ~(gpa_t)0;
>> -    int ret;
>> +    int ret = 0;
>>      u8 val;
>>
>> +    get_vlpi_state_pre(dist);
>> +
>>      list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
>>          int byte_offset, bit_nr;
>>          struct kvm_vcpu *vcpu;
>>          gpa_t pendbase, ptr;
>>          bool stored;
>> +        bool is_pending = irq->pending_latch;
>>
>>          vcpu = irq->target_vcpu;
>>          if (!vcpu)
>> @@ -387,24 +425,36 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
>>          if (ptr != last_ptr) {
>>              ret = kvm_read_guest_lock(kvm, ptr, &val, 1);
>>              if (ret)
>> -                return ret;
>> +                goto out;
>>              last_ptr = ptr;
>>          }
>>
>>          stored = val & (1U << bit_nr);
>> -        if (stored == irq->pending_latch)
>> +
>> +        /* also flush hw pending state */
> 
> This comment looks out of place, as we aren't flushing anything.

Ok, I will correct it.

> 
>> +        if (irq->hw) {
>> +            WARN_RATELIMIT(irq_get_irqchip_state(irq->host_irq,
>> +                        IRQCHIP_STATE_PENDING, &is_pending),
>> +                       "IRQ %d", irq->host_irq);
> 
> Isn't this going to warn like mad on a GICv4.0 system where this, by definition,
> will generate an error?

Since we already return an error in save_its_tables if hw && !has_gicv4_1, we
don't need to warn here, do we?

> 
>> +        }
>> +
>> +        if (stored == is_pending)
>>              continue;
>>
>> -        if (irq->pending_latch)
>> +        if (is_pending)
>>              val |= 1 << bit_nr;
>>          else
>>              val &= ~(1 << bit_nr);
>>
>>          ret = kvm_write_guest_lock(kvm, ptr, &val, 1);
>>          if (ret)
>> -            return ret;
>> +            goto out;
>>      }
>> -    return 0;
>> +
>> +out:
>> +    get_vlpi_state_post(dist);
> 
> This bit worries me: you have unmapped the VPEs, so any interrupt that has been
> generated during that phase is now forever lost (the GIC doesn't have ownership
> of the pending tables).

In my opinion, during this phase, the devices capable of interrupting should have
already been paused (prevented from sending interrupts); for example, the VFIO
migration protocol already implements this.

> 
> Do you really expect the VM to be restartable from that point? I don't see how
> this is possible.
> 

If the migration has encountered an error, the src VM might be restarted, so we have to
map the vPEs back.

>> +
>> +    return ret;
>>  }
>>
>>  /**
> 
> Thanks,
> 
>         M.

Thanks,
Shenming
Marc Zyngier Nov. 24, 2020, 8:26 a.m. UTC | #3
On 2020-11-24 07:40, Shenming Lu wrote:
> On 2020/11/23 17:18, Marc Zyngier wrote:
>> On 2020-11-23 06:54, Shenming Lu wrote:
>>> After pausing all vCPUs and devices capable of interrupting, in order
>>         ^^^^^^^^^^^^^^^^^
>> See my comment below about this.
>> 
>>> to save the information of all interrupts, besides flushing the 
>>> pending
>>> states in kvm’s vgic, we also try to flush the states of VLPIs in the
>>> virtual pending tables into guest RAM, but we need to have GICv4.1 
>>> and
>>> safely unmap the vPEs first.
>>> 
>>> Signed-off-by: Shenming Lu <lushenming@huawei.com>
>>> ---
>>>  arch/arm64/kvm/vgic/vgic-v3.c | 62 
>>> +++++++++++++++++++++++++++++++----
>>>  1 file changed, 56 insertions(+), 6 deletions(-)
>>> 
>>> diff --git a/arch/arm64/kvm/vgic/vgic-v3.c 
>>> b/arch/arm64/kvm/vgic/vgic-v3.c
>>> index 9cdf39a94a63..e1b3aa4b2b12 100644
>>> --- a/arch/arm64/kvm/vgic/vgic-v3.c
>>> +++ b/arch/arm64/kvm/vgic/vgic-v3.c
>>> @@ -1,6 +1,8 @@
>>>  // SPDX-License-Identifier: GPL-2.0-only
>>> 
>>>  #include <linux/irqchip/arm-gic-v3.h>
>>> +#include <linux/irq.h>
>>> +#include <linux/irqdomain.h>
>>>  #include <linux/kvm.h>
>>>  #include <linux/kvm_host.h>
>>>  #include <kvm/arm_vgic.h>
>>> @@ -356,6 +358,39 @@ int vgic_v3_lpi_sync_pending_status(struct kvm
>>> *kvm, struct vgic_irq *irq)
>>>      return 0;
>>>  }
>>> 
>>> +/*
>>> + * With GICv4.1, we can get the VLPI's pending state after unmapping
>>> + * the vPE. The deactivation of the doorbell interrupt will trigger
>>> + * the unmapping of the associated vPE.
>>> + */
>>> +static void get_vlpi_state_pre(struct vgic_dist *dist)
>>> +{
>>> +    struct irq_desc *desc;
>>> +    int i;
>>> +
>>> +    if (!kvm_vgic_global_state.has_gicv4_1)
>>> +        return;
>>> +
>>> +    for (i = 0; i < dist->its_vm.nr_vpes; i++) {
>>> +        desc = irq_to_desc(dist->its_vm.vpes[i]->irq);
>>> +        irq_domain_deactivate_irq(irq_desc_get_irq_data(desc));
>>> +    }
>>> +}
>>> +
>>> +static void get_vlpi_state_post(struct vgic_dist *dist)
>> 
>> nit: the naming feels a bit... odd. Pre/post what?
> 
> My understanding is that the unmapping is a preparation for 
> get_vlpi_state...
> Maybe just call it unmap/map_all_vpes?

Yes, much better.

[...]

>>> +        if (irq->hw) {
>>> +            WARN_RATELIMIT(irq_get_irqchip_state(irq->host_irq,
>>> +                        IRQCHIP_STATE_PENDING, &is_pending),
>>> +                       "IRQ %d", irq->host_irq);
>> 
>> Isn't this going to warn like mad on a GICv4.0 system where this, by 
>> definition,
>> will generate an error?
> 
> As we have returned an error in save_its_tables if hw && !has_gicv4_1, 
> we don't
> have to warn this here?

Are you referring to the check in vgic_its_save_itt() that occurs in 
patch 4?
Fair enough, though I think the use of irq_get_irqchip_state() isn't 
quite
what we want, as per my comments on patch #1.

>> 
>>> +        }
>>> +
>>> +        if (stored == is_pending)
>>>              continue;
>>> 
>>> -        if (irq->pending_latch)
>>> +        if (is_pending)
>>>              val |= 1 << bit_nr;
>>>          else
>>>              val &= ~(1 << bit_nr);
>>> 
>>>          ret = kvm_write_guest_lock(kvm, ptr, &val, 1);
>>>          if (ret)
>>> -            return ret;
>>> +            goto out;
>>>      }
>>> -    return 0;
>>> +
>>> +out:
>>> +    get_vlpi_state_post(dist);
>> 
>> This bit worries me: you have unmapped the VPEs, so any interrupt that 
>> has been
>> generated during that phase is now forever lost (the GIC doesn't have 
>> ownership
>> of the pending tables).
> 
> In my opinion, during this phase, the devices capable of interrupting
> should have  already been paused (prevent from sending interrupts),
> such as VFIO migration protocol has already realized it.

Is that a hard guarantee? Pausing devices *may* be possible for a 
limited
set of endpoints, but I'm not sure that is universally possible to 
restart
them and expect a consistent state (you have just dropped a bunch of 
network
packets on the floor...).

>> Do you really expect the VM to be restartable from that point? I don't 
>> see how
>> this is possible.
>> 
> 
> If the migration has encountered an error, the src VM might be
> restarted, so we have to map the vPEs back.

As I said above, I doubt it is universally possible to do so, but
after all, this probably isn't worse that restarting on the target...

         M.
Shenming Lu Nov. 24, 2020, 1:10 p.m. UTC | #4
On 2020/11/24 16:26, Marc Zyngier wrote:
> On 2020-11-24 07:40, Shenming Lu wrote:
>> On 2020/11/23 17:18, Marc Zyngier wrote:
>>> On 2020-11-23 06:54, Shenming Lu wrote:
>>>> After pausing all vCPUs and devices capable of interrupting, in order
>>>         ^^^^^^^^^^^^^^^^^
>>> See my comment below about this.
>>>
>>>> to save the information of all interrupts, besides flushing the pending
>>>> states in kvm’s vgic, we also try to flush the states of VLPIs in the
>>>> virtual pending tables into guest RAM, but we need to have GICv4.1 and
>>>> safely unmap the vPEs first.
>>>>
>>>> Signed-off-by: Shenming Lu <lushenming@huawei.com>
>>>> ---
>>>>  arch/arm64/kvm/vgic/vgic-v3.c | 62 +++++++++++++++++++++++++++++++----
>>>>  1 file changed, 56 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
>>>> index 9cdf39a94a63..e1b3aa4b2b12 100644
>>>> --- a/arch/arm64/kvm/vgic/vgic-v3.c
>>>> +++ b/arch/arm64/kvm/vgic/vgic-v3.c
>>>> @@ -1,6 +1,8 @@
>>>>  // SPDX-License-Identifier: GPL-2.0-only
>>>>
>>>>  #include <linux/irqchip/arm-gic-v3.h>
>>>> +#include <linux/irq.h>
>>>> +#include <linux/irqdomain.h>
>>>>  #include <linux/kvm.h>
>>>>  #include <linux/kvm_host.h>
>>>>  #include <kvm/arm_vgic.h>
>>>> @@ -356,6 +358,39 @@ int vgic_v3_lpi_sync_pending_status(struct kvm
>>>> *kvm, struct vgic_irq *irq)
>>>>      return 0;
>>>>  }
>>>>
>>>> +/*
>>>> + * With GICv4.1, we can get the VLPI's pending state after unmapping
>>>> + * the vPE. The deactivation of the doorbell interrupt will trigger
>>>> + * the unmapping of the associated vPE.
>>>> + */
>>>> +static void get_vlpi_state_pre(struct vgic_dist *dist)
>>>> +{
>>>> +    struct irq_desc *desc;
>>>> +    int i;
>>>> +
>>>> +    if (!kvm_vgic_global_state.has_gicv4_1)
>>>> +        return;
>>>> +
>>>> +    for (i = 0; i < dist->its_vm.nr_vpes; i++) {
>>>> +        desc = irq_to_desc(dist->its_vm.vpes[i]->irq);
>>>> +        irq_domain_deactivate_irq(irq_desc_get_irq_data(desc));
>>>> +    }
>>>> +}
>>>> +
>>>> +static void get_vlpi_state_post(struct vgic_dist *dist)
>>>
>>> nit: the naming feels a bit... odd. Pre/post what?
>>
>> My understanding is that the unmapping is a preparation for get_vlpi_state...
>> Maybe just call it unmap/map_all_vpes?
> 
> Yes, much better.
> 
> [...]
> 
>>>> +        if (irq->hw) {
>>>> +            WARN_RATELIMIT(irq_get_irqchip_state(irq->host_irq,
>>>> +                        IRQCHIP_STATE_PENDING, &is_pending),
>>>> +                       "IRQ %d", irq->host_irq);
>>>
>>> Isn't this going to warn like mad on a GICv4.0 system where this, by definition,
>>> will generate an error?
>>
>> As we have returned an error in save_its_tables if hw && !has_gicv4_1, we don't
>> have to warn this here?
> 
> Are you referring to the check in vgic_its_save_itt() that occurs in patch 4?
> Fair enough, though I think the use of irq_get_irqchip_state() isn't quite
> what we want, as per my comments on patch #1.
> 
>>>
>>>> +        }
>>>> +
>>>> +        if (stored == is_pending)
>>>>              continue;
>>>>
>>>> -        if (irq->pending_latch)
>>>> +        if (is_pending)
>>>>              val |= 1 << bit_nr;
>>>>          else
>>>>              val &= ~(1 << bit_nr);
>>>>
>>>>          ret = kvm_write_guest_lock(kvm, ptr, &val, 1);
>>>>          if (ret)
>>>> -            return ret;
>>>> +            goto out;
>>>>      }
>>>> -    return 0;
>>>> +
>>>> +out:
>>>> +    get_vlpi_state_post(dist);
>>>
>>> This bit worries me: you have unmapped the VPEs, so any interrupt that has been
>>> generated during that phase is now forever lost (the GIC doesn't have ownership
>>> of the pending tables).
>>
>> In my opinion, during this phase, the devices capable of interrupting
>> should have  already been paused (prevent from sending interrupts),
>> such as VFIO migration protocol has already realized it.
> 
> Is that a hard guarantee? Pausing devices *may* be possible for a limited
> set of endpoints, but I'm not sure that is universally possible to restart
> them and expect a consistent state (you have just dropped a bunch of network
> packets on the floor...).

No, as far as I know, if the VFIO device does not support pausing, the migration
fails early... And the specific action is decided by the vendor driver.
In fact, VFIO migration is still in an experimental phase... I will keep an eye
on the follow-up development.

> 
>>> Do you really expect the VM to be restartable from that point? I don't see how
>>> this is possible.
>>>
>>
>> If the migration has encountered an error, the src VM might be
>> restarted, so we have to map the vPEs back.
> 
> As I said above, I doubt it is universally possible to do so, but
> after all, this probably isn't worse that restarting on the target...
> 
>         M.
diff mbox series

Patch

diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index 9cdf39a94a63..e1b3aa4b2b12 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -1,6 +1,8 @@ 
 // SPDX-License-Identifier: GPL-2.0-only
 
 #include <linux/irqchip/arm-gic-v3.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 #include <kvm/arm_vgic.h>
@@ -356,6 +358,39 @@  int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq)
 	return 0;
 }
 
+/*
+ * With GICv4.1, we can get the VLPI's pending state after unmapping
+ * the vPE. The deactivation of the doorbell interrupt will trigger
+ * the unmapping of the associated vPE.
+ */
+static void get_vlpi_state_pre(struct vgic_dist *dist)
+{
+	struct irq_desc *desc;
+	int i;
+
+	if (!kvm_vgic_global_state.has_gicv4_1)
+		return;
+
+	for (i = 0; i < dist->its_vm.nr_vpes; i++) {
+		desc = irq_to_desc(dist->its_vm.vpes[i]->irq);
+		irq_domain_deactivate_irq(irq_desc_get_irq_data(desc));
+	}
+}
+
+static void get_vlpi_state_post(struct vgic_dist *dist)
+{
+	struct irq_desc *desc;
+	int i;
+
+	if (!kvm_vgic_global_state.has_gicv4_1)
+		return;
+
+	for (i = 0; i < dist->its_vm.nr_vpes; i++) {
+		desc = irq_to_desc(dist->its_vm.vpes[i]->irq);
+		irq_domain_activate_irq(irq_desc_get_irq_data(desc), false);
+	}
+}
+
 /**
  * vgic_v3_save_pending_tables - Save the pending tables into guest RAM
  * kvm lock and all vcpu lock must be held
@@ -365,14 +400,17 @@  int vgic_v3_save_pending_tables(struct kvm *kvm)
 	struct vgic_dist *dist = &kvm->arch.vgic;
 	struct vgic_irq *irq;
 	gpa_t last_ptr = ~(gpa_t)0;
-	int ret;
+	int ret = 0;
 	u8 val;
 
+	get_vlpi_state_pre(dist);
+
 	list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
 		int byte_offset, bit_nr;
 		struct kvm_vcpu *vcpu;
 		gpa_t pendbase, ptr;
 		bool stored;
+		bool is_pending = irq->pending_latch;
 
 		vcpu = irq->target_vcpu;
 		if (!vcpu)
@@ -387,24 +425,36 @@  int vgic_v3_save_pending_tables(struct kvm *kvm)
 		if (ptr != last_ptr) {
 			ret = kvm_read_guest_lock(kvm, ptr, &val, 1);
 			if (ret)
-				return ret;
+				goto out;
 			last_ptr = ptr;
 		}
 
 		stored = val & (1U << bit_nr);
-		if (stored == irq->pending_latch)
+
+		/* also flush hw pending state */
+		if (irq->hw) {
+			WARN_RATELIMIT(irq_get_irqchip_state(irq->host_irq,
+						IRQCHIP_STATE_PENDING, &is_pending),
+				       "IRQ %d", irq->host_irq);
+		}
+
+		if (stored == is_pending)
 			continue;
 
-		if (irq->pending_latch)
+		if (is_pending)
 			val |= 1 << bit_nr;
 		else
 			val &= ~(1 << bit_nr);
 
 		ret = kvm_write_guest_lock(kvm, ptr, &val, 1);
 		if (ret)
-			return ret;
+			goto out;
 	}
-	return 0;
+
+out:
+	get_vlpi_state_post(dist);
+
+	return ret;
 }
 
 /**