[v5,20/23] KVM: arm64: GICv4.1: Plumb SGI implementation selection in the distributor

Message ID 20200304203330.4967-21-maz@kernel.org
Series irqchip/gic-v4: GICv4.1 architecture support

Commit Message

Marc Zyngier March 4, 2020, 8:33 p.m. UTC
The GICv4.1 architecture gives the hypervisor the option to let
the guest choose whether it wants the good old SGIs with an
active state, or the new, HW-based ones that do not have one.

For this, plumb the configuration of SGIs into the GICv3 MMIO
handling, present the GICD_TYPER2.nASSGIcap to the guest,
and handle the GICD_CTLR.nASSGIreq setting.

In order to be able to deal with the restore of a guest, also
apply the GICD_CTLR.nASSGIreq setting at first run so that we
can move the restored SGIs to the HW if that's what the guest
had selected in a previous life.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 virt/kvm/arm/vgic/vgic-mmio-v3.c | 48 ++++++++++++++++++++++++++++++--
 virt/kvm/arm/vgic/vgic-v3.c      |  2 ++
 2 files changed, 48 insertions(+), 2 deletions(-)

Comments

Zenghui Yu March 18, 2020, 6:34 a.m. UTC | #1
Hi Marc,

On 2020/3/5 4:33, Marc Zyngier wrote:
> The GICv4.1 architecture gives the hypervisor the option to let
> the guest choose whether it wants the good old SGIs with an
> active state, or the new, HW-based ones that do not have one.
> 
> For this, plumb the configuration of SGIs into the GICv3 MMIO
> handling, present the GICD_TYPER2.nASSGIcap to the guest,
> and handle the GICD_CTLR.nASSGIreq setting.
> 
> In order to be able to deal with the restore of a guest, also
> apply the GICD_CTLR.nASSGIreq setting at first run so that we
> can move the restored SGIs to the HW if that's what the guest
> had selected in a previous life.

I'm okay with the restore path.  But it seems that we still fail to
save the pending state of vSGIs - the software pending_latch of
HW-based vSGIs will never be updated (it always stays false) because
we directly inject them through the ITS, so
vgic_v3_uaccess_read_pending() can't report the correct pending state
to userspace (the correct one is latched in HW).

It would be good if we could sync the hardware state into
pending_latch at an appropriate time (just before save), but I'm not
sure we can...
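
In code, the kind of sync being wished for might look like this (a
sketch only: the helper name is hypothetical, while vgic_get_irq(),
vgic_put_irq() and irq_get_irqchip_state() are existing kernel
interfaces):

/*
 * Hypothetical helper: fold the HW pending state of each vSGI back
 * into the SW pending_latch, e.g. just before a userspace save.
 */
static void vgic_v4_sync_sgi_pending(struct kvm_vcpu *vcpu)
{
	int i;

	for (i = 0; i < VGIC_NR_SGIS; i++) {
		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, i);
		bool pending;

		if (irq->hw &&
		    !irq_get_irqchip_state(irq->host_irq,
					   IRQCHIP_STATE_PENDING, &pending))
			irq->pending_latch = pending;

		vgic_put_irq(vcpu->kvm, irq);
	}
}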

> 
> Signed-off-by: Marc Zyngier <maz@kernel.org>
> ---
>   virt/kvm/arm/vgic/vgic-mmio-v3.c | 48 ++++++++++++++++++++++++++++++--
>   virt/kvm/arm/vgic/vgic-v3.c      |  2 ++
>   2 files changed, 48 insertions(+), 2 deletions(-)
> 
> diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
> index de89da76a379..442f3b8c2559 100644
> --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
> +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
> @@ -3,6 +3,7 @@
>    * VGICv3 MMIO handling functions
>    */
>   
> +#include <linux/bitfield.h>
>   #include <linux/irqchip/arm-gic-v3.h>
>   #include <linux/kvm.h>
>   #include <linux/kvm_host.h>
> @@ -70,6 +71,8 @@ static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu,
>   		if (vgic->enabled)
>   			value |= GICD_CTLR_ENABLE_SS_G1;
>   		value |= GICD_CTLR_ARE_NS | GICD_CTLR_DS;
> +		if (kvm_vgic_global_state.has_gicv4_1 && vgic->nassgireq)

Looking at how we handle the GICD_CTLR.nASSGIreq setting, I think
"nassgireq==true" already indicates "has_gicv4_1==true".  So this
can be simplified.
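
Concretely, the simplification suggested here would presumably reduce
the read side to something like (a sketch, not the final code):

		if (vgic->nassgireq)
			value |= GICD_CTLR_nASSGIreq;

since, with this patch, nassgireq can only ever become true on a
GICv4.1-capable host.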

But I wonder whether we should use nassgireq to *only* keep track of
what the guest has written into GICD_CTLR.nASSGIreq.  If not, we may
lose the guest-requested bit after migration among hosts with
different has_gicv4_1 settings.


The remaining patches all look good to me :-). I will wait for you to
confirm these two concerns.


Thanks,
Zenghui

> +			value |= GICD_CTLR_nASSGIreq;
>   		break;
>   	case GICD_TYPER:
>   		value = vgic->nr_spis + VGIC_NR_PRIVATE_IRQS;
> @@ -81,6 +84,10 @@ static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu,
>   			value |= (INTERRUPT_ID_BITS_SPIS - 1) << 19;
>   		}
>   		break;
> +	case GICD_TYPER2:
> +		if (kvm_vgic_global_state.has_gicv4_1)
> +			value = GICD_TYPER2_nASSGIcap;
> +		break;
>   	case GICD_IIDR:
>   		value = (PRODUCT_ID_KVM << GICD_IIDR_PRODUCT_ID_SHIFT) |
>   			(vgic->implementation_rev << GICD_IIDR_REVISION_SHIFT) |
> @@ -98,17 +105,43 @@ static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu,
>   				    unsigned long val)
>   {
>   	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
> -	bool was_enabled = dist->enabled;
>   
>   	switch (addr & 0x0c) {
> -	case GICD_CTLR:
> +	case GICD_CTLR: {
> +		bool was_enabled, is_hwsgi;
> +
> +		mutex_lock(&vcpu->kvm->lock);
> +
> +		was_enabled = dist->enabled;
> +		is_hwsgi = dist->nassgireq;
> +
>   		dist->enabled = val & GICD_CTLR_ENABLE_SS_G1;
>   
> +		/* Not a GICv4.1? No HW SGIs */
> +		if (!kvm_vgic_global_state.has_gicv4_1)
> +			val &= ~GICD_CTLR_nASSGIreq;
> +
> +		/* Dist stays enabled? nASSGIreq is RO */
> +		if (was_enabled && dist->enabled) {
> +			val &= ~GICD_CTLR_nASSGIreq;
> +			val |= FIELD_PREP(GICD_CTLR_nASSGIreq, is_hwsgi);
> +		}
> +
> +		/* Switching HW SGIs? */
> +		dist->nassgireq = val & GICD_CTLR_nASSGIreq;
> +		if (is_hwsgi != dist->nassgireq)
> +			vgic_v4_configure_vsgis(vcpu->kvm);
> +
>   		if (!was_enabled && dist->enabled)
>   			vgic_kick_vcpus(vcpu->kvm);
> +
> +		mutex_unlock(&vcpu->kvm->lock);
>   		break;
> +	}
>   	case GICD_TYPER:
> +	case GICD_TYPER2:
>   	case GICD_IIDR:
> +		/* This is at best for documentation purposes... */
>   		return;
>   	}
>   }
> @@ -117,10 +150,21 @@ static int vgic_mmio_uaccess_write_v3_misc(struct kvm_vcpu *vcpu,
>   					   gpa_t addr, unsigned int len,
>   					   unsigned long val)
>   {
> +	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
> +
>   	switch (addr & 0x0c) {
>   	case GICD_IIDR:
>   		if (val != vgic_mmio_read_v3_misc(vcpu, addr, len))
>   			return -EINVAL;
> +		return 0;
> +	case GICD_CTLR:
> +		/* Not a GICv4.1? No HW SGIs */
> +		if (!kvm_vgic_global_state.has_gicv4_1)
> +			val &= ~GICD_CTLR_nASSGIreq;
> +
> +		dist->enabled = val & GICD_CTLR_ENABLE_SS_G1;
> +		dist->nassgireq = val & GICD_CTLR_nASSGIreq;
> +		return 0;
>   	}
>   
>   	vgic_mmio_write_v3_misc(vcpu, addr, len, val);
> diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
> index 1bc09b523486..2c9fc13e2c59 100644
> --- a/virt/kvm/arm/vgic/vgic-v3.c
> +++ b/virt/kvm/arm/vgic/vgic-v3.c
> @@ -540,6 +540,8 @@ int vgic_v3_map_resources(struct kvm *kvm)
>   		goto out;
>   	}
>   
> +	if (kvm_vgic_global_state.has_gicv4_1)
> +		vgic_v4_configure_vsgis(kvm);
>   	dist->ready = true;
>   
>   out:
>
Marc Zyngier March 19, 2020, 12:10 p.m. UTC | #2
Hi Zenghui,

On 2020-03-18 06:34, Zenghui Yu wrote:
> Hi Marc,
> 
> On 2020/3/5 4:33, Marc Zyngier wrote:
>> The GICv4.1 architecture gives the hypervisor the option to let
>> the guest choose whether it wants the good old SGIs with an
>> active state, or the new, HW-based ones that do not have one.
>> 
>> For this, plumb the configuration of SGIs into the GICv3 MMIO
>> handling, present the GICD_TYPER2.nASSGIcap to the guest,
>> and handle the GICD_CTLR.nASSGIreq setting.
>> 
>> In order to be able to deal with the restore of a guest, also
>> apply the GICD_CTLR.nASSGIreq setting at first run so that we
>> can move the restored SGIs to the HW if that's what the guest
>> had selected in a previous life.
> 
> I'm okay with the restore path.  But it seems that we still fail to
> save the pending state of vSGIs - the software pending_latch of
> HW-based vSGIs will never be updated (it always stays false) because
> we directly inject them through the ITS, so
> vgic_v3_uaccess_read_pending() can't report the correct pending state
> to userspace (the correct one is latched in HW).
> 
> It would be good if we could sync the hardware state into
> pending_latch at an appropriate time (just before save), but I'm not
> sure we can...

The problem is to find the "appropriate time". It would require
defining a point in the save sequence where we transition the state
from HW to SW. I'm not keen on adding more state than we already have.

But what we can do is to just ask the HW to give us the right state
on userspace access, at all times. How about this:

diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index 48fd9fc229a2..281fe7216c59 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -305,8 +305,18 @@ static unsigned long vgic_v3_uaccess_read_pending(struct kvm_vcpu *vcpu,
  	 */
  	for (i = 0; i < len * 8; i++) {
  		struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+		bool state = irq->pending_latch;

-		if (irq->pending_latch)
+		if (irq->hw && vgic_irq_is_sgi(irq->intid)) {
+			int err;
+
+			err = irq_get_irqchip_state(irq->host_irq,
+						    IRQCHIP_STATE_PENDING,
+						    &state);
+			WARN_ON(err);
+		}
+
+		if (state)
  			value |= (1U << i);

  		vgic_put_irq(vcpu->kvm, irq);

I can add this to "KVM: arm64: GICv4.1: Add direct injection capability 
to SGI registers".

> 
>> 
>> Signed-off-by: Marc Zyngier <maz@kernel.org>
>> ---
>>   virt/kvm/arm/vgic/vgic-mmio-v3.c | 48 ++++++++++++++++++++++++++++++--
>>   virt/kvm/arm/vgic/vgic-v3.c      |  2 ++
>>   2 files changed, 48 insertions(+), 2 deletions(-)
>> 
>> diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
>> index de89da76a379..442f3b8c2559 100644
>> --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
>> +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
>> @@ -3,6 +3,7 @@
>>    * VGICv3 MMIO handling functions
>>    */
>>   +#include <linux/bitfield.h>
>>   #include <linux/irqchip/arm-gic-v3.h>
>>   #include <linux/kvm.h>
>>   #include <linux/kvm_host.h>
>> @@ -70,6 +71,8 @@ static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu,
>>   		if (vgic->enabled)
>>   			value |= GICD_CTLR_ENABLE_SS_G1;
>>   		value |= GICD_CTLR_ARE_NS | GICD_CTLR_DS;
>> +		if (kvm_vgic_global_state.has_gicv4_1 && vgic->nassgireq)
> 
> Looking at how we handle the GICD_CTLR.nASSGIreq setting, I think
> "nassgireq==true" already indicates "has_gicv4_1==true".  So this
> can be simplified.

Indeed. I've now dropped the has_gicv4_1 check.

> But I wonder whether we should use nassgireq to *only* keep track of
> what the guest has written into GICD_CTLR.nASSGIreq.  If not, we may
> lose the guest-requested bit after migration among hosts with
> different has_gicv4_1 settings.

I'm unsure of what you're suggesting here. If userspace tries to set
GICD_CTLR.nASSGIreq on a non-4.1 host, this bit will not latch.
Userspace can check that at restore time. Or we could fail the
userspace write, which is a bit odd (the bit is otherwise RES0).

Could you clarify your proposal?

> The remaining patches all look good to me :-). I will wait for you to
> confirm these two concerns.

Thanks,

         M.
Eric Auger March 19, 2020, 8:38 p.m. UTC | #3
Hi Marc,
On 3/19/20 1:10 PM, Marc Zyngier wrote:
> Hi Zenghui,
> 
> On 2020-03-18 06:34, Zenghui Yu wrote:
>> Hi Marc,
>>
>> On 2020/3/5 4:33, Marc Zyngier wrote:
>>> The GICv4.1 architecture gives the hypervisor the option to let
>>> the guest choose whether it wants the good old SGIs with an
>>> active state, or the new, HW-based ones that do not have one.
>>>
>>> For this, plumb the configuration of SGIs into the GICv3 MMIO
>>> handling, present the GICD_TYPER2.nASSGIcap to the guest,
>>> and handle the GICD_CTLR.nASSGIreq setting.
>>>
>>> In order to be able to deal with the restore of a guest, also
>>> apply the GICD_CTLR.nASSGIreq setting at first run so that we
>>> can move the restored SGIs to the HW if that's what the guest
>>> had selected in a previous life.
>>
>> I'm okay with the restore path.  But it seems that we still fail to
>> save the pending state of vSGI - software pending_latch of HW-based
>> vSGIs will not be updated (and always be false) because we directly
>> inject them through ITS, so vgic_v3_uaccess_read_pending() can't
>> tell the correct pending state to user-space (the correct one should
>> be latched in HW).
>>
>> It would be good if we can sync the hardware state into pending_latch
>> at an appropriate time (just before save), but not sure if we can...
> 
> The problem is to find the "appropriate time". It would require to define
> a point in the save sequence where we transition the state from HW to
> SW. I'm not keen on adding more state than we already have.

Maybe we could use a dedicated device group/attr, as we have for the
ITS save tables? Userspace would choose.
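
As a sketch of what the userspace side might look like - assuming a
new attribute under the existing KVM_DEV_ARM_VGIC_GRP_CTRL group; the
attribute name below is made up for illustration:

	/* Hypothetical: ask KVM to sync vSGI state back to SW before save. */
	struct kvm_device_attr attr = {
		.group	= KVM_DEV_ARM_VGIC_GRP_CTRL,
		.attr	= KVM_DEV_ARM_VGIC_SAVE_VSGI_STATE,	/* not a real attr */
	};

	if (ioctl(gic_fd, KVM_SET_DEVICE_ATTR, &attr))
		err(1, "vSGI state save");

driven the same way KVM_DEV_ARM_ITS_SAVE_TABLES is today.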

Thanks

Eric
> 
> But what we can do is to just ask the HW to give us the right state
> on userspace access, at all times. How about this:
> 
> diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
> index 48fd9fc229a2..281fe7216c59 100644
> --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
> +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
> @@ -305,8 +305,18 @@ static unsigned long vgic_v3_uaccess_read_pending(struct kvm_vcpu *vcpu,
>       */
>      for (i = 0; i < len * 8; i++) {
>          struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
> +        bool state = irq->pending_latch;
> 
> -        if (irq->pending_latch)
> +        if (irq->hw && vgic_irq_is_sgi(irq->intid)) {
> +            int err;
> +
> +            err = irq_get_irqchip_state(irq->host_irq,
> +                            IRQCHIP_STATE_PENDING,
> +                            &state);
> +            WARN_ON(err);
> +        }
> +
> +        if (state)
>              value |= (1U << i);
> 
>          vgic_put_irq(vcpu->kvm, irq);
> 
> I can add this to "KVM: arm64: GICv4.1: Add direct injection capability
> to SGI registers".
> 
>>
>>>
>>> Signed-off-by: Marc Zyngier <maz@kernel.org>
>>> ---
>>>   virt/kvm/arm/vgic/vgic-mmio-v3.c | 48 ++++++++++++++++++++++++++++++--
>>>   virt/kvm/arm/vgic/vgic-v3.c      |  2 ++
>>>   2 files changed, 48 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
>>> index de89da76a379..442f3b8c2559 100644
>>> --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
>>> +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
>>> @@ -3,6 +3,7 @@
>>>    * VGICv3 MMIO handling functions
>>>    */
>>>   +#include <linux/bitfield.h>
>>>   #include <linux/irqchip/arm-gic-v3.h>
>>>   #include <linux/kvm.h>
>>>   #include <linux/kvm_host.h>
>>> @@ -70,6 +71,8 @@ static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu,
>>>           if (vgic->enabled)
>>>               value |= GICD_CTLR_ENABLE_SS_G1;
>>>           value |= GICD_CTLR_ARE_NS | GICD_CTLR_DS;
>>> +        if (kvm_vgic_global_state.has_gicv4_1 && vgic->nassgireq)
>>
>> Looking at how we handle the GICD_CTLR.nASSGIreq setting, I think
>> "nassgireq==true" already indicates "has_gicv4_1==true".  So this
>> can be simplified.
> 
> Indeed. I've now dropped the has_gicv4.1 check.
> 
>> But I wonder that should we use nassgireq to *only* keep track what
>> the guest had written into the GICD_CTLR.nASSGIreq.  If not, we may
>> lose the guest-request bit after migration among hosts with different
>> has_gicv4_1 settings.
> 
> I'm unsure of what you're suggesting here. If userspace tries to set
> GICD_CTLR.nASSGIreq on a non-4.1 host, this bit will not latch.
> Userspace can check that at restore time. Or we could fail the
> userspace write, which is a bit odd (the bit is otherwise RES0).
> 
> Could you clarify your proposal?
> 
>> The remaining patches all look good to me :-). I will wait for you to
>> confirm these two concerns.
> 
> Thanks,
> 
>         M.
Zenghui Yu March 20, 2020, 3:08 a.m. UTC | #4
On 2020/3/20 4:38, Auger Eric wrote:
> Hi Marc,
> On 3/19/20 1:10 PM, Marc Zyngier wrote:
>> Hi Zenghui,
>>
>> On 2020-03-18 06:34, Zenghui Yu wrote:
>>> Hi Marc,
>>>
>>> On 2020/3/5 4:33, Marc Zyngier wrote:
>>>> The GICv4.1 architecture gives the hypervisor the option to let
>>>> the guest choose whether it wants the good old SGIs with an
>>>> active state, or the new, HW-based ones that do not have one.
>>>>
>>>> For this, plumb the configuration of SGIs into the GICv3 MMIO
>>>> handling, present the GICD_TYPER2.nASSGIcap to the guest,
>>>> and handle the GICD_CTLR.nASSGIreq setting.
>>>>
>>>> In order to be able to deal with the restore of a guest, also
>>>> apply the GICD_CTLR.nASSGIreq setting at first run so that we
>>>> can move the restored SGIs to the HW if that's what the guest
>>>> had selected in a previous life.
>>>
>>> I'm okay with the restore path.  But it seems that we still fail to
>>> save the pending state of vSGI - software pending_latch of HW-based
>>> vSGIs will not be updated (and always be false) because we directly
>>> inject them through ITS, so vgic_v3_uaccess_read_pending() can't
>>> tell the correct pending state to user-space (the correct one should
>>> be latched in HW).
>>>
>>> It would be good if we can sync the hardware state into pending_latch
>>> at an appropriate time (just before save), but not sure if we can...
>>
>> The problem is to find the "appropriate time". It would require to define
>> a point in the save sequence where we transition the state from HW to
>> SW. I'm not keen on adding more state than we already have.
> 
> may be we could use a dedicated device group/attr as we have for the ITS
> save tables? the user space would choose.

It means that userspace will be aware of some form of GICv4.1 details
(e.g., get/set vSGI state at HW level) that KVM has implemented.
Is it something that userspace is required to know? I'm open to this ;-)

> 
> Thanks
> 
> Eric
>>
>> But what we can do is to just ask the HW to give us the right state
>> on userspace access, at all times. How about this:
>>
>> diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
>> index 48fd9fc229a2..281fe7216c59 100644
>> --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
>> +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
>> @@ -305,8 +305,18 @@ static unsigned long vgic_v3_uaccess_read_pending(struct kvm_vcpu *vcpu,
>>        */
>>       for (i = 0; i < len * 8; i++) {
>>           struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>> +        bool state = irq->pending_latch;
>>
>> -        if (irq->pending_latch)
>> +        if (irq->hw && vgic_irq_is_sgi(irq->intid)) {
>> +            int err;
>> +
>> +            err = irq_get_irqchip_state(irq->host_irq,
>> +                            IRQCHIP_STATE_PENDING,
>> +                            &state);
>> +            WARN_ON(err);
>> +        }
>> +
>> +        if (state)
>>               value |= (1U << i);
>>
>>           vgic_put_irq(vcpu->kvm, irq);

Anyway this looks good to me and will do the right thing on a userspace
save.

>>
>> I can add this to "KVM: arm64: GICv4.1: Add direct injection capability
>> to SGI registers".

Thanks,
Zenghui
Zenghui Yu March 20, 2020, 3:53 a.m. UTC | #5
Hi Marc,

On 2020/3/19 20:10, Marc Zyngier wrote:
>> But I wonder that should we use nassgireq to *only* keep track what
>> the guest had written into the GICD_CTLR.nASSGIreq.  If not, we may
>> lose the guest-request bit after migration among hosts with different
>> has_gicv4_1 settings.
> 
> I'm unsure of what you're suggesting here. If userspace tries to set
> GICD_CTLR.nASSGIreq on a non-4.1 host, this bit will not latch.

This is exactly what I *was* concerned about.

> Userspace can check that at restore time. Or we could fail the
> userspace write, which is a bit odd (the bit is otherwise RES0).
> 
> Could you clarify your proposal?

Let's assume the two hosts below. 'has_gicv4_1' is true on host-A
while it is false on host-B because of lack of HW support or the
kernel parameter "kvm-arm.vgic_v4_enable=0".

If we migrate a guest through A->B->A, we may end up losing the
initial guest request "nASSGIreq=1" and not use direct vSGI delivery
for this guest when it's migrated back to host-A.

This can be "fixed" by keeping track of what the guest has written
into nASSGIreq, and evaluating whether to use direct vSGI delivery
for a given guest with 'has_gicv4_1 && nassgireq'.
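
In code, that tracking scheme would be roughly (a sketch of the idea,
not a proposed patch):

	/* Latch exactly what the guest wrote, even on a non-4.1 host... */
	dist->nassgireq = val & GICD_CTLR_nASSGIreq;

	/* ...and only honour it where the HW can actually back it. */
	if (kvm_vgic_global_state.has_gicv4_1 && dist->nassgireq)
		vgic_v4_configure_vsgis(vcpu->kvm);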

But if it's expected that "if userspace tries to set nASSGIreq on
a non-4.1 host, this bit will not latch", then this shouldn't be
a problem at all.

Anyway this is not a big deal to me and I won't complain about it
in the future ;-) Either way, for this patch:

Reviewed-by: Zenghui Yu <yuzenghui@huawei.com>


Thanks
Eric Auger March 20, 2020, 7:59 a.m. UTC | #6
Hi Zenghui,

On 3/20/20 4:08 AM, Zenghui Yu wrote:
> On 2020/3/20 4:38, Auger Eric wrote:
>> Hi Marc,
>> On 3/19/20 1:10 PM, Marc Zyngier wrote:
>>> Hi Zenghui,
>>>
>>> On 2020-03-18 06:34, Zenghui Yu wrote:
>>>> Hi Marc,
>>>>
>>>> On 2020/3/5 4:33, Marc Zyngier wrote:
>>>>> The GICv4.1 architecture gives the hypervisor the option to let
>>>>> the guest choose whether it wants the good old SGIs with an
>>>>> active state, or the new, HW-based ones that do not have one.
>>>>>
>>>>> For this, plumb the configuration of SGIs into the GICv3 MMIO
>>>>> handling, present the GICD_TYPER2.nASSGIcap to the guest,
>>>>> and handle the GICD_CTLR.nASSGIreq setting.
>>>>>
>>>>> In order to be able to deal with the restore of a guest, also
>>>>> apply the GICD_CTLR.nASSGIreq setting at first run so that we
>>>>> can move the restored SGIs to the HW if that's what the guest
>>>>> had selected in a previous life.
>>>>
>>>> I'm okay with the restore path.  But it seems that we still fail to
>>>> save the pending state of vSGI - software pending_latch of HW-based
>>>> vSGIs will not be updated (and always be false) because we directly
>>>> inject them through ITS, so vgic_v3_uaccess_read_pending() can't
>>>> tell the correct pending state to user-space (the correct one should
>>>> be latched in HW).
>>>>
>>>> It would be good if we can sync the hardware state into pending_latch
>>>> at an appropriate time (just before save), but not sure if we can...
>>>
>>> The problem is to find the "appropriate time". It would require to
>>> define
>>> a point in the save sequence where we transition the state from HW to
>>> SW. I'm not keen on adding more state than we already have.
>>
>> may be we could use a dedicated device group/attr as we have for the ITS
>> save tables? the user space would choose.
> 
> It means that userspace will be aware of some form of GICv4.1 details
> (e.g., get/set vSGI state at HW level) that KVM has implemented.
> Is it something that userspace required to know? I'm open to this ;-)
Not sure we would be obliged to expose fine details. This could be a
generic save/restore device group/attr whose implementation at KVM level
could differ depending on the version being implemented, no?

Thanks

Eric
> 
>>
>> Thanks
>>
>> Eric
>>>
>>> But what we can do is to just ask the HW to give us the right state
>>> on userspace access, at all times. How about this:
>>>
>>> diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
>>> index 48fd9fc229a2..281fe7216c59 100644
>>> --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
>>> +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
>>> @@ -305,8 +305,18 @@ static unsigned long vgic_v3_uaccess_read_pending(struct kvm_vcpu *vcpu,
>>>        */
>>>       for (i = 0; i < len * 8; i++) {
>>>           struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
>>> +        bool state = irq->pending_latch;
>>>
>>> -        if (irq->pending_latch)
>>> +        if (irq->hw && vgic_irq_is_sgi(irq->intid)) {
>>> +            int err;
>>> +
>>> +            err = irq_get_irqchip_state(irq->host_irq,
>>> +                            IRQCHIP_STATE_PENDING,
>>> +                            &state);
>>> +            WARN_ON(err);
>>> +        }
>>> +
>>> +        if (state)
>>>               value |= (1U << i);
>>>
>>>           vgic_put_irq(vcpu->kvm, irq);
> 
> Anyway this looks good to me and will do the right thing on a userspace
> save.
> 
>>>
>>> I can add this to "KVM: arm64: GICv4.1: Add direct injection capability
>>> to SGI registers".
> 
> Thanks,
> Zenghui
>
Marc Zyngier March 20, 2020, 9:01 a.m. UTC | #7
Hi Zenghui,

On 2020-03-20 03:53, Zenghui Yu wrote:
> Hi Marc,
> 
> On 2020/3/19 20:10, Marc Zyngier wrote:
>>> But I wonder that should we use nassgireq to *only* keep track what
>>> the guest had written into the GICD_CTLR.nASSGIreq.  If not, we may
>>> lose the guest-request bit after migration among hosts with different
>>> has_gicv4_1 settings.
>> 
>> I'm unsure of what you're suggesting here. If userspace tries to set
>> GICD_CTLR.nASSGIreq on a non-4.1 host, this bit will not latch.
> 
> This is exactly what I *was* concerned about.
> 
>> Userspace can check that at restore time. Or we could fail the
>> userspace write, which is a bit odd (the bit is otherwise RES0).
>> 
>> Could you clarify your proposal?
> 
> Let's assume the two hosts below. 'has_gicv4_1' is true on host-A
> while it is false on host-B because of lack of HW support or the
> kernel parameter "kvm-arm.vgic_v4_enable=0".
> 
> If we migrate a guest through A->B->A, we may end up losing the
> initial guest request "nASSGIreq=1" and not use direct vSGI delivery
> for this guest when it's migrated back to host-A.

My point above is that we shouldn't allow the A->B migration in the
first place, and should fail it as quickly as possible. We don't know
what the guest has observed in terms of GIC capability, and it may
not have enabled the new flavour of SGIs just yet.

> This can be "fixed" by keeping track of what the guest has written
> into nASSGIreq, and evaluating whether to use direct vSGI delivery
> for a given guest with 'has_gicv4_1 && nassgireq'.

It feels odd. It means we have more state than the HW normally has.
I have an alternative proposal, see below.

> But if it's expected that "if userspace tries to set nASSGIreq on
> a non-4.1 host, this bit will not latch", then this shouldn't be
> a problem at all.

Well, that is the semantics of the RES0 bit. It applies to both
guest and userspace.

And actually, maybe we can handle that pretty cheaply. If userspace
tries to restore GICD_TYPER2 to a value that isn't what KVM can
offer, we just return an error. Exactly like we do for GICD_IIDR.
Hence the following patch:

diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index 28b639fd1abc..e72dcc454247 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -156,6 +156,7 @@ static int vgic_mmio_uaccess_write_v3_misc(struct kvm_vcpu *vcpu,
  	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;

  	switch (addr & 0x0c) {
+	case GICD_TYPER2:
  	case GICD_IIDR:
  		if (val != vgic_mmio_read_v3_misc(vcpu, addr, len))
  			return -EINVAL;

Being a RO register, writing something that isn't compatible with the
possible behaviour of the hypervisor will just return an error.

What do you think?

> Anyway this is not a big deal to me and I won't complain about it
> in the future ;-) Either way, for this patch:
> 
> Reviewed-by: Zenghui Yu <yuzenghui@huawei.com>

Thanks a lot!

         M.
Marc Zyngier March 20, 2020, 9:46 a.m. UTC | #8
On 2020-03-20 07:59, Auger Eric wrote:
> Hi Zenghui,
> 
> On 3/20/20 4:08 AM, Zenghui Yu wrote:
>> On 2020/3/20 4:38, Auger Eric wrote:
>>> Hi Marc,
>>> On 3/19/20 1:10 PM, Marc Zyngier wrote:
>>>> Hi Zenghui,
>>>> 
>>>> On 2020-03-18 06:34, Zenghui Yu wrote:
>>>>> Hi Marc,
>>>>> 
>>>>> On 2020/3/5 4:33, Marc Zyngier wrote:
>>>>>> The GICv4.1 architecture gives the hypervisor the option to let
>>>>>> the guest choose whether it wants the good old SGIs with an
>>>>>> active state, or the new, HW-based ones that do not have one.
>>>>>> 
>>>>>> For this, plumb the configuration of SGIs into the GICv3 MMIO
>>>>>> handling, present the GICD_TYPER2.nASSGIcap to the guest,
>>>>>> and handle the GICD_CTLR.nASSGIreq setting.
>>>>>> 
>>>>>> In order to be able to deal with the restore of a guest, also
>>>>>> apply the GICD_CTLR.nASSGIreq setting at first run so that we
>>>>>> can move the restored SGIs to the HW if that's what the guest
>>>>>> had selected in a previous life.
>>>>> 
>>>>> I'm okay with the restore path.  But it seems that we still fail to
>>>>> save the pending state of vSGI - software pending_latch of HW-based
>>>>> vSGIs will not be updated (and always be false) because we directly
>>>>> inject them through ITS, so vgic_v3_uaccess_read_pending() can't
>>>>> tell the correct pending state to user-space (the correct one 
>>>>> should
>>>>> be latched in HW).
>>>>> 
>>>>> It would be good if we can sync the hardware state into 
>>>>> pending_latch
>>>>> at an appropriate time (just before save), but not sure if we 
>>>>> can...
>>>> 
>>>> The problem is to find the "appropriate time". It would require to
>>>> define
>>>> a point in the save sequence where we transition the state from HW 
>>>> to
>>>> SW. I'm not keen on adding more state than we already have.
>>> 
>>> may be we could use a dedicated device group/attr as we have for the 
>>> ITS
>>> save tables? the user space would choose.
>> 
>> It means that userspace will be aware of some form of GICv4.1 details
>> (e.g., get/set vSGI state at HW level) that KVM has implemented.
>> Is it something that userspace required to know? I'm open to this ;-)
> Not sure we would be obliged to expose fine details. This could be a
> generic save/restore device group/attr whose implementation at KVM 
> level
> could differ depending on the version being implemented, no?

What prevents us from hooking this synchronization to the current
behaviour of KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES? After all, this is
already the point where we synchronize the KVM view of the pending
state with userspace. Here, it's just a matter of picking the
information from some other place (i.e. the host's virtual pending
table).
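
A minimal sketch of that hook - assuming a per-vcpu helper along the
lines of the irq_get_irqchip_state() snippet earlier in the thread
(the helper name is hypothetical):

	/* Sketched addition to vgic_v3_save_pending_tables(): with the
	 * guest stopped, fold the HW vSGI pending state back into
	 * pending_latch before the tables are written out. */
	if (kvm_vgic_global_state.has_gicv4_1) {
		struct kvm_vcpu *vcpu;
		int c;

		kvm_for_each_vcpu(c, vcpu, kvm)
			vgic_v4_sync_sgi_pending(vcpu);	/* hypothetical */
	}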

The thing we need though is the guarantee that the guest isn't going
to get more vLPIs at that stage, as they would be lost. This
effectively assumes that we can also save/restore the state of the
signalling devices, and I don't know if we're quite there yet.

Thanks,

         M.
Eric Auger March 20, 2020, 11:09 a.m. UTC | #9
Hi Marc,

On 3/20/20 10:46 AM, Marc Zyngier wrote:
> On 2020-03-20 07:59, Auger Eric wrote:
>> Hi Zenghui,
>>
>> On 3/20/20 4:08 AM, Zenghui Yu wrote:
>>> On 2020/3/20 4:38, Auger Eric wrote:
>>>> Hi Marc,
>>>> On 3/19/20 1:10 PM, Marc Zyngier wrote:
>>>>> Hi Zenghui,
>>>>>
>>>>> On 2020-03-18 06:34, Zenghui Yu wrote:
>>>>>> Hi Marc,
>>>>>>
>>>>>> On 2020/3/5 4:33, Marc Zyngier wrote:
>>>>>>> The GICv4.1 architecture gives the hypervisor the option to let
>>>>>>> the guest choose whether it wants the good old SGIs with an
>>>>>>> active state, or the new, HW-based ones that do not have one.
>>>>>>>
>>>>>>> For this, plumb the configuration of SGIs into the GICv3 MMIO
>>>>>>> handling, present the GICD_TYPER2.nASSGIcap to the guest,
>>>>>>> and handle the GICD_CTLR.nASSGIreq setting.
>>>>>>>
>>>>>>> In order to be able to deal with the restore of a guest, also
>>>>>>> apply the GICD_CTLR.nASSGIreq setting at first run so that we
>>>>>>> can move the restored SGIs to the HW if that's what the guest
>>>>>>> had selected in a previous life.
>>>>>>
>>>>>> I'm okay with the restore path.  But it seems that we still fail to
>>>>>> save the pending state of vSGI - software pending_latch of HW-based
>>>>>> vSGIs will not be updated (and always be false) because we directly
>>>>>> inject them through ITS, so vgic_v3_uaccess_read_pending() can't
>>>>>> tell the correct pending state to user-space (the correct one should
>>>>>> be latched in HW).
>>>>>>
>>>>>> It would be good if we can sync the hardware state into pending_latch
>>>>>> at an appropriate time (just before save), but not sure if we can...
>>>>>
>>>>> The problem is to find the "appropriate time". It would require to
>>>>> define
>>>>> a point in the save sequence where we transition the state from HW to
>>>>> SW. I'm not keen on adding more state than we already have.
>>>>
>>>> may be we could use a dedicated device group/attr as we have for the
>>>> ITS
>>>> save tables? the user space would choose.
>>>
>>> It means that userspace will be aware of some form of GICv4.1 details
>>> (e.g., get/set vSGI state at HW level) that KVM has implemented.
>>> Is it something that userspace required to know? I'm open to this ;-)
>> Not sure we would be obliged to expose fine details. This could be a
>> generic save/restore device group/attr whose implementation at KVM level
>> could differ depending on the version being implemented, no?
> 
> What prevents us from hooking this synchronization to the current behaviour
> of KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES? After all, this is already the
> point
> where we synchronize the KVM view of the pending state with userspace.
> Here, it's just a matter of picking the information from some other place
> (i.e. the host's virtual pending table).
agreed
> 
> The thing we need though is the guarantee that the guest isn't going to
> get more vLPIs at that stage, as they would be lost. This effectively
> assumes that we can also save/restore the state of the signalling devices,
> and I don't know if we're quite there yet.
On QEMU, when KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES is called, the VM is
stopped.
See cddafd8f353d ("hw/intc/arm_gicv3_its: Implement state save/restore").
So I think it should work, no?

Thanks

Eric

> 
> Thanks,
> 
>         M.
Marc Zyngier March 20, 2020, 11:20 a.m. UTC | #10
Hi Eric,

On 2020-03-20 11:09, Auger Eric wrote:
> Hi Marc,

[...]

>>>> It means that userspace will be aware of some form of GICv4.1 
>>>> details
>>>> (e.g., get/set vSGI state at HW level) that KVM has implemented.
>>>> Is it something that userspace required to know? I'm open to this 
>>>> ;-)
>>> Not sure we would be obliged to expose fine details. This could be a
>>> generic save/restore device group/attr whose implementation at KVM 
>>> level
>>> could differ depending on the version being implemented, no?
>> 
>> What prevents us from hooking this synchronization to the current 
>> behaviour
>> of KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES? After all, this is already 
>> the
>> point
>> where we synchronize the KVM view of the pending state with userspace.
>> Here, it's just a matter of picking the information from some other 
>> place
>> (i.e. the host's virtual pending table).
> agreed
>> 
>> The thing we need though is the guarantee that the guest isn't going 
>> to
>> get more vLPIs at that stage, as they would be lost. This effectively
>> assumes that we can also save/restore the state of the signalling 
>> devices,
>> and I don't know if we're quite there yet.
> On QEMU, when KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES is called, the VM is
> stopped.
> See cddafd8f353d ("hw/intc/arm_gicv3_its: Implement state 
> save/restore")
> So I think it should work, no?

The guest being stopped is a good start. But my concern is on the
device side.

If the device is still active (generating interrupts), these
interrupts will be dropped because the vPE will have been unmapped
from the ITS in order to clean the ITS caches and make sure the
virtual pending table is up to date.

In turn, restoring the guest may lead to a lockup because we would
have lost these interrupts. What does QEMU on x86 do in this case?

Thanks,

         M.
Zenghui Yu March 23, 2020, 8:11 a.m. UTC | #11
Hi Marc,

On 2020/3/20 17:01, Marc Zyngier wrote:
> Hi Zenghui,
> 
> On 2020-03-20 03:53, Zenghui Yu wrote:
>> Hi Marc,
>>
>> On 2020/3/19 20:10, Marc Zyngier wrote:
>>>> But I wonder that should we use nassgireq to *only* keep track what
>>>> the guest had written into the GICD_CTLR.nASSGIreq.  If not, we may
>>>> lose the guest-request bit after migration among hosts with different
>>>> has_gicv4_1 settings.
>>>
>>> I'm unsure of what you're suggesting here. If userspace tries to set
>>> GICD_CTLR.nASSGIreq on a non-4.1 host, this bit will not latch.
>>
>> This is exactly what I *was* concerning about.
>>
>>> Userspace can check that at restore time. Or we could fail the
>>> userspace write, which is a bit odd (the bit is otherwise RES0).
>>>
>>> Could you clarify your proposal?
>>
>> Let's assume two hosts below. 'has_gicv4_1' is true on host-A while
>> it is false on host-B because of lack of HW support or the kernel
>> parameter "kvm-arm.vgic_v4_enable=0".
>>
>> If we migrate guest through A->B->A, we may end-up lose the initial
>> guest-request "nASSGIreq=1" and don't use direct vSGI delivery for
>> this guest when it's migrated back to host-A.
> 
> My point above is that we shouldn't allow the A->B migration the first
> place, and fail it as quickly as possible. We don't know what the guest
> has observed in terms of GIC capability, and it may not have enabled the
> new flavour of SGIs just yet.

Indeed. I didn't realize it.

> 
>> This can be "fixed" by keep track of what guest had written into
>> nASSGIreq. And we need to evaluate the need for using direct vSGI
>> for a specified guest by 'has_gicv4_1 && nassgireq'.
> 
> It feels odd. It means we have more state than the HW normally has.
> I have an alternative proposal, see below.
> 
>> But if it's expected that "if userspace tries to set nASSGIreq on
>> a non-4.1 host, this bit will not latch", then this shouldn't be
>> a problem at all.
> 
> Well, that is the semantics of the RES0 bit. It applies from both
> guest and userspace.
> 
> And actually, maybe we can handle that pretty cheaply. If userspace
> tries to restore GICD_TYPER2 to a value that isn't what KVM can
> offer, we just return an error. Exactly like we do for GICD_IIDR.
> Hence the following patch:
> 
> diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
> index 28b639fd1abc..e72dcc454247 100644
> --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
> +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
> @@ -156,6 +156,7 @@ static int vgic_mmio_uaccess_write_v3_misc(struct kvm_vcpu *vcpu,
>       struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
> 
>       switch (addr & 0x0c) {
> +    case GICD_TYPER2:
>       case GICD_IIDR:
>           if (val != vgic_mmio_read_v3_misc(vcpu, addr, len))
>               return -EINVAL;
> 
> Being a RO register, writing something that isn't compatible with the
> possible behaviour of the hypervisor will just return an error.

This is really a nice way to address my concern! I'm happy to see
this in v6 now.

> 
> What do you think?

I agree with you, though I'm a bit nervous. Some old guests (which
have no knowledge of GICv4.1 vSGIs and don't care about nASSGIcap at
all) will also fail to migrate from A to B, just because we now
present two different (unused) GICD_TYPER2 registers to them.

Is it a little unfair to them :-) ?


Thanks,
Zenghui
Marc Zyngier March 23, 2020, 8:25 a.m. UTC | #12
Hi Zenghui,

[...]

>> And actually, maybe we can handle that pretty cheaply. If userspace
>> tries to restore GICD_TYPER2 to a value that isn't what KVM can
>> offer, we just return an error. Exactly like we do for GICD_IIDR.
>> Hence the following patch:
>> 
>> diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
>> index 28b639fd1abc..e72dcc454247 100644
>> --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
>> +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
>> @@ -156,6 +156,7 @@ static int vgic_mmio_uaccess_write_v3_misc(struct kvm_vcpu *vcpu,
>>       struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
>> 
>>       switch (addr & 0x0c) {
>> +    case GICD_TYPER2:
>>       case GICD_IIDR:
>>           if (val != vgic_mmio_read_v3_misc(vcpu, addr, len))
>>               return -EINVAL;
>> 
>> Being a RO register, writing something that isn't compatible with the
>> possible behaviour of the hypervisor will just return an error.
> 
> This is really a nice point to address my concern! I'm happy to see
> this in v6 now.
> 
>> 
>> What do you think?
> 
> I agreed with you, with a bit nervous though. Some old guests (which
> have no knowledge about GICv4.1 vSGIs and don't care about nASSGIcap
> at all) will also fail to migrate from A to B, just because now we
> present two different (unused) GICD_TYPER2 registers to them.
> 
> Is it a little unfair to them :-) ?

I never pretended to be fair! ;-)

I'm happy to prevent migrating from a v4.1 system (A) to a v4.0
system (B). As soon as the guest has run, it isn't safe to do so
(it may have read TYPER2, and now knows about vSGIs). We *could*
track this and find ways to migrate this state as well, but it
feels fragile.

Migrating from B to A is more appealing. It should be possible to
do so without much difficulty (just check that the nASSGIcap bit
is either 0 or equal to KVM's view of the capability).
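
In code, that relaxed B->A check might look like this (a sketch on
top of the GICD_TYPER2 uaccess patch above, not tested):

	case GICD_TYPER2: {
		unsigned long mine = vgic_mmio_read_v3_misc(vcpu, addr, len);

		/* Everything but nASSGIcap must match exactly... */
		if ((val ^ mine) & ~GICD_TYPER2_nASSGIcap)
			return -EINVAL;
		/* ...while nASSGIcap may be 0, or match KVM's capability. */
		if ((val & GICD_TYPER2_nASSGIcap) & ~mine)
			return -EINVAL;
		return 0;
	}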

But overall I seriously doubt we can easily migrate guests across
very different HW. We've been talking about this for years, and
we still don't have a good solution for it given the diversity
of the ecosystem... :-/

Thanks,

         M.
Zenghui Yu March 23, 2020, 12:40 p.m. UTC | #13
Hi Marc,

On 2020/3/23 16:25, Marc Zyngier wrote:
> Hi Zenghui,
> 
> [...]
> 
>>> And actually, maybe we can handle that pretty cheaply. If userspace
>>> tries to restore GICD_TYPER2 to a value that isn't what KVM can
>>> offer, we just return an error. Exactly like we do for GICD_IIDR.
>>> Hence the following patch:
>>>
>>> diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
>>> index 28b639fd1abc..e72dcc454247 100644
>>> --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
>>> +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
>>> @@ -156,6 +156,7 @@ static int vgic_mmio_uaccess_write_v3_misc(struct kvm_vcpu *vcpu,
>>>       struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
>>>
>>>       switch (addr & 0x0c) {
>>> +    case GICD_TYPER2:
>>>       case GICD_IIDR:
>>>           if (val != vgic_mmio_read_v3_misc(vcpu, addr, len))
>>>               return -EINVAL;
>>>
>>> Being a RO register, writing something that isn't compatible with the
>>> possible behaviour of the hypervisor will just return an error.
>>
>> This is really a nice point to address my concern! I'm happy to see
>> this in v6 now.
>>
>>>
>>> What do you think?
>>
>> I agreed with you, with a bit nervous though. Some old guests (which
>> have no knowledge about GICv4.1 vSGIs and don't care about nASSGIcap
>> at all) will also fail to migrate from A to B, just because now we
>> present two different (unused) GICD_TYPER2 registers to them.
>>
>> Is it a little unfair to them :-) ?
> 
> I never pretended to be fair! ;-)
> 
> I'm happy to prevent migrating from a v4.1 system (A) to a v4.0
> system (B). As soon as the guest has run, it isn't safe to do so
> (it may have read TYPER2, and now knows about vSGIs). We *could*
> track this and find ways to migrate this state as well, but it
> feels fragile.
> 
> Migrating from B to A is more appealing. It should be possible to
> do so without much difficulty (just check that the nASSGIcap bit
> is either 0 or equal to KVM's view of the capability).
> 
> But overall I seriously doubt we can easily migrate guests across
> very different HW. We've been talking about this for years, and
> we still don't have a good solution for it given the diversity
> of the ecosystem... :-/

Fair enough. Thanks for your detailed explanation!


Zenghui

Patch

diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index de89da76a379..442f3b8c2559 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -3,6 +3,7 @@ 
  * VGICv3 MMIO handling functions
  */
 
+#include <linux/bitfield.h>
 #include <linux/irqchip/arm-gic-v3.h>
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
@@ -70,6 +71,8 @@  static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu,
 		if (vgic->enabled)
 			value |= GICD_CTLR_ENABLE_SS_G1;
 		value |= GICD_CTLR_ARE_NS | GICD_CTLR_DS;
+		if (kvm_vgic_global_state.has_gicv4_1 && vgic->nassgireq)
+			value |= GICD_CTLR_nASSGIreq;
 		break;
 	case GICD_TYPER:
 		value = vgic->nr_spis + VGIC_NR_PRIVATE_IRQS;
@@ -81,6 +84,10 @@  static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu,
 			value |= (INTERRUPT_ID_BITS_SPIS - 1) << 19;
 		}
 		break;
+	case GICD_TYPER2:
+		if (kvm_vgic_global_state.has_gicv4_1)
+			value = GICD_TYPER2_nASSGIcap;
+		break;
 	case GICD_IIDR:
 		value = (PRODUCT_ID_KVM << GICD_IIDR_PRODUCT_ID_SHIFT) |
 			(vgic->implementation_rev << GICD_IIDR_REVISION_SHIFT) |
@@ -98,17 +105,43 @@  static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu,
 				    unsigned long val)
 {
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-	bool was_enabled = dist->enabled;
 
 	switch (addr & 0x0c) {
-	case GICD_CTLR:
+	case GICD_CTLR: {
+		bool was_enabled, is_hwsgi;
+
+		mutex_lock(&vcpu->kvm->lock);
+
+		was_enabled = dist->enabled;
+		is_hwsgi = dist->nassgireq;
+
 		dist->enabled = val & GICD_CTLR_ENABLE_SS_G1;
 
+		/* Not a GICv4.1? No HW SGIs */
+		if (!kvm_vgic_global_state.has_gicv4_1)
+			val &= ~GICD_CTLR_nASSGIreq;
+
+		/* Dist stays enabled? nASSGIreq is RO */
+		if (was_enabled && dist->enabled) {
+			val &= ~GICD_CTLR_nASSGIreq;
+			val |= FIELD_PREP(GICD_CTLR_nASSGIreq, is_hwsgi);
+		}
+
+		/* Switching HW SGIs? */
+		dist->nassgireq = val & GICD_CTLR_nASSGIreq;
+		if (is_hwsgi != dist->nassgireq)
+			vgic_v4_configure_vsgis(vcpu->kvm);
+
 		if (!was_enabled && dist->enabled)
 			vgic_kick_vcpus(vcpu->kvm);
+
+		mutex_unlock(&vcpu->kvm->lock);
 		break;
+	}
 	case GICD_TYPER:
+	case GICD_TYPER2:
 	case GICD_IIDR:
+		/* This is at best for documentation purposes... */
 		return;
 	}
 }
@@ -117,10 +150,21 @@  static int vgic_mmio_uaccess_write_v3_misc(struct kvm_vcpu *vcpu,
 					   gpa_t addr, unsigned int len,
 					   unsigned long val)
 {
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
 	switch (addr & 0x0c) {
 	case GICD_IIDR:
 		if (val != vgic_mmio_read_v3_misc(vcpu, addr, len))
 			return -EINVAL;
+		return 0;
+	case GICD_CTLR:
+		/* Not a GICv4.1? No HW SGIs */
+		if (!kvm_vgic_global_state.has_gicv4_1)
+			val &= ~GICD_CTLR_nASSGIreq;
+
+		dist->enabled = val & GICD_CTLR_ENABLE_SS_G1;
+		dist->nassgireq = val & GICD_CTLR_nASSGIreq;
+		return 0;
 	}
 
 	vgic_mmio_write_v3_misc(vcpu, addr, len, val);
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index 1bc09b523486..2c9fc13e2c59 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -540,6 +540,8 @@  int vgic_v3_map_resources(struct kvm *kvm)
 		goto out;
 	}
 
+	if (kvm_vgic_global_state.has_gicv4_1)
+		vgic_v4_configure_vsgis(kvm);
 	dist->ready = true;
 
 out: