KVM: arm/arm64: vgic: Use a single IO device per redistributor

Message ID 20190823173330.23342-1-eric.auger@redhat.com (mailing list archive)
State New, archived
Series KVM: arm/arm64: vgic: Use a single IO device per redistributor

Commit Message

Eric Auger Aug. 23, 2019, 5:33 p.m. UTC
At the moment we use 2 IO devices per GICv3 redistributor: one
for the RD_base frame and one for the SGI_base frame.

Instead we can use a single IO device per redistributor (the 2
frames are contiguous). This saves slots on the KVM_MMIO_BUS
which is currently limited to NR_IOBUS_DEVS (1000).

This change allows up to 512 redistributors to be instantiated and
may speed up guest boot with a large number of VCPUs.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
 include/kvm/arm_vgic.h           |  1 -
 virt/kvm/arm/vgic/vgic-init.c    |  1 -
 virt/kvm/arm/vgic/vgic-mmio-v3.c | 81 ++++++++++----------------------
 3 files changed, 24 insertions(+), 59 deletions(-)
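
For illustration, the KVM_MMIO_BUS slot arithmetic behind the change
(counting only the redistributor IO devices; the distributor, ITS and
any other in-kernel devices consume additional slots):

  two IO devices per redistributor: 512 vcpus * 2 = 1024 slots > NR_IOBUS_DEVS (1000)
  one IO device per redistributor:  512 vcpus * 1 =  512 slots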

Comments

Eric Auger Aug. 23, 2019, 5:52 p.m. UTC | #1
Hi Zenghui, Marc,

On 8/23/19 7:33 PM, Eric Auger wrote:
> At the moment we use 2 IO devices per GICv3 redistributor: one
> one for the RD_base frame and one for the SGI_base frame.
> 
> Instead we can use a single IO device per redistributor (the 2
> frames are contiguous). This saves slots on the KVM_MMIO_BUS
> which is currently limited to NR_IOBUS_DEVS (1000).
> 
> This change allows to instantiate up to 512 redistributors and may
> speed the guest boot with a large number of VCPUs.
> 
> Signed-off-by: Eric Auger <eric.auger@redhat.com>

I tested this patch with below kernel and QEMU branches:
kernel: https://github.com/eauger/linux/tree/256fix-v1
(Marc's patch + this patch)
https://github.com/eauger/qemu/tree/v4.1.0-256fix-rfc1-rc0
(header update + kvm_arm_gic_set_irq modification)
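
For reference, a minimal sketch of the kind of encoding the
kvm_arm_gic_set_irq modification involves (the helper name is
hypothetical and this is not the actual QEMU code; it assumes the
LAYOUT_2 format from Marc's patch, where bits [31:28] carry a vcpu2
index on top of the legacy 8-bit vcpu_index):

#include <stdint.h>
#include <linux/kvm.h>	/* KVM_ARM_IRQ_TYPE_PPI; needs updated arm64 headers */

/*
 * Encode the KVM_IRQ_LINE irq field for a per-vcpu PPI, LAYOUT_2 style:
 *   bits:  | 31 ... 28   | 27 ... 24 | 23 ... 16  | 15 ... 0 |
 *   field: | vcpu2_index | irq_type  | vcpu_index | irq_id   |
 */
static inline uint32_t encode_ppi_irq(uint32_t vcpu, uint32_t irq_id)
{
	uint32_t vcpu2 = vcpu / 256;	/* 0 unless there are more than 256 vcpus */
	uint32_t vcpu1 = vcpu % 256;	/* legacy 8-bit vcpu_index */

	return (vcpu2 << 28) | (KVM_ARM_IRQ_TYPE_PPI << 24) |
	       (vcpu1 << 16) | (irq_id & 0xffff);
}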

On a machine with 224 pcpus, I was able to boot a 512 vcpu guest.

As expected, qemu outputs warnings:

qemu-system-aarch64: warning: Number of SMP cpus requested (512) exceeds
the recommended cpus supported by KVM (224)
qemu-system-aarch64: warning: Number of hotpluggable cpus requested
(512) exceeds the recommended cpus supported by KVM (224)

on the guest: getconf _NPROCESSORS_ONLN returns 512

Then I have no clue about what can be expected of such overcommit config
and I have not further exercised the guest at the moment. But at least
it seems to boot properly. I also tested without overcommit and it seems
to behave as before (boot, migration).

I still need to look at the migration of > 256vcpu guest at qemu level.

Thanks

Eric


> [...]
Marc Zyngier Aug. 25, 2019, 10:04 a.m. UTC | #2
On Fri, 23 Aug 2019 18:33:30 +0100,
Eric Auger <eric.auger@redhat.com> wrote:
> 
> At the moment we use 2 IO devices per GICv3 redistributor: one
> one for the RD_base frame and one for the SGI_base frame.
> 
> Instead we can use a single IO device per redistributor (the 2
> frames are contiguous). This saves slots on the KVM_MMIO_BUS
> which is currently limited to NR_IOBUS_DEVS (1000).
> 
> This change allows to instantiate up to 512 redistributors and may
> speed the guest boot with a large number of VCPUs.
> 
> Signed-off-by: Eric Auger <eric.auger@redhat.com>

Yup, that's exactly what I was hoping for. A very nice cleanup, and no
need to increase NR_IOBUS_DEVS for the foreseeable future.

I've applied this to -next.

Thanks,

	M.
Marc Zyngier Aug. 25, 2019, 10:20 a.m. UTC | #3
On Fri, 23 Aug 2019 18:52:32 +0100,
Auger Eric <eric.auger@redhat.com> wrote:
> 
> Hi Zenghui, Marc,
> 
> On 8/23/19 7:33 PM, Eric Auger wrote:
> > At the moment we use 2 IO devices per GICv3 redistributor: one
> > one for the RD_base frame and one for the SGI_base frame.
> > 
> > Instead we can use a single IO device per redistributor (the 2
> > frames are contiguous). This saves slots on the KVM_MMIO_BUS
> > which is currently limited to NR_IOBUS_DEVS (1000).
> > 
> > This change allows to instantiate up to 512 redistributors and may
> > speed the guest boot with a large number of VCPUs.
> > 
> > Signed-off-by: Eric Auger <eric.auger@redhat.com>
> 
> I tested this patch with below kernel and QEMU branches:
> kernel: https://github.com/eauger/linux/tree/256fix-v1
> (Marc's patch + this patch)
> https://github.com/eauger/qemu/tree/v4.1.0-256fix-rfc1-rc0
> (header update + kvm_arm_gic_set_irq modification)

A small comment on this: you don't seem to check that
KVM_CAP_ARM_IRQ_LINE_LAYOUT_2 is available before allowing more than
256 vcpus. It'd be worth doing that before allowing a guest to start.
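
Something along these lines, perhaps (a hedged sketch only:
kvm_check_extension() and error_report() are existing QEMU helpers, but
the exact placement and the field names here are assumptions):

    if (ms->smp.max_cpus > 256 &&
        !kvm_check_extension(kvm_state, KVM_CAP_ARM_IRQ_LINE_LAYOUT_2)) {
        error_report("more than 256 vcpus require a host kernel "
                     "exposing KVM_CAP_ARM_IRQ_LINE_LAYOUT_2");
        exit(1);
    }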

> 
> On a machine with 224 pcpus, I was able to boot a 512 vcpu guest.
> 
> As expected, qemu outputs warnings:
> 
> qemu-system-aarch64: warning: Number of SMP cpus requested (512) exceeds
> the recommended cpus supported by KVM (224)
> qemu-system-aarch64: warning: Number of hotpluggable cpus requested
> (512) exceeds the recommended cpus supported by KVM (224)
> 
> on the guest: getconf _NPROCESSORS_ONLN returns 512
> 
> Then I have no clue about what can be expected of such overcommit config
> and I have not further exercised the guest at the moment.

It will just work, albeit slowly. I often boot 64 vcpu guests on 4 or
8 core systems, just to check that we don't regress too much.

> But at least
> it seems to boot properly. I also tested without overcommit and it seems
> to behave as before (boot, migration).
> 
> I still need to look at the migration of > 256vcpu guest at qemu level.

OK, please let us know how it goes. I'd like some more reviewing on
the userspace ABI change before merging it though. Peter, your input
would be very good to have.

Thanks,

	M.
Zenghui Yu Aug. 27, 2019, 7:49 a.m. UTC | #4
Hi Eric,

Thanks for this patch!

On 2019/8/24 1:52, Auger Eric wrote:
> Hi Zenghui, Marc,
> 
> On 8/23/19 7:33 PM, Eric Auger wrote:
>> At the moment we use 2 IO devices per GICv3 redistributor: one
                                                              ^^^
>> one for the RD_base frame and one for the SGI_base frame.
   ^^^
>>
>> Instead we can use a single IO device per redistributor (the 2
>> frames are contiguous). This saves slots on the KVM_MMIO_BUS
>> which is currently limited to NR_IOBUS_DEVS (1000).
>>
>> This change allows to instantiate up to 512 redistributors and may
>> speed the guest boot with a large number of VCPUs.
>>
>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
> 
> I tested this patch with below kernel and QEMU branches:
> kernel: https://github.com/eauger/linux/tree/256fix-v1
> (Marc's patch + this patch)
> https://github.com/eauger/qemu/tree/v4.1.0-256fix-rfc1-rc0
> (header update + kvm_arm_gic_set_irq modification)

I also tested these three changes on HiSi D05 (with 64 pcpus), and yes,
I can get a 512 vCPU guest to boot properly now.

Tested-by: Zenghui Yu <yuzenghui@huawei.com>

> On a machine with 224 pcpus, I was able to boot a 512 vcpu guest.
> 
> As expected, qemu outputs warnings:
> 
> qemu-system-aarch64: warning: Number of SMP cpus requested (512) exceeds
> the recommended cpus supported by KVM (224)
> qemu-system-aarch64: warning: Number of hotpluggable cpus requested
> (512) exceeds the recommended cpus supported by KVM (224)
> 
> on the guest: getconf _NPROCESSORS_ONLN returns 512
> 
> Then I have no clue about what can be expected of such overcommit config
> and I have not further exercised the guest at the moment. But at least
> it seems to boot properly. I also tested without overcommit and it seems
> to behave as before (boot, migration).
> 
> I still need to look at the migration of > 256vcpu guest at qemu level.

Let us know if further tests are needed.


Thanks,
zenghui
Eric Auger Aug. 27, 2019, 7:53 a.m. UTC | #5
Hi Zenghui,
On 8/27/19 9:49 AM, Zenghui Yu wrote:
> Hi Eric,
> 
> Thanks for this patch!
> 
> On 2019/8/24 1:52, Auger Eric wrote:
>> Hi Zenghui, Marc,
>>
>> On 8/23/19 7:33 PM, Eric Auger wrote:
>>> At the moment we use 2 IO devices per GICv3 redistributor: one
>                                                              ^^^
>>> one for the RD_base frame and one for the SGI_base frame.
>   ^^^
>>>
>>> Instead we can use a single IO device per redistributor (the 2
>>> frames are contiguous). This saves slots on the KVM_MMIO_BUS
>>> which is currently limited to NR_IOBUS_DEVS (1000).
>>>
>>> This change allows to instantiate up to 512 redistributors and may
>>> speed the guest boot with a large number of VCPUs.
>>>
>>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>>
>> I tested this patch with below kernel and QEMU branches:
>> kernel: https://github.com/eauger/linux/tree/256fix-v1
>> (Marc's patch + this patch)
>> https://github.com/eauger/qemu/tree/v4.1.0-256fix-rfc1-rc0
>> (header update + kvm_arm_gic_set_irq modification)
> 
> I also tested these three changes on HiSi D05 (with 64 pcpus), and yes,
> I can get a 512 vCPU guest to boot properly now.

Many thanks for the testing (and the bug report). I will formally post
the QEMU changes asap.

Thanks

Eric
Patch

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 7a30524a80ee..004f6e9d3b05 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -311,7 +311,6 @@  struct vgic_cpu {
 	 * parts of the redistributor.
 	 */
 	struct vgic_io_device	rd_iodev;
-	struct vgic_io_device	sgi_iodev;
 	struct vgic_redist_region *rdreg;
 
 	/* Contains the attributes and gpa of the LPI pending tables. */
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index bdbc297d06fb..eaff7031a089 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -192,7 +192,6 @@  int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 	int i;
 
 	vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF;
-	vgic_cpu->sgi_iodev.base_addr = VGIC_ADDR_UNDEF;
 
 	INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
 	raw_spin_lock_init(&vgic_cpu->ap_list_lock);
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index c45e2d7e942f..400067085cab 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -515,7 +515,8 @@  static const struct vgic_register_region vgic_v3_dist_registers[] = {
 		VGIC_ACCESS_32bit),
 };
 
-static const struct vgic_register_region vgic_v3_rdbase_registers[] = {
+static const struct vgic_register_region vgic_v3_rd_registers[] = {
+	/* RD_base registers */
 	REGISTER_DESC_WITH_LENGTH(GICR_CTLR,
 		vgic_mmio_read_v3r_ctlr, vgic_mmio_write_v3r_ctlr, 4,
 		VGIC_ACCESS_32bit),
@@ -540,44 +541,42 @@  static const struct vgic_register_region vgic_v3_rdbase_registers[] = {
 	REGISTER_DESC_WITH_LENGTH(GICR_IDREGS,
 		vgic_mmio_read_v3_idregs, vgic_mmio_write_wi, 48,
 		VGIC_ACCESS_32bit),
-};
-
-static const struct vgic_register_region vgic_v3_sgibase_registers[] = {
-	REGISTER_DESC_WITH_LENGTH(GICR_IGROUPR0,
+	/* SGI_base registers */
+	REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_IGROUPR0,
 		vgic_mmio_read_group, vgic_mmio_write_group, 4,
 		VGIC_ACCESS_32bit),
-	REGISTER_DESC_WITH_LENGTH(GICR_ISENABLER0,
+	REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_ISENABLER0,
 		vgic_mmio_read_enable, vgic_mmio_write_senable, 4,
 		VGIC_ACCESS_32bit),
-	REGISTER_DESC_WITH_LENGTH(GICR_ICENABLER0,
+	REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_ICENABLER0,
 		vgic_mmio_read_enable, vgic_mmio_write_cenable, 4,
 		VGIC_ACCESS_32bit),
-	REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ISPENDR0,
+	REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISPENDR0,
 		vgic_mmio_read_pending, vgic_mmio_write_spending,
 		vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4,
 		VGIC_ACCESS_32bit),
-	REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ICPENDR0,
+	REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICPENDR0,
 		vgic_mmio_read_pending, vgic_mmio_write_cpending,
 		vgic_mmio_read_raz, vgic_mmio_uaccess_write_wi, 4,
 		VGIC_ACCESS_32bit),
-	REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ISACTIVER0,
+	REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISACTIVER0,
 		vgic_mmio_read_active, vgic_mmio_write_sactive,
 		NULL, vgic_mmio_uaccess_write_sactive,
 		4, VGIC_ACCESS_32bit),
-	REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ICACTIVER0,
+	REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICACTIVER0,
 		vgic_mmio_read_active, vgic_mmio_write_cactive,
 		NULL, vgic_mmio_uaccess_write_cactive,
 		4, VGIC_ACCESS_32bit),
-	REGISTER_DESC_WITH_LENGTH(GICR_IPRIORITYR0,
+	REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_IPRIORITYR0,
 		vgic_mmio_read_priority, vgic_mmio_write_priority, 32,
 		VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
-	REGISTER_DESC_WITH_LENGTH(GICR_ICFGR0,
+	REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_ICFGR0,
 		vgic_mmio_read_config, vgic_mmio_write_config, 8,
 		VGIC_ACCESS_32bit),
-	REGISTER_DESC_WITH_LENGTH(GICR_IGRPMODR0,
+	REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_IGRPMODR0,
 		vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
 		VGIC_ACCESS_32bit),
-	REGISTER_DESC_WITH_LENGTH(GICR_NSACR,
+	REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_NSACR,
 		vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
 		VGIC_ACCESS_32bit),
 };
@@ -607,9 +606,8 @@  int vgic_register_redist_iodev(struct kvm_vcpu *vcpu)
 	struct vgic_dist *vgic = &kvm->arch.vgic;
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 	struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev;
-	struct vgic_io_device *sgi_dev = &vcpu->arch.vgic_cpu.sgi_iodev;
 	struct vgic_redist_region *rdreg;
-	gpa_t rd_base, sgi_base;
+	gpa_t rd_base;
 	int ret;
 
 	if (!IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr))
@@ -631,52 +629,31 @@  int vgic_register_redist_iodev(struct kvm_vcpu *vcpu)
 	vgic_cpu->rdreg = rdreg;
 
 	rd_base = rdreg->base + rdreg->free_index * KVM_VGIC_V3_REDIST_SIZE;
-	sgi_base = rd_base + SZ_64K;
 
 	kvm_iodevice_init(&rd_dev->dev, &kvm_io_gic_ops);
 	rd_dev->base_addr = rd_base;
 	rd_dev->iodev_type = IODEV_REDIST;
-	rd_dev->regions = vgic_v3_rdbase_registers;
-	rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers);
+	rd_dev->regions = vgic_v3_rd_registers;
+	rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rd_registers);
 	rd_dev->redist_vcpu = vcpu;
 
 	mutex_lock(&kvm->slots_lock);
 	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, rd_base,
-				      SZ_64K, &rd_dev->dev);
+				      2 * SZ_64K, &rd_dev->dev);
 	mutex_unlock(&kvm->slots_lock);
 
 	if (ret)
 		return ret;
 
-	kvm_iodevice_init(&sgi_dev->dev, &kvm_io_gic_ops);
-	sgi_dev->base_addr = sgi_base;
-	sgi_dev->iodev_type = IODEV_REDIST;
-	sgi_dev->regions = vgic_v3_sgibase_registers;
-	sgi_dev->nr_regions = ARRAY_SIZE(vgic_v3_sgibase_registers);
-	sgi_dev->redist_vcpu = vcpu;
-
-	mutex_lock(&kvm->slots_lock);
-	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, sgi_base,
-				      SZ_64K, &sgi_dev->dev);
-	if (ret) {
-		kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
-					  &rd_dev->dev);
-		goto out;
-	}
-
 	rdreg->free_index++;
-out:
-	mutex_unlock(&kvm->slots_lock);
-	return ret;
+	return 0;
 }
 
 static void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu)
 {
 	struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev;
-	struct vgic_io_device *sgi_dev = &vcpu->arch.vgic_cpu.sgi_iodev;
 
 	kvm_io_bus_unregister_dev(vcpu->kvm, KVM_MMIO_BUS, &rd_dev->dev);
-	kvm_io_bus_unregister_dev(vcpu->kvm, KVM_MMIO_BUS, &sgi_dev->dev);
 }
 
 static int vgic_register_all_redist_iodevs(struct kvm *kvm)
@@ -826,8 +803,8 @@  int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr)
 		iodev.base_addr = 0;
 		break;
 	case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS:{
-		iodev.regions = vgic_v3_rdbase_registers;
-		iodev.nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers);
+		iodev.regions = vgic_v3_rd_registers;
+		iodev.nr_regions = ARRAY_SIZE(vgic_v3_rd_registers);
 		iodev.base_addr = 0;
 		break;
 	}
@@ -985,21 +962,11 @@  int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
 			   int offset, u32 *val)
 {
 	struct vgic_io_device rd_dev = {
-		.regions = vgic_v3_rdbase_registers,
-		.nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers),
+		.regions = vgic_v3_rd_registers,
+		.nr_regions = ARRAY_SIZE(vgic_v3_rd_registers),
 	};
 
-	struct vgic_io_device sgi_dev = {
-		.regions = vgic_v3_sgibase_registers,
-		.nr_regions = ARRAY_SIZE(vgic_v3_sgibase_registers),
-	};
-
-	/* SGI_base is the next 64K frame after RD_base */
-	if (offset >= SZ_64K)
-		return vgic_uaccess(vcpu, &sgi_dev, is_write, offset - SZ_64K,
-				    val);
-	else
-		return vgic_uaccess(vcpu, &rd_dev, is_write, offset, val);
+	return vgic_uaccess(vcpu, &rd_dev, is_write, offset, val);
 }
 
 int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write,
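
To see why one sorted table spanning both frames is sufficient, here is
a stripped-down, self-contained illustration of the offset-based region
lookup (the real lookup lives in virt/kvm/arm/vgic/vgic-mmio.c; the
table below keeps just one register per frame and the base address is
made up):

#include <stdio.h>
#include <stdint.h>

#define SZ_64K		0x10000
#define GICR_CTLR	0x0000	/* offset within the RD_base frame  */
#define GICR_IGROUPR0	0x0080	/* offset within the SGI_base frame */

struct region { uint32_t offset; uint32_t len; const char *name; };

/* one table spanning both 64K frames, as vgic_v3_rd_registers[] now does */
static const struct region regions[] = {
	{ GICR_CTLR,              4, "GICR_CTLR (RD_base)" },
	{ SZ_64K + GICR_IGROUPR0, 4, "GICR_IGROUPR0 (SGI_base)" },
};

static const struct region *find_region(uint64_t addr, uint64_t rd_base)
{
	uint64_t offset = addr - rd_base;	/* 0 .. 2 * SZ_64K - 1 */
	unsigned int i;

	for (i = 0; i < sizeof(regions) / sizeof(regions[0]); i++) {
		if (offset >= regions[i].offset &&
		    offset < regions[i].offset + regions[i].len)
			return &regions[i];
	}
	return NULL;
}

int main(void)
{
	uint64_t rd_base = 0x080a0000;	/* arbitrary example base address */
	const struct region *r;

	/* an access into the second 64K frame resolves via the same table */
	r = find_region(rd_base + SZ_64K + GICR_IGROUPR0, rd_base);
	printf("%s\n", r ? r->name : "unhandled");	/* GICR_IGROUPR0 (SGI_base) */
	return 0;
}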