diff mbox

[v4,22/56] KVM: arm/arm64: vgic-new: Add MMIO handling framework

Message ID 1463392481-26583-23-git-send-email-andre.przywara@arm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Andre Przywara May 16, 2016, 9:53 a.m. UTC
From: Marc Zyngier <marc.zyngier@arm.com>

Add an MMIO handling framework to the VGIC emulation:
Each register is described by its offset, size (or number of bits per
IRQ, if applicable) and the read/write handler functions. We provide
initialization macros to describe each GIC register later easily.

Separate dispatch functions for read and write accesses are connected
to the kvm_io_bus framework and binary-search for the responsible
register handler based on the offset address within the region.
We convert the incoming data (referenced by a pointer) to the host's
endianess and use pass-by-value to hand the data over to the actual
handler functions.

The register handler prototype and the endianess conversion are
courtesy of Christoffer Dall.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Andre Przywara <andre.przywara@arm.com>
---
Changelog RFC..v1:
- rework MMIO dispatching to use only one kvm_io_bus device
- document purpose of register region macros
- rename "this" parameter to "dev"
- change IGROUPR to be RAO (returning 1 => Group1 IRQs)

Changelog v1 .. v2:
* MASSIVE rework:
- store register_region pointer in kvm_io_bus linked struct
- replace write_mask_xxx functions with extract_bytes() implementation
- change handler functions' prototypes to take and return unsigned long
- use binary search to find matching register handler
- convert endianess of input data in dispatch_mmio_xxx functions
- improve readability of register initializer macros
- remove any GICv2/GICv3 specific functions from vgic-mmio.c
- rename file from vgic_mmio.c to vgic-mmio.c

Changelog v2 .. v3:
- replace inclusion of vgic/vgic.h with arm_vgic.h

Changelog v3 .. v4:
- add IRQ number accessor macro
- check access width in dispatcher
- treat non-covered MMIO addresses as RAZ/WI
- remove extract_bytes() (re-introduced as static later in the series)

 include/kvm/vgic/vgic.h       |  13 +++
 virt/kvm/arm/vgic/vgic-mmio.c | 184 ++++++++++++++++++++++++++++++++++++++++++
 virt/kvm/arm/vgic/vgic-mmio.h |  87 ++++++++++++++++++++
 3 files changed, 284 insertions(+)
 create mode 100644 virt/kvm/arm/vgic/vgic-mmio.c
 create mode 100644 virt/kvm/arm/vgic/vgic-mmio.h

Comments

Marc Zyngier May 17, 2016, 1:33 p.m. UTC | #1
On 16/05/16 10:53, Andre Przywara wrote:
> From: Marc Zyngier <marc.zyngier@arm.com>
> 
> Add an MMIO handling framework to the VGIC emulation:
> Each register is described by its offset, size (or number of bits per
> IRQ, if applicable) and the read/write handler functions. We provide
> initialization macros to describe each GIC register later easily.
> 
> Separate dispatch functions for read and write accesses are connected
> to the kvm_io_bus framework and binary-search for the responsible
> register handler based on the offset address within the region.
> We convert the incoming data (referenced by a pointer) to the host's
> endianess and use pass-by-value to hand the data over to the actual
> handler functions.
> 
> The register handler prototype and the endianess conversion are
> courtesy of Christoffer Dall.
> 
> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
> ---
> Changelog RFC..v1:
> - rework MMIO dispatching to use only one kvm_io_bus device
> - document purpose of register region macros
> - rename "this" parameter to "dev"
> - change IGROUPR to be RAO (returning 1 => Group1 IRQs)
> 
> Changelog v1 .. v2:
> * MASSIVE rework:
> - store register_region pointer in kvm_io_bus linked struct
> - replace write_mask_xxx functions with extract_bytes() implementation
> - change handler functions' prototypes to take and return unsigned long
> - use binary search to find matching register handler
> - convert endianess of input data in dispatch_mmio_xxx functions
> - improve readability of register initializer macros
> - remove any GICv2/GICv3 specific functions from vgic-mmio.c
> - rename file from vgic_mmio.c to vgic-mmio.c
> 
> Changelog v2 .. v3:
> - replace inclusion of vgic/vgic.h with arm_vgic.h
> 
> Changelog v3 .. v4:
> - add IRQ number accessor macro
> - check access width in dispatcher
> - treat non-covered MMIO addresses as RAZ/WI
> - remove extract_bytes() (re-introduced as static later in the series)
> 
>  include/kvm/vgic/vgic.h       |  13 +++
>  virt/kvm/arm/vgic/vgic-mmio.c | 184 ++++++++++++++++++++++++++++++++++++++++++
>  virt/kvm/arm/vgic/vgic-mmio.h |  87 ++++++++++++++++++++
>  3 files changed, 284 insertions(+)
>  create mode 100644 virt/kvm/arm/vgic/vgic-mmio.c
>  create mode 100644 virt/kvm/arm/vgic/vgic-mmio.h
> 
> diff --git a/include/kvm/vgic/vgic.h b/include/kvm/vgic/vgic.h
> index f663288..ff3f9c2 100644
> --- a/include/kvm/vgic/vgic.h
> +++ b/include/kvm/vgic/vgic.h
> @@ -106,6 +106,16 @@ struct vgic_irq {
>  	enum vgic_irq_config config;	/* Level or edge */
>  };
>  
> +struct vgic_register_region;
> +
> +struct vgic_io_device {
> +	gpa_t base_addr;
> +	struct kvm_vcpu *redist_vcpu;
> +	const struct vgic_register_region *regions;
> +	int nr_regions;
> +	struct kvm_io_device dev;
> +};
> +
>  struct vgic_dist {
>  	bool			in_kernel;
>  	bool			ready;
> @@ -132,6 +142,9 @@ struct vgic_dist {
>  	bool			enabled;
>  
>  	struct vgic_irq		*spis;
> +
> +	struct vgic_io_device	dist_iodev;
> +	struct vgic_io_device	*redist_iodevs;
>  };
>  
>  struct vgic_v2_cpu_if {
> diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
> new file mode 100644
> index 0000000..012b82b
> --- /dev/null
> +++ b/virt/kvm/arm/vgic/vgic-mmio.c
> @@ -0,0 +1,184 @@
> +/*
> + * VGIC MMIO handling functions
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + */
> +
> +#include <linux/bitops.h>
> +#include <linux/bsearch.h>
> +#include <linux/kvm.h>
> +#include <linux/kvm_host.h>
> +#include <kvm/iodev.h>
> +#include <kvm/arm_vgic.h>
> +
> +#include "vgic.h"
> +#include "vgic-mmio.h"
> +
> +unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu,
> +				 gpa_t addr, unsigned int len)
> +{
> +	return 0;
> +}
> +
> +unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu,
> +				 gpa_t addr, unsigned int len)
> +{
> +	return -1UL;
> +}
> +
> +void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr,
> +			unsigned int len, unsigned long val)
> +{
> +	/* Ignore */
> +}
> +
> +static int match_region(const void *key, const void *elt)
> +{
> +	const unsigned int offset = (unsigned long)key;
> +	const struct vgic_register_region *region = elt;
> +
> +	if (offset < region->reg_offset)
> +		return -1;
> +
> +	if (offset >= region->reg_offset + region->len)
> +		return 1;
> +
> +	return 0;
> +}
> +
> +/* Find the proper register handler entry given a certain address offset. */
> +static const struct vgic_register_region *
> +vgic_find_mmio_region(const struct vgic_register_region *region, int nr_regions,
> +		      unsigned int offset)
> +{
> +	return bsearch((void *)(uintptr_t)offset, region, nr_regions,
> +		       sizeof(region[0]), match_region);
> +}
> +
> +/*
> + * kvm_mmio_read_buf() returns a value in a format where it can be converted
> + * to a byte array and be directly observed as the guest wanted it to appear
> + * in memory if it had done the store itself, which is LE for the GIC, as the
> + * guest knows the GIC is always LE.
> + *
> + * We convert this value to the CPUs native format to deal with it as a data
> + * value.
> + */
> +unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len)
> +{
> +	unsigned long data = kvm_mmio_read_buf(val, len);
> +
> +	switch (len) {
> +	case 1:
> +		return data;
> +	case 2:
> +		return le16_to_cpu(data);
> +	case 4:
> +		return le32_to_cpu(data);
> +	default:
> +		return le64_to_cpu(data);
> +	}
> +}
> +
> +/*
> + * kvm_mmio_write_buf() expects a value in a format such that if converted to
> + * a byte array it is observed as the guest would see it if it could perform
> + * the load directly.  Since the GIC is LE, and the guest knows this, the
> + * guest expects a value in little endian format.
> + *
> + * We convert the data value from the CPUs native format to LE so that the
> + * value is returned in the proper format.
> + */
> +void vgic_data_host_to_mmio_bus(void *buf, unsigned int len,
> +				unsigned long data)
> +{
> +	switch (len) {
> +	case 1:
> +		break;
> +	case 2:
> +		data = cpu_to_le16(data);
> +		break;
> +	case 4:
> +		data = cpu_to_le32(data);
> +		break;
> +	default:
> +		data = cpu_to_le64(data);
> +	}
> +
> +	kvm_mmio_write_buf(buf, len, data);
> +}
> +
> +static
> +struct vgic_io_device *kvm_to_vgic_iodev(const struct kvm_io_device *dev)
> +{
> +	return container_of(dev, struct vgic_io_device, dev);
> +}
> +
> +static bool check_region(const struct vgic_register_region *region,
> +			 gpa_t addr, int len)
> +{
> +	if ((region->access_flags & VGIC_ACCESS_8bit) && len == 1)
> +		return true;
> +	if ((region->access_flags & VGIC_ACCESS_32bit) &&
> +	    len == sizeof(u32) && !(addr & 3))
> +		return true;
> +	if ((region->access_flags & VGIC_ACCESS_64bit) &&
> +	    len == sizeof(u64) && !(addr & 7))
> +		return true;
> +
> +	return false;
> +}
> +
> +static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
> +			      gpa_t addr, int len, void *val)
> +{
> +	struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev);
> +	const struct vgic_register_region *region;
> +	struct kvm_vcpu *r_vcpu;
> +	unsigned long data;
> +
> +	region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions,
> +				       addr - iodev->base_addr);
> +	if (!region || !check_region(region, addr, len)) {
> +		memset(val, 0, len);
> +		return 0;
> +	}
> +
> +	r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu;
> +	data = region->read(r_vcpu, addr, len);
> +	vgic_data_host_to_mmio_bus(val, len, data);
> +	return 0;
> +}
> +
> +static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
> +			       gpa_t addr, int len, const void *val)
> +{
> +	struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev);
> +	const struct vgic_register_region *region;
> +	struct kvm_vcpu *r_vcpu;
> +	unsigned long data = vgic_data_mmio_bus_to_host(val, len);
> +
> +	region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions,
> +				       addr - iodev->base_addr);
> +	if (!region)
> +		return 0;
> +
> +	if (!check_region(region, addr, len))
> +		return 0;
> +
> +	r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu;
> +	region->write(r_vcpu, addr, len, data);
> +	return 0;
> +}
> +
> +struct kvm_io_device_ops kvm_io_gic_ops = {
> +	.read = dispatch_mmio_read,
> +	.write = dispatch_mmio_write,
> +};
> diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h
> new file mode 100644
> index 0000000..855b1db
> --- /dev/null
> +++ b/virt/kvm/arm/vgic/vgic-mmio.h
> @@ -0,0 +1,87 @@
> +/*
> + * Copyright (C) 2015, 2016 ARM Ltd.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program.  If not, see <http://www.gnu.org/licenses/>.
> + */
> +#ifndef __KVM_ARM_VGIC_MMIO_H__
> +#define __KVM_ARM_VGIC_MMIO_H__
> +
> +struct vgic_register_region {
> +	unsigned int reg_offset;
> +	unsigned int len;
> +	unsigned int bits_per_irq;
> +	unsigned int access_flags;
> +	unsigned long (*read)(struct kvm_vcpu *vcpu, gpa_t addr,
> +			      unsigned int len);
> +	void (*write)(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len,
> +		      unsigned long val);
> +};
> +
> +extern struct kvm_io_device_ops kvm_io_gic_ops;
> +
> +#define VGIC_ACCESS_8bit	1
> +#define VGIC_ACCESS_32bit	2
> +#define VGIC_ACCESS_64bit	4
> +
> +/* generate a mask that covers 1024 interrupts with <b> bits per IRQ */

Hmmm. I'd appreciate some additional comments, specially when it comes
to the various restrictions. May I'd suggest something like:

/*
 * Generate a mask that covers the number of bytes required to address
 * up to 1024 interrupts, each represented by <b> bits. This assumes
 * that <b> is a power of two.
 *
 * ilog2(b) + ilog2(1024) is the number of bits required to bit-address
 * 1024 interrupts, each represented by b bits. Minus ilog2(8) converts
 * this to a byte address.
 */

> +#define VGIC_ADDR_IRQ_MASK(b) GENMASK_ULL(ilog2(b) + ilog2(1024) - \
> +					  ilog2(BITS_PER_BYTE) - 1, 0)

/*
 * Convert a base address into a base interrupt (each interrupt
 * represented by <bits> bits. This assumes that <bits> is a power
 * of two, that <addr> both part of a memory region aligned on a
 * <b> bits boundary, and itself aligned on that same boundary
 * (for regions that describe an interrupt with more than a single
 * byte of data).
 */

> +#define VGIC_ADDR_TO_INTID(addr, bits)  (((addr) & VGIC_ADDR_IRQ_MASK(bits)) * \
> +					64 / (bits) / 8)
> +
> +/*
> + * Some VGIC registers store per-IRQ information, with a different number
> + * of bits per IRQ. For those registers this macro is used.
> + * The _WITH_LENGTH version instantiates registers with a fixed length
> + * and is mutually exclusive with the _PER_IRQ version.
> + */
> +#define REGISTER_DESC_WITH_BITS_PER_IRQ(off, rd, wr, bpi, acc)		\
> +	{								\
> +		.reg_offset = off,					\
> +		.bits_per_irq = bpi,					\
> +		.len = bpi * 1024 / 8,					\
> +		.access_flags = acc,					\
> +		.read = rd,						\
> +		.write = wr,						\
> +	}
> +
> +#define REGISTER_DESC_WITH_LENGTH(off, rd, wr, length, acc)		\
> +	{								\
> +		.reg_offset = off,					\
> +		.bits_per_irq = 0,					\
> +		.len = length,						\
> +		.access_flags = acc,					\
> +		.read = rd,						\
> +		.write = wr,						\
> +	}
> +
> +int kvm_vgic_register_mmio_region(struct kvm *kvm, struct kvm_vcpu *vcpu,
> +				  struct vgic_register_region *reg_desc,
> +				  struct vgic_io_device *region,
> +				  int nr_irqs, bool offset_private);
> +
> +unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len);
> +
> +void vgic_data_host_to_mmio_bus(void *buf, unsigned int len,
> +				unsigned long data);
> +
> +unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu,
> +				 gpa_t addr, unsigned int len);
> +
> +unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu,
> +				 gpa_t addr, unsigned int len);
> +
> +void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr,
> +			unsigned int len, unsigned long val);
> +
> +#endif
> 

Thanks,

	M.
Christoffer Dall May 18, 2016, 12:25 p.m. UTC | #2
On Tue, May 17, 2016 at 02:33:36PM +0100, Marc Zyngier wrote:
> On 16/05/16 10:53, Andre Przywara wrote:
> > From: Marc Zyngier <marc.zyngier@arm.com>
> > 
> > Add an MMIO handling framework to the VGIC emulation:
> > Each register is described by its offset, size (or number of bits per
> > IRQ, if applicable) and the read/write handler functions. We provide
> > initialization macros to describe each GIC register later easily.
> > 
> > Separate dispatch functions for read and write accesses are connected
> > to the kvm_io_bus framework and binary-search for the responsible
> > register handler based on the offset address within the region.
> > We convert the incoming data (referenced by a pointer) to the host's
> > endianess and use pass-by-value to hand the data over to the actual
> > handler functions.
> > 
> > The register handler prototype and the endianess conversion are
> > courtesy of Christoffer Dall.
> > 
> > Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> > Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
> > Signed-off-by: Andre Przywara <andre.przywara@arm.com>
> > ---
> > Changelog RFC..v1:
> > - rework MMIO dispatching to use only one kvm_io_bus device
> > - document purpose of register region macros
> > - rename "this" parameter to "dev"
> > - change IGROUPR to be RAO (returning 1 => Group1 IRQs)
> > 
> > Changelog v1 .. v2:
> > * MASSIVE rework:
> > - store register_region pointer in kvm_io_bus linked struct
> > - replace write_mask_xxx functions with extract_bytes() implementation
> > - change handler functions' prototypes to take and return unsigned long
> > - use binary search to find matching register handler
> > - convert endianess of input data in dispatch_mmio_xxx functions
> > - improve readability of register initializer macros
> > - remove any GICv2/GICv3 specific functions from vgic-mmio.c
> > - rename file from vgic_mmio.c to vgic-mmio.c
> > 
> > Changelog v2 .. v3:
> > - replace inclusion of vgic/vgic.h with arm_vgic.h
> > 
> > Changelog v3 .. v4:
> > - add IRQ number accessor macro
> > - check access width in dispatcher
> > - treat non-covered MMIO addresses as RAZ/WI
> > - remove extract_bytes() (re-introduced as static later in the series)
> > 
> >  include/kvm/vgic/vgic.h       |  13 +++
> >  virt/kvm/arm/vgic/vgic-mmio.c | 184 ++++++++++++++++++++++++++++++++++++++++++
> >  virt/kvm/arm/vgic/vgic-mmio.h |  87 ++++++++++++++++++++
> >  3 files changed, 284 insertions(+)
> >  create mode 100644 virt/kvm/arm/vgic/vgic-mmio.c
> >  create mode 100644 virt/kvm/arm/vgic/vgic-mmio.h
> > 
> > diff --git a/include/kvm/vgic/vgic.h b/include/kvm/vgic/vgic.h
> > index f663288..ff3f9c2 100644
> > --- a/include/kvm/vgic/vgic.h
> > +++ b/include/kvm/vgic/vgic.h
> > @@ -106,6 +106,16 @@ struct vgic_irq {
> >  	enum vgic_irq_config config;	/* Level or edge */
> >  };
> >  
> > +struct vgic_register_region;
> > +
> > +struct vgic_io_device {
> > +	gpa_t base_addr;
> > +	struct kvm_vcpu *redist_vcpu;
> > +	const struct vgic_register_region *regions;
> > +	int nr_regions;
> > +	struct kvm_io_device dev;
> > +};
> > +
> >  struct vgic_dist {
> >  	bool			in_kernel;
> >  	bool			ready;
> > @@ -132,6 +142,9 @@ struct vgic_dist {
> >  	bool			enabled;
> >  
> >  	struct vgic_irq		*spis;
> > +
> > +	struct vgic_io_device	dist_iodev;
> > +	struct vgic_io_device	*redist_iodevs;
> >  };
> >  
> >  struct vgic_v2_cpu_if {
> > diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
> > new file mode 100644
> > index 0000000..012b82b
> > --- /dev/null
> > +++ b/virt/kvm/arm/vgic/vgic-mmio.c
> > @@ -0,0 +1,184 @@
> > +/*
> > + * VGIC MMIO handling functions
> > + *
> > + * This program is free software; you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License version 2 as
> > + * published by the Free Software Foundation.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > + * GNU General Public License for more details.
> > + */
> > +
> > +#include <linux/bitops.h>
> > +#include <linux/bsearch.h>
> > +#include <linux/kvm.h>
> > +#include <linux/kvm_host.h>
> > +#include <kvm/iodev.h>
> > +#include <kvm/arm_vgic.h>
> > +
> > +#include "vgic.h"
> > +#include "vgic-mmio.h"
> > +
> > +unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu,
> > +				 gpa_t addr, unsigned int len)
> > +{
> > +	return 0;
> > +}
> > +
> > +unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu,
> > +				 gpa_t addr, unsigned int len)
> > +{
> > +	return -1UL;
> > +}
> > +
> > +void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr,
> > +			unsigned int len, unsigned long val)
> > +{
> > +	/* Ignore */
> > +}
> > +
> > +static int match_region(const void *key, const void *elt)
> > +{
> > +	const unsigned int offset = (unsigned long)key;
> > +	const struct vgic_register_region *region = elt;
> > +
> > +	if (offset < region->reg_offset)
> > +		return -1;
> > +
> > +	if (offset >= region->reg_offset + region->len)
> > +		return 1;
> > +
> > +	return 0;
> > +}
> > +
> > +/* Find the proper register handler entry given a certain address offset. */
> > +static const struct vgic_register_region *
> > +vgic_find_mmio_region(const struct vgic_register_region *region, int nr_regions,
> > +		      unsigned int offset)
> > +{
> > +	return bsearch((void *)(uintptr_t)offset, region, nr_regions,
> > +		       sizeof(region[0]), match_region);
> > +}
> > +
> > +/*
> > + * kvm_mmio_read_buf() returns a value in a format where it can be converted
> > + * to a byte array and be directly observed as the guest wanted it to appear
> > + * in memory if it had done the store itself, which is LE for the GIC, as the
> > + * guest knows the GIC is always LE.
> > + *
> > + * We convert this value to the CPUs native format to deal with it as a data
> > + * value.
> > + */
> > +unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len)
> > +{
> > +	unsigned long data = kvm_mmio_read_buf(val, len);
> > +
> > +	switch (len) {
> > +	case 1:
> > +		return data;
> > +	case 2:
> > +		return le16_to_cpu(data);
> > +	case 4:
> > +		return le32_to_cpu(data);
> > +	default:
> > +		return le64_to_cpu(data);
> > +	}
> > +}
> > +
> > +/*
> > + * kvm_mmio_write_buf() expects a value in a format such that if converted to
> > + * a byte array it is observed as the guest would see it if it could perform
> > + * the load directly.  Since the GIC is LE, and the guest knows this, the
> > + * guest expects a value in little endian format.
> > + *
> > + * We convert the data value from the CPUs native format to LE so that the
> > + * value is returned in the proper format.
> > + */
> > +void vgic_data_host_to_mmio_bus(void *buf, unsigned int len,
> > +				unsigned long data)
> > +{
> > +	switch (len) {
> > +	case 1:
> > +		break;
> > +	case 2:
> > +		data = cpu_to_le16(data);
> > +		break;
> > +	case 4:
> > +		data = cpu_to_le32(data);
> > +		break;
> > +	default:
> > +		data = cpu_to_le64(data);
> > +	}
> > +
> > +	kvm_mmio_write_buf(buf, len, data);
> > +}
> > +
> > +static
> > +struct vgic_io_device *kvm_to_vgic_iodev(const struct kvm_io_device *dev)
> > +{
> > +	return container_of(dev, struct vgic_io_device, dev);
> > +}
> > +
> > +static bool check_region(const struct vgic_register_region *region,
> > +			 gpa_t addr, int len)
> > +{
> > +	if ((region->access_flags & VGIC_ACCESS_8bit) && len == 1)
> > +		return true;
> > +	if ((region->access_flags & VGIC_ACCESS_32bit) &&
> > +	    len == sizeof(u32) && !(addr & 3))
> > +		return true;
> > +	if ((region->access_flags & VGIC_ACCESS_64bit) &&
> > +	    len == sizeof(u64) && !(addr & 7))
> > +		return true;
> > +
> > +	return false;
> > +}
> > +
> > +static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
> > +			      gpa_t addr, int len, void *val)
> > +{
> > +	struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev);
> > +	const struct vgic_register_region *region;
> > +	struct kvm_vcpu *r_vcpu;
> > +	unsigned long data;
> > +
> > +	region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions,
> > +				       addr - iodev->base_addr);
> > +	if (!region || !check_region(region, addr, len)) {
> > +		memset(val, 0, len);
> > +		return 0;
> > +	}
> > +
> > +	r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu;
> > +	data = region->read(r_vcpu, addr, len);
> > +	vgic_data_host_to_mmio_bus(val, len, data);
> > +	return 0;
> > +}
> > +
> > +static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
> > +			       gpa_t addr, int len, const void *val)
> > +{
> > +	struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev);
> > +	const struct vgic_register_region *region;
> > +	struct kvm_vcpu *r_vcpu;
> > +	unsigned long data = vgic_data_mmio_bus_to_host(val, len);
> > +
> > +	region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions,
> > +				       addr - iodev->base_addr);
> > +	if (!region)
> > +		return 0;
> > +
> > +	if (!check_region(region, addr, len))
> > +		return 0;
> > +
> > +	r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu;
> > +	region->write(r_vcpu, addr, len, data);
> > +	return 0;
> > +}
> > +
> > +struct kvm_io_device_ops kvm_io_gic_ops = {
> > +	.read = dispatch_mmio_read,
> > +	.write = dispatch_mmio_write,
> > +};
> > diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h
> > new file mode 100644
> > index 0000000..855b1db
> > --- /dev/null
> > +++ b/virt/kvm/arm/vgic/vgic-mmio.h
> > @@ -0,0 +1,87 @@
> > +/*
> > + * Copyright (C) 2015, 2016 ARM Ltd.
> > + *
> > + * This program is free software; you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License version 2 as
> > + * published by the Free Software Foundation.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > + * GNU General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU General Public License
> > + * along with this program.  If not, see <http://www.gnu.org/licenses/>.
> > + */
> > +#ifndef __KVM_ARM_VGIC_MMIO_H__
> > +#define __KVM_ARM_VGIC_MMIO_H__
> > +
> > +struct vgic_register_region {
> > +	unsigned int reg_offset;
> > +	unsigned int len;
> > +	unsigned int bits_per_irq;
> > +	unsigned int access_flags;
> > +	unsigned long (*read)(struct kvm_vcpu *vcpu, gpa_t addr,
> > +			      unsigned int len);
> > +	void (*write)(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len,
> > +		      unsigned long val);
> > +};
> > +
> > +extern struct kvm_io_device_ops kvm_io_gic_ops;
> > +
> > +#define VGIC_ACCESS_8bit	1
> > +#define VGIC_ACCESS_32bit	2
> > +#define VGIC_ACCESS_64bit	4
> > +
> > +/* generate a mask that covers 1024 interrupts with <b> bits per IRQ */
> 
> Hmmm. I'd appreciate some additional comments, specially when it comes
> to the various restrictions. May I'd suggest something like:
> 
> /*
>  * Generate a mask that covers the number of bytes required to address
>  * up to 1024 interrupts, each represented by <b> bits. This assumes
>  * that <b> is a power of two.
>  *
>  * ilog2(b) + ilog2(1024) is the number of bits required to bit-address
>  * 1024 interrupts, each represented by b bits. Minus ilog2(8) converts
>  * this to a byte address.

So I'm guessting this is a rewrite of ilog2( (b * 1024) / 8), but I'm
stupid enough to not understand our use of logarithms here.  Can someone
remind me whatever I forgot at CS 101?

>  */
> 
> > +#define VGIC_ADDR_IRQ_MASK(b) GENMASK_ULL(ilog2(b) + ilog2(1024) - \
> > +					  ilog2(BITS_PER_BYTE) - 1, 0)
> 
> /*
>  * Convert a base address into a base interrupt (each interrupt
>  * represented by <bits> bits. This assumes that <bits> is a power
>  * of two, that <addr> both part of a memory region aligned on a

did you mean '<addr> *is* both part of' ?

>  * <b> bits boundary, and itself aligned on that same boundary
>  * (for regions that describe an interrupt with more than a single
>  * byte of data).
>  */
> 

In any case, thanks for the commentary, I was faily lost here.

-Christoffer

> > +#define VGIC_ADDR_TO_INTID(addr, bits)  (((addr) & VGIC_ADDR_IRQ_MASK(bits)) * \
> > +					64 / (bits) / 8)
> > +
> > +/*
> > + * Some VGIC registers store per-IRQ information, with a different number
> > + * of bits per IRQ. For those registers this macro is used.
> > + * The _WITH_LENGTH version instantiates registers with a fixed length
> > + * and is mutually exclusive with the _PER_IRQ version.
> > + */
> > +#define REGISTER_DESC_WITH_BITS_PER_IRQ(off, rd, wr, bpi, acc)		\
> > +	{								\
> > +		.reg_offset = off,					\
> > +		.bits_per_irq = bpi,					\
> > +		.len = bpi * 1024 / 8,					\
> > +		.access_flags = acc,					\
> > +		.read = rd,						\
> > +		.write = wr,						\
> > +	}
> > +
> > +#define REGISTER_DESC_WITH_LENGTH(off, rd, wr, length, acc)		\
> > +	{								\
> > +		.reg_offset = off,					\
> > +		.bits_per_irq = 0,					\
> > +		.len = length,						\
> > +		.access_flags = acc,					\
> > +		.read = rd,						\
> > +		.write = wr,						\
> > +	}
> > +
> > +int kvm_vgic_register_mmio_region(struct kvm *kvm, struct kvm_vcpu *vcpu,
> > +				  struct vgic_register_region *reg_desc,
> > +				  struct vgic_io_device *region,
> > +				  int nr_irqs, bool offset_private);
> > +
> > +unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len);
> > +
> > +void vgic_data_host_to_mmio_bus(void *buf, unsigned int len,
> > +				unsigned long data);
> > +
> > +unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu,
> > +				 gpa_t addr, unsigned int len);
> > +
> > +unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu,
> > +				 gpa_t addr, unsigned int len);
> > +
> > +void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr,
> > +			unsigned int len, unsigned long val);
> > +
> > +#endif
> > 
> 
> Thanks,
> 
> 	M.
> -- 
> Jazz is not dead. It just smells funny...
Christoffer Dall May 18, 2016, 12:31 p.m. UTC | #3
On Mon, May 16, 2016 at 10:53:10AM +0100, Andre Przywara wrote:
> From: Marc Zyngier <marc.zyngier@arm.com>
> 
> Add an MMIO handling framework to the VGIC emulation:
> Each register is described by its offset, size (or number of bits per
> IRQ, if applicable) and the read/write handler functions. We provide
> initialization macros to describe each GIC register later easily.
> 
> Separate dispatch functions for read and write accesses are connected
> to the kvm_io_bus framework and binary-search for the responsible
> register handler based on the offset address within the region.
> We convert the incoming data (referenced by a pointer) to the host's
> endianess and use pass-by-value to hand the data over to the actual
> handler functions.
> 
> The register handler prototype and the endianess conversion are
> courtesy of Christoffer Dall.
> 
> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
> ---
> Changelog RFC..v1:
> - rework MMIO dispatching to use only one kvm_io_bus device
> - document purpose of register region macros
> - rename "this" parameter to "dev"
> - change IGROUPR to be RAO (returning 1 => Group1 IRQs)
> 
> Changelog v1 .. v2:
> * MASSIVE rework:
> - store register_region pointer in kvm_io_bus linked struct
> - replace write_mask_xxx functions with extract_bytes() implementation
> - change handler functions' prototypes to take and return unsigned long
> - use binary search to find matching register handler
> - convert endianess of input data in dispatch_mmio_xxx functions
> - improve readability of register initializer macros
> - remove any GICv2/GICv3 specific functions from vgic-mmio.c
> - rename file from vgic_mmio.c to vgic-mmio.c
> 
> Changelog v2 .. v3:
> - replace inclusion of vgic/vgic.h with arm_vgic.h
> 
> Changelog v3 .. v4:
> - add IRQ number accessor macro
> - check access width in dispatcher
> - treat non-covered MMIO addresses as RAZ/WI
> - remove extract_bytes() (re-introduced as static later in the series)
> 
>  include/kvm/vgic/vgic.h       |  13 +++
>  virt/kvm/arm/vgic/vgic-mmio.c | 184 ++++++++++++++++++++++++++++++++++++++++++
>  virt/kvm/arm/vgic/vgic-mmio.h |  87 ++++++++++++++++++++
>  3 files changed, 284 insertions(+)
>  create mode 100644 virt/kvm/arm/vgic/vgic-mmio.c
>  create mode 100644 virt/kvm/arm/vgic/vgic-mmio.h
> 
> diff --git a/include/kvm/vgic/vgic.h b/include/kvm/vgic/vgic.h
> index f663288..ff3f9c2 100644
> --- a/include/kvm/vgic/vgic.h
> +++ b/include/kvm/vgic/vgic.h
> @@ -106,6 +106,16 @@ struct vgic_irq {
>  	enum vgic_irq_config config;	/* Level or edge */
>  };
>  
> +struct vgic_register_region;
> +
> +struct vgic_io_device {
> +	gpa_t base_addr;
> +	struct kvm_vcpu *redist_vcpu;
> +	const struct vgic_register_region *regions;
> +	int nr_regions;
> +	struct kvm_io_device dev;
> +};
> +
>  struct vgic_dist {
>  	bool			in_kernel;
>  	bool			ready;
> @@ -132,6 +142,9 @@ struct vgic_dist {
>  	bool			enabled;
>  
>  	struct vgic_irq		*spis;
> +
> +	struct vgic_io_device	dist_iodev;
> +	struct vgic_io_device	*redist_iodevs;
>  };
>  
>  struct vgic_v2_cpu_if {
> diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
> new file mode 100644
> index 0000000..012b82b
> --- /dev/null
> +++ b/virt/kvm/arm/vgic/vgic-mmio.c
> @@ -0,0 +1,184 @@
> +/*
> + * VGIC MMIO handling functions
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + */
> +
> +#include <linux/bitops.h>
> +#include <linux/bsearch.h>
> +#include <linux/kvm.h>
> +#include <linux/kvm_host.h>
> +#include <kvm/iodev.h>
> +#include <kvm/arm_vgic.h>
> +
> +#include "vgic.h"
> +#include "vgic-mmio.h"
> +
> +unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu,
> +				 gpa_t addr, unsigned int len)
> +{
> +	return 0;
> +}
> +
> +unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu,
> +				 gpa_t addr, unsigned int len)
> +{
> +	return -1UL;
> +}
> +
> +void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr,
> +			unsigned int len, unsigned long val)
> +{
> +	/* Ignore */
> +}
> +
> +static int match_region(const void *key, const void *elt)
> +{
> +	const unsigned int offset = (unsigned long)key;
> +	const struct vgic_register_region *region = elt;
> +
> +	if (offset < region->reg_offset)
> +		return -1;
> +
> +	if (offset >= region->reg_offset + region->len)
> +		return 1;
> +
> +	return 0;
> +}
> +
> +/* Find the proper register handler entry given a certain address offset. */
> +static const struct vgic_register_region *
> +vgic_find_mmio_region(const struct vgic_register_region *region, int nr_regions,
> +		      unsigned int offset)
> +{
> +	return bsearch((void *)(uintptr_t)offset, region, nr_regions,
> +		       sizeof(region[0]), match_region);
> +}
> +
> +/*
> + * kvm_mmio_read_buf() returns a value in a format where it can be converted
> + * to a byte array and be directly observed as the guest wanted it to appear
> + * in memory if it had done the store itself, which is LE for the GIC, as the
> + * guest knows the GIC is always LE.
> + *
> + * We convert this value to the CPUs native format to deal with it as a data
> + * value.
> + */
> +unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len)
> +{
> +	unsigned long data = kvm_mmio_read_buf(val, len);
> +
> +	switch (len) {
> +	case 1:
> +		return data;
> +	case 2:
> +		return le16_to_cpu(data);
> +	case 4:
> +		return le32_to_cpu(data);
> +	default:
> +		return le64_to_cpu(data);
> +	}
> +}
> +
> +/*
> + * kvm_mmio_write_buf() expects a value in a format such that if converted to
> + * a byte array it is observed as the guest would see it if it could perform
> + * the load directly.  Since the GIC is LE, and the guest knows this, the
> + * guest expects a value in little endian format.
> + *
> + * We convert the data value from the CPUs native format to LE so that the
> + * value is returned in the proper format.
> + */
> +void vgic_data_host_to_mmio_bus(void *buf, unsigned int len,
> +				unsigned long data)
> +{
> +	switch (len) {
> +	case 1:
> +		break;
> +	case 2:
> +		data = cpu_to_le16(data);
> +		break;
> +	case 4:
> +		data = cpu_to_le32(data);
> +		break;
> +	default:
> +		data = cpu_to_le64(data);
> +	}
> +
> +	kvm_mmio_write_buf(buf, len, data);
> +}
> +
> +static
> +struct vgic_io_device *kvm_to_vgic_iodev(const struct kvm_io_device *dev)
> +{
> +	return container_of(dev, struct vgic_io_device, dev);
> +}
> +
> +static bool check_region(const struct vgic_register_region *region,
> +			 gpa_t addr, int len)
> +{
> +	if ((region->access_flags & VGIC_ACCESS_8bit) && len == 1)
> +		return true;
> +	if ((region->access_flags & VGIC_ACCESS_32bit) &&
> +	    len == sizeof(u32) && !(addr & 3))
> +		return true;
> +	if ((region->access_flags & VGIC_ACCESS_64bit) &&
> +	    len == sizeof(u64) && !(addr & 7))
> +		return true;
> +
> +	return false;
> +}
> +
> +static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
> +			      gpa_t addr, int len, void *val)
> +{
> +	struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev);
> +	const struct vgic_register_region *region;
> +	struct kvm_vcpu *r_vcpu;
> +	unsigned long data;
> +
> +	region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions,
> +				       addr - iodev->base_addr);
> +	if (!region || !check_region(region, addr, len)) {
> +		memset(val, 0, len);
> +		return 0;
> +	}
> +
> +	r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu;
> +	data = region->read(r_vcpu, addr, len);
> +	vgic_data_host_to_mmio_bus(val, len, data);
> +	return 0;
> +}
> +
> +static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
> +			       gpa_t addr, int len, const void *val)
> +{
> +	struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev);
> +	const struct vgic_register_region *region;
> +	struct kvm_vcpu *r_vcpu;
> +	unsigned long data = vgic_data_mmio_bus_to_host(val, len);
> +
> +	region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions,
> +				       addr - iodev->base_addr);
> +	if (!region)
> +		return 0;
> +
> +	if (!check_region(region, addr, len))
> +		return 0;
> +
> +	r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu;
> +	region->write(r_vcpu, addr, len, data);
> +	return 0;
> +}
> +
> +struct kvm_io_device_ops kvm_io_gic_ops = {
> +	.read = dispatch_mmio_read,
> +	.write = dispatch_mmio_write,
> +};
> diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h
> new file mode 100644
> index 0000000..855b1db
> --- /dev/null
> +++ b/virt/kvm/arm/vgic/vgic-mmio.h
> @@ -0,0 +1,87 @@
> +/*
> + * Copyright (C) 2015, 2016 ARM Ltd.
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program.  If not, see <http://www.gnu.org/licenses/>.
> + */
> +#ifndef __KVM_ARM_VGIC_MMIO_H__
> +#define __KVM_ARM_VGIC_MMIO_H__
> +
> +struct vgic_register_region {
> +	unsigned int reg_offset;
> +	unsigned int len;
> +	unsigned int bits_per_irq;
> +	unsigned int access_flags;
> +	unsigned long (*read)(struct kvm_vcpu *vcpu, gpa_t addr,
> +			      unsigned int len);
> +	void (*write)(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len,
> +		      unsigned long val);
> +};
> +
> +extern struct kvm_io_device_ops kvm_io_gic_ops;
> +
> +#define VGIC_ACCESS_8bit	1
> +#define VGIC_ACCESS_32bit	2
> +#define VGIC_ACCESS_64bit	4
> +
> +/* generate a mask that covers 1024 interrupts with <b> bits per IRQ */
> +#define VGIC_ADDR_IRQ_MASK(b) GENMASK_ULL(ilog2(b) + ilog2(1024) - \
> +					  ilog2(BITS_PER_BYTE) - 1, 0)
> +#define VGIC_ADDR_TO_INTID(addr, bits)  (((addr) & VGIC_ADDR_IRQ_MASK(bits)) * \
> +					64 / (bits) / 8)

In the comment we end up adding here, can we also describe why
(addr & <magic mask>) * <magic 64> / (bits) / <BITS_PER_BYTE OR BYTES_PER_ULL>

gives us what we need, because I don't get it.

Except for this magic formula, which I trust you and Marc have verified
and proved:

Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>

> +
> +/*
> + * Some VGIC registers store per-IRQ information, with a different number
> + * of bits per IRQ. For those registers this macro is used.
> + * The _WITH_LENGTH version instantiates registers with a fixed length
> + * and is mutually exclusive with the _PER_IRQ version.
> + */
> +#define REGISTER_DESC_WITH_BITS_PER_IRQ(off, rd, wr, bpi, acc)		\
> +	{								\
> +		.reg_offset = off,					\
> +		.bits_per_irq = bpi,					\
> +		.len = bpi * 1024 / 8,					\
> +		.access_flags = acc,					\
> +		.read = rd,						\
> +		.write = wr,						\
> +	}
> +
> +#define REGISTER_DESC_WITH_LENGTH(off, rd, wr, length, acc)		\
> +	{								\
> +		.reg_offset = off,					\
> +		.bits_per_irq = 0,					\
> +		.len = length,						\
> +		.access_flags = acc,					\
> +		.read = rd,						\
> +		.write = wr,						\
> +	}
> +
> +int kvm_vgic_register_mmio_region(struct kvm *kvm, struct kvm_vcpu *vcpu,
> +				  struct vgic_register_region *reg_desc,
> +				  struct vgic_io_device *region,
> +				  int nr_irqs, bool offset_private);
> +
> +unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len);
> +
> +void vgic_data_host_to_mmio_bus(void *buf, unsigned int len,
> +				unsigned long data);
> +
> +unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu,
> +				 gpa_t addr, unsigned int len);
> +
> +unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu,
> +				 gpa_t addr, unsigned int len);
> +
> +void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr,
> +			unsigned int len, unsigned long val);
> +
> +#endif
> -- 
> 2.8.2
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Marc Zyngier May 18, 2016, 2:12 p.m. UTC | #4
On 18/05/16 13:25, Christoffer Dall wrote:
> On Tue, May 17, 2016 at 02:33:36PM +0100, Marc Zyngier wrote:
>> On 16/05/16 10:53, Andre Przywara wrote:
>>> From: Marc Zyngier <marc.zyngier@arm.com>
>>>
>>> Add an MMIO handling framework to the VGIC emulation:
>>> Each register is described by its offset, size (or number of bits per
>>> IRQ, if applicable) and the read/write handler functions. We provide
>>> initialization macros to describe each GIC register later easily.
>>>
>>> Separate dispatch functions for read and write accesses are connected
>>> to the kvm_io_bus framework and binary-search for the responsible
>>> register handler based on the offset address within the region.
>>> We convert the incoming data (referenced by a pointer) to the host's
>>> endianess and use pass-by-value to hand the data over to the actual
>>> handler functions.
>>>
>>> The register handler prototype and the endianess conversion are
>>> courtesy of Christoffer Dall.
>>>
>>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
>>> Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
>>> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
>>> ---
>>> Changelog RFC..v1:
>>> - rework MMIO dispatching to use only one kvm_io_bus device
>>> - document purpose of register region macros
>>> - rename "this" parameter to "dev"
>>> - change IGROUPR to be RAO (returning 1 => Group1 IRQs)
>>>
>>> Changelog v1 .. v2:
>>> * MASSIVE rework:
>>> - store register_region pointer in kvm_io_bus linked struct
>>> - replace write_mask_xxx functions with extract_bytes() implementation
>>> - change handler functions' prototypes to take and return unsigned long
>>> - use binary search to find matching register handler
>>> - convert endianess of input data in dispatch_mmio_xxx functions
>>> - improve readability of register initializer macros
>>> - remove any GICv2/GICv3 specific functions from vgic-mmio.c
>>> - rename file from vgic_mmio.c to vgic-mmio.c
>>>
>>> Changelog v2 .. v3:
>>> - replace inclusion of vgic/vgic.h with arm_vgic.h
>>>
>>> Changelog v3 .. v4:
>>> - add IRQ number accessor macro
>>> - check access width in dispatcher
>>> - treat non-covered MMIO addresses as RAZ/WI
>>> - remove extract_bytes() (re-introduced as static later in the series)
>>>
>>>  include/kvm/vgic/vgic.h       |  13 +++
>>>  virt/kvm/arm/vgic/vgic-mmio.c | 184 ++++++++++++++++++++++++++++++++++++++++++
>>>  virt/kvm/arm/vgic/vgic-mmio.h |  87 ++++++++++++++++++++
>>>  3 files changed, 284 insertions(+)
>>>  create mode 100644 virt/kvm/arm/vgic/vgic-mmio.c
>>>  create mode 100644 virt/kvm/arm/vgic/vgic-mmio.h
>>>
>>> diff --git a/include/kvm/vgic/vgic.h b/include/kvm/vgic/vgic.h
>>> index f663288..ff3f9c2 100644
>>> --- a/include/kvm/vgic/vgic.h
>>> +++ b/include/kvm/vgic/vgic.h
>>> @@ -106,6 +106,16 @@ struct vgic_irq {
>>>  	enum vgic_irq_config config;	/* Level or edge */
>>>  };
>>>  
>>> +struct vgic_register_region;
>>> +
>>> +struct vgic_io_device {
>>> +	gpa_t base_addr;
>>> +	struct kvm_vcpu *redist_vcpu;
>>> +	const struct vgic_register_region *regions;
>>> +	int nr_regions;
>>> +	struct kvm_io_device dev;
>>> +};
>>> +
>>>  struct vgic_dist {
>>>  	bool			in_kernel;
>>>  	bool			ready;
>>> @@ -132,6 +142,9 @@ struct vgic_dist {
>>>  	bool			enabled;
>>>  
>>>  	struct vgic_irq		*spis;
>>> +
>>> +	struct vgic_io_device	dist_iodev;
>>> +	struct vgic_io_device	*redist_iodevs;
>>>  };
>>>  
>>>  struct vgic_v2_cpu_if {
>>> diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
>>> new file mode 100644
>>> index 0000000..012b82b
>>> --- /dev/null
>>> +++ b/virt/kvm/arm/vgic/vgic-mmio.c
>>> @@ -0,0 +1,184 @@
>>> +/*
>>> + * VGIC MMIO handling functions
>>> + *
>>> + * This program is free software; you can redistribute it and/or modify
>>> + * it under the terms of the GNU General Public License version 2 as
>>> + * published by the Free Software Foundation.
>>> + *
>>> + * This program is distributed in the hope that it will be useful,
>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>>> + * GNU General Public License for more details.
>>> + */
>>> +
>>> +#include <linux/bitops.h>
>>> +#include <linux/bsearch.h>
>>> +#include <linux/kvm.h>
>>> +#include <linux/kvm_host.h>
>>> +#include <kvm/iodev.h>
>>> +#include <kvm/arm_vgic.h>
>>> +
>>> +#include "vgic.h"
>>> +#include "vgic-mmio.h"
>>> +
>>> +unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu,
>>> +				 gpa_t addr, unsigned int len)
>>> +{
>>> +	return 0;
>>> +}
>>> +
>>> +unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu,
>>> +				 gpa_t addr, unsigned int len)
>>> +{
>>> +	return -1UL;
>>> +}
>>> +
>>> +void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr,
>>> +			unsigned int len, unsigned long val)
>>> +{
>>> +	/* Ignore */
>>> +}
>>> +
>>> +static int match_region(const void *key, const void *elt)
>>> +{
>>> +	const unsigned int offset = (unsigned long)key;
>>> +	const struct vgic_register_region *region = elt;
>>> +
>>> +	if (offset < region->reg_offset)
>>> +		return -1;
>>> +
>>> +	if (offset >= region->reg_offset + region->len)
>>> +		return 1;
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +/* Find the proper register handler entry given a certain address offset. */
>>> +static const struct vgic_register_region *
>>> +vgic_find_mmio_region(const struct vgic_register_region *region, int nr_regions,
>>> +		      unsigned int offset)
>>> +{
>>> +	return bsearch((void *)(uintptr_t)offset, region, nr_regions,
>>> +		       sizeof(region[0]), match_region);
>>> +}
>>> +
>>> +/*
>>> + * kvm_mmio_read_buf() returns a value in a format where it can be converted
>>> + * to a byte array and be directly observed as the guest wanted it to appear
>>> + * in memory if it had done the store itself, which is LE for the GIC, as the
>>> + * guest knows the GIC is always LE.
>>> + *
>>> + * We convert this value to the CPUs native format to deal with it as a data
>>> + * value.
>>> + */
>>> +unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len)
>>> +{
>>> +	unsigned long data = kvm_mmio_read_buf(val, len);
>>> +
>>> +	switch (len) {
>>> +	case 1:
>>> +		return data;
>>> +	case 2:
>>> +		return le16_to_cpu(data);
>>> +	case 4:
>>> +		return le32_to_cpu(data);
>>> +	default:
>>> +		return le64_to_cpu(data);
>>> +	}
>>> +}
>>> +
>>> +/*
>>> + * kvm_mmio_write_buf() expects a value in a format such that if converted to
>>> + * a byte array it is observed as the guest would see it if it could perform
>>> + * the load directly.  Since the GIC is LE, and the guest knows this, the
>>> + * guest expects a value in little endian format.
>>> + *
>>> + * We convert the data value from the CPUs native format to LE so that the
>>> + * value is returned in the proper format.
>>> + */
>>> +void vgic_data_host_to_mmio_bus(void *buf, unsigned int len,
>>> +				unsigned long data)
>>> +{
>>> +	switch (len) {
>>> +	case 1:
>>> +		break;
>>> +	case 2:
>>> +		data = cpu_to_le16(data);
>>> +		break;
>>> +	case 4:
>>> +		data = cpu_to_le32(data);
>>> +		break;
>>> +	default:
>>> +		data = cpu_to_le64(data);
>>> +	}
>>> +
>>> +	kvm_mmio_write_buf(buf, len, data);
>>> +}
>>> +
>>> +static
>>> +struct vgic_io_device *kvm_to_vgic_iodev(const struct kvm_io_device *dev)
>>> +{
>>> +	return container_of(dev, struct vgic_io_device, dev);
>>> +}
>>> +
>>> +static bool check_region(const struct vgic_register_region *region,
>>> +			 gpa_t addr, int len)
>>> +{
>>> +	if ((region->access_flags & VGIC_ACCESS_8bit) && len == 1)
>>> +		return true;
>>> +	if ((region->access_flags & VGIC_ACCESS_32bit) &&
>>> +	    len == sizeof(u32) && !(addr & 3))
>>> +		return true;
>>> +	if ((region->access_flags & VGIC_ACCESS_64bit) &&
>>> +	    len == sizeof(u64) && !(addr & 7))
>>> +		return true;
>>> +
>>> +	return false;
>>> +}
>>> +
>>> +static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
>>> +			      gpa_t addr, int len, void *val)
>>> +{
>>> +	struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev);
>>> +	const struct vgic_register_region *region;
>>> +	struct kvm_vcpu *r_vcpu;
>>> +	unsigned long data;
>>> +
>>> +	region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions,
>>> +				       addr - iodev->base_addr);
>>> +	if (!region || !check_region(region, addr, len)) {
>>> +		memset(val, 0, len);
>>> +		return 0;
>>> +	}
>>> +
>>> +	r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu;
>>> +	data = region->read(r_vcpu, addr, len);
>>> +	vgic_data_host_to_mmio_bus(val, len, data);
>>> +	return 0;
>>> +}
>>> +
>>> +static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
>>> +			       gpa_t addr, int len, const void *val)
>>> +{
>>> +	struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev);
>>> +	const struct vgic_register_region *region;
>>> +	struct kvm_vcpu *r_vcpu;
>>> +	unsigned long data = vgic_data_mmio_bus_to_host(val, len);
>>> +
>>> +	region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions,
>>> +				       addr - iodev->base_addr);
>>> +	if (!region)
>>> +		return 0;
>>> +
>>> +	if (!check_region(region, addr, len))
>>> +		return 0;
>>> +
>>> +	r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu;
>>> +	region->write(r_vcpu, addr, len, data);
>>> +	return 0;
>>> +}
>>> +
>>> +struct kvm_io_device_ops kvm_io_gic_ops = {
>>> +	.read = dispatch_mmio_read,
>>> +	.write = dispatch_mmio_write,
>>> +};
>>> diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h
>>> new file mode 100644
>>> index 0000000..855b1db
>>> --- /dev/null
>>> +++ b/virt/kvm/arm/vgic/vgic-mmio.h
>>> @@ -0,0 +1,87 @@
>>> +/*
>>> + * Copyright (C) 2015, 2016 ARM Ltd.
>>> + *
>>> + * This program is free software; you can redistribute it and/or modify
>>> + * it under the terms of the GNU General Public License version 2 as
>>> + * published by the Free Software Foundation.
>>> + *
>>> + * This program is distributed in the hope that it will be useful,
>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>>> + * GNU General Public License for more details.
>>> + *
>>> + * You should have received a copy of the GNU General Public License
>>> + * along with this program.  If not, see <http://www.gnu.org/licenses/>.
>>> + */
>>> +#ifndef __KVM_ARM_VGIC_MMIO_H__
>>> +#define __KVM_ARM_VGIC_MMIO_H__
>>> +
>>> +struct vgic_register_region {
>>> +	unsigned int reg_offset;
>>> +	unsigned int len;
>>> +	unsigned int bits_per_irq;
>>> +	unsigned int access_flags;
>>> +	unsigned long (*read)(struct kvm_vcpu *vcpu, gpa_t addr,
>>> +			      unsigned int len);
>>> +	void (*write)(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len,
>>> +		      unsigned long val);
>>> +};
>>> +
>>> +extern struct kvm_io_device_ops kvm_io_gic_ops;
>>> +
>>> +#define VGIC_ACCESS_8bit	1
>>> +#define VGIC_ACCESS_32bit	2
>>> +#define VGIC_ACCESS_64bit	4
>>> +
>>> +/* generate a mask that covers 1024 interrupts with <b> bits per IRQ */
>>
>> Hmmm. I'd appreciate some additional comments, specially when it comes
>> to the various restrictions. May I'd suggest something like:
>>
>> /*
>>  * Generate a mask that covers the number of bytes required to address
>>  * up to 1024 interrupts, each represented by <b> bits. This assumes
>>  * that <b> is a power of two.
>>  *
>>  * ilog2(b) + ilog2(1024) is the number of bits required to bit-address
>>  * 1024 interrupts, each represented by b bits. Minus ilog2(8) converts
>>  * this to a byte address.
> 
> So I'm guessting this is a rewrite of ilog2( (b * 1024) / 8), but I'm

Yes, same thing.

> stupid enough to not understand our use of logarithms here.  Can someone
> remind me whatever I forgot at CS 101?

I'm bad at explaining that kind of things, so let me just quote
Wikipedia (https://en.wikipedia.org/wiki/Binary_logarithm):

"The number of digits (bits) in the binary representation of a positive
integer n is the integral part of 1 + log2?n"

Is that what you were missing?

> 
>>  */
>>
>>> +#define VGIC_ADDR_IRQ_MASK(b) GENMASK_ULL(ilog2(b) + ilog2(1024) - \
>>> +					  ilog2(BITS_PER_BYTE) - 1, 0)
>>
>> /*
>>  * Convert a base address into a base interrupt (each interrupt
>>  * represented by <bits> bits. This assumes that <bits> is a power
>>  * of two, that <addr> both part of a memory region aligned on a
> 
> did you mean '<addr> *is* both part of' ?

Indeed.

Thanks,

	M.
Christoffer Dall May 18, 2016, 2:29 p.m. UTC | #5
On Wed, May 18, 2016 at 03:12:33PM +0100, Marc Zyngier wrote:
> On 18/05/16 13:25, Christoffer Dall wrote:
> > On Tue, May 17, 2016 at 02:33:36PM +0100, Marc Zyngier wrote:
> >> On 16/05/16 10:53, Andre Przywara wrote:
> >>> From: Marc Zyngier <marc.zyngier@arm.com>
> >>>
> >>> Add an MMIO handling framework to the VGIC emulation:
> >>> Each register is described by its offset, size (or number of bits per
> >>> IRQ, if applicable) and the read/write handler functions. We provide
> >>> initialization macros to describe each GIC register later easily.
> >>>
> >>> Separate dispatch functions for read and write accesses are connected
> >>> to the kvm_io_bus framework and binary-search for the responsible
> >>> register handler based on the offset address within the region.
> >>> We convert the incoming data (referenced by a pointer) to the host's
> >>> endianess and use pass-by-value to hand the data over to the actual
> >>> handler functions.
> >>>
> >>> The register handler prototype and the endianess conversion are
> >>> courtesy of Christoffer Dall.
> >>>
> >>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> >>> Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
> >>> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
> >>> ---
> >>> Changelog RFC..v1:
> >>> - rework MMIO dispatching to use only one kvm_io_bus device
> >>> - document purpose of register region macros
> >>> - rename "this" parameter to "dev"
> >>> - change IGROUPR to be RAO (returning 1 => Group1 IRQs)
> >>>
> >>> Changelog v1 .. v2:
> >>> * MASSIVE rework:
> >>> - store register_region pointer in kvm_io_bus linked struct
> >>> - replace write_mask_xxx functions with extract_bytes() implementation
> >>> - change handler functions' prototypes to take and return unsigned long
> >>> - use binary search to find matching register handler
> >>> - convert endianess of input data in dispatch_mmio_xxx functions
> >>> - improve readability of register initializer macros
> >>> - remove any GICv2/GICv3 specific functions from vgic-mmio.c
> >>> - rename file from vgic_mmio.c to vgic-mmio.c
> >>>
> >>> Changelog v2 .. v3:
> >>> - replace inclusion of vgic/vgic.h with arm_vgic.h
> >>>
> >>> Changelog v3 .. v4:
> >>> - add IRQ number accessor macro
> >>> - check access width in dispatcher
> >>> - treat non-covered MMIO addresses as RAZ/WI
> >>> - remove extract_bytes() (re-introduced as static later in the series)
> >>>
> >>>  include/kvm/vgic/vgic.h       |  13 +++
> >>>  virt/kvm/arm/vgic/vgic-mmio.c | 184 ++++++++++++++++++++++++++++++++++++++++++
> >>>  virt/kvm/arm/vgic/vgic-mmio.h |  87 ++++++++++++++++++++
> >>>  3 files changed, 284 insertions(+)
> >>>  create mode 100644 virt/kvm/arm/vgic/vgic-mmio.c
> >>>  create mode 100644 virt/kvm/arm/vgic/vgic-mmio.h
> >>>
> >>> diff --git a/include/kvm/vgic/vgic.h b/include/kvm/vgic/vgic.h
> >>> index f663288..ff3f9c2 100644
> >>> --- a/include/kvm/vgic/vgic.h
> >>> +++ b/include/kvm/vgic/vgic.h
> >>> @@ -106,6 +106,16 @@ struct vgic_irq {
> >>>  	enum vgic_irq_config config;	/* Level or edge */
> >>>  };
> >>>  
> >>> +struct vgic_register_region;
> >>> +
> >>> +struct vgic_io_device {
> >>> +	gpa_t base_addr;
> >>> +	struct kvm_vcpu *redist_vcpu;
> >>> +	const struct vgic_register_region *regions;
> >>> +	int nr_regions;
> >>> +	struct kvm_io_device dev;
> >>> +};
> >>> +
> >>>  struct vgic_dist {
> >>>  	bool			in_kernel;
> >>>  	bool			ready;
> >>> @@ -132,6 +142,9 @@ struct vgic_dist {
> >>>  	bool			enabled;
> >>>  
> >>>  	struct vgic_irq		*spis;
> >>> +
> >>> +	struct vgic_io_device	dist_iodev;
> >>> +	struct vgic_io_device	*redist_iodevs;
> >>>  };
> >>>  
> >>>  struct vgic_v2_cpu_if {
> >>> diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
> >>> new file mode 100644
> >>> index 0000000..012b82b
> >>> --- /dev/null
> >>> +++ b/virt/kvm/arm/vgic/vgic-mmio.c
> >>> @@ -0,0 +1,184 @@
> >>> +/*
> >>> + * VGIC MMIO handling functions
> >>> + *
> >>> + * This program is free software; you can redistribute it and/or modify
> >>> + * it under the terms of the GNU General Public License version 2 as
> >>> + * published by the Free Software Foundation.
> >>> + *
> >>> + * This program is distributed in the hope that it will be useful,
> >>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> >>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> >>> + * GNU General Public License for more details.
> >>> + */
> >>> +
> >>> +#include <linux/bitops.h>
> >>> +#include <linux/bsearch.h>
> >>> +#include <linux/kvm.h>
> >>> +#include <linux/kvm_host.h>
> >>> +#include <kvm/iodev.h>
> >>> +#include <kvm/arm_vgic.h>
> >>> +
> >>> +#include "vgic.h"
> >>> +#include "vgic-mmio.h"
> >>> +
> >>> +unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu,
> >>> +				 gpa_t addr, unsigned int len)
> >>> +{
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu,
> >>> +				 gpa_t addr, unsigned int len)
> >>> +{
> >>> +	return -1UL;
> >>> +}
> >>> +
> >>> +void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr,
> >>> +			unsigned int len, unsigned long val)
> >>> +{
> >>> +	/* Ignore */
> >>> +}
> >>> +
> >>> +static int match_region(const void *key, const void *elt)
> >>> +{
> >>> +	const unsigned int offset = (unsigned long)key;
> >>> +	const struct vgic_register_region *region = elt;
> >>> +
> >>> +	if (offset < region->reg_offset)
> >>> +		return -1;
> >>> +
> >>> +	if (offset >= region->reg_offset + region->len)
> >>> +		return 1;
> >>> +
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +/* Find the proper register handler entry given a certain address offset. */
> >>> +static const struct vgic_register_region *
> >>> +vgic_find_mmio_region(const struct vgic_register_region *region, int nr_regions,
> >>> +		      unsigned int offset)
> >>> +{
> >>> +	return bsearch((void *)(uintptr_t)offset, region, nr_regions,
> >>> +		       sizeof(region[0]), match_region);
> >>> +}
> >>> +
> >>> +/*
> >>> + * kvm_mmio_read_buf() returns a value in a format where it can be converted
> >>> + * to a byte array and be directly observed as the guest wanted it to appear
> >>> + * in memory if it had done the store itself, which is LE for the GIC, as the
> >>> + * guest knows the GIC is always LE.
> >>> + *
> >>> + * We convert this value to the CPUs native format to deal with it as a data
> >>> + * value.
> >>> + */
> >>> +unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len)
> >>> +{
> >>> +	unsigned long data = kvm_mmio_read_buf(val, len);
> >>> +
> >>> +	switch (len) {
> >>> +	case 1:
> >>> +		return data;
> >>> +	case 2:
> >>> +		return le16_to_cpu(data);
> >>> +	case 4:
> >>> +		return le32_to_cpu(data);
> >>> +	default:
> >>> +		return le64_to_cpu(data);
> >>> +	}
> >>> +}
> >>> +
> >>> +/*
> >>> + * kvm_mmio_write_buf() expects a value in a format such that if converted to
> >>> + * a byte array it is observed as the guest would see it if it could perform
> >>> + * the load directly.  Since the GIC is LE, and the guest knows this, the
> >>> + * guest expects a value in little endian format.
> >>> + *
> >>> + * We convert the data value from the CPUs native format to LE so that the
> >>> + * value is returned in the proper format.
> >>> + */
> >>> +void vgic_data_host_to_mmio_bus(void *buf, unsigned int len,
> >>> +				unsigned long data)
> >>> +{
> >>> +	switch (len) {
> >>> +	case 1:
> >>> +		break;
> >>> +	case 2:
> >>> +		data = cpu_to_le16(data);
> >>> +		break;
> >>> +	case 4:
> >>> +		data = cpu_to_le32(data);
> >>> +		break;
> >>> +	default:
> >>> +		data = cpu_to_le64(data);
> >>> +	}
> >>> +
> >>> +	kvm_mmio_write_buf(buf, len, data);
> >>> +}
> >>> +
> >>> +static
> >>> +struct vgic_io_device *kvm_to_vgic_iodev(const struct kvm_io_device *dev)
> >>> +{
> >>> +	return container_of(dev, struct vgic_io_device, dev);
> >>> +}
> >>> +
> >>> +static bool check_region(const struct vgic_register_region *region,
> >>> +			 gpa_t addr, int len)
> >>> +{
> >>> +	if ((region->access_flags & VGIC_ACCESS_8bit) && len == 1)
> >>> +		return true;
> >>> +	if ((region->access_flags & VGIC_ACCESS_32bit) &&
> >>> +	    len == sizeof(u32) && !(addr & 3))
> >>> +		return true;
> >>> +	if ((region->access_flags & VGIC_ACCESS_64bit) &&
> >>> +	    len == sizeof(u64) && !(addr & 7))
> >>> +		return true;
> >>> +
> >>> +	return false;
> >>> +}
> >>> +
> >>> +static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
> >>> +			      gpa_t addr, int len, void *val)
> >>> +{
> >>> +	struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev);
> >>> +	const struct vgic_register_region *region;
> >>> +	struct kvm_vcpu *r_vcpu;
> >>> +	unsigned long data;
> >>> +
> >>> +	region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions,
> >>> +				       addr - iodev->base_addr);
> >>> +	if (!region || !check_region(region, addr, len)) {
> >>> +		memset(val, 0, len);
> >>> +		return 0;
> >>> +	}
> >>> +
> >>> +	r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu;
> >>> +	data = region->read(r_vcpu, addr, len);
> >>> +	vgic_data_host_to_mmio_bus(val, len, data);
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
> >>> +			       gpa_t addr, int len, const void *val)
> >>> +{
> >>> +	struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev);
> >>> +	const struct vgic_register_region *region;
> >>> +	struct kvm_vcpu *r_vcpu;
> >>> +	unsigned long data = vgic_data_mmio_bus_to_host(val, len);
> >>> +
> >>> +	region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions,
> >>> +				       addr - iodev->base_addr);
> >>> +	if (!region)
> >>> +		return 0;
> >>> +
> >>> +	if (!check_region(region, addr, len))
> >>> +		return 0;
> >>> +
> >>> +	r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu;
> >>> +	region->write(r_vcpu, addr, len, data);
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +struct kvm_io_device_ops kvm_io_gic_ops = {
> >>> +	.read = dispatch_mmio_read,
> >>> +	.write = dispatch_mmio_write,
> >>> +};
> >>> diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h
> >>> new file mode 100644
> >>> index 0000000..855b1db
> >>> --- /dev/null
> >>> +++ b/virt/kvm/arm/vgic/vgic-mmio.h
> >>> @@ -0,0 +1,87 @@
> >>> +/*
> >>> + * Copyright (C) 2015, 2016 ARM Ltd.
> >>> + *
> >>> + * This program is free software; you can redistribute it and/or modify
> >>> + * it under the terms of the GNU General Public License version 2 as
> >>> + * published by the Free Software Foundation.
> >>> + *
> >>> + * This program is distributed in the hope that it will be useful,
> >>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> >>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> >>> + * GNU General Public License for more details.
> >>> + *
> >>> + * You should have received a copy of the GNU General Public License
> >>> + * along with this program.  If not, see <http://www.gnu.org/licenses/>.
> >>> + */
> >>> +#ifndef __KVM_ARM_VGIC_MMIO_H__
> >>> +#define __KVM_ARM_VGIC_MMIO_H__
> >>> +
> >>> +struct vgic_register_region {
> >>> +	unsigned int reg_offset;
> >>> +	unsigned int len;
> >>> +	unsigned int bits_per_irq;
> >>> +	unsigned int access_flags;
> >>> +	unsigned long (*read)(struct kvm_vcpu *vcpu, gpa_t addr,
> >>> +			      unsigned int len);
> >>> +	void (*write)(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len,
> >>> +		      unsigned long val);
> >>> +};
> >>> +
> >>> +extern struct kvm_io_device_ops kvm_io_gic_ops;
> >>> +
> >>> +#define VGIC_ACCESS_8bit	1
> >>> +#define VGIC_ACCESS_32bit	2
> >>> +#define VGIC_ACCESS_64bit	4
> >>> +
> >>> +/* generate a mask that covers 1024 interrupts with <b> bits per IRQ */
> >>
> >> Hmmm. I'd appreciate some additional comments, specially when it comes
> >> to the various restrictions. May I'd suggest something like:
> >>
> >> /*
> >>  * Generate a mask that covers the number of bytes required to address
> >>  * up to 1024 interrupts, each represented by <b> bits. This assumes
> >>  * that <b> is a power of two.
> >>  *
> >>  * ilog2(b) + ilog2(1024) is the number of bits required to bit-address
> >>  * 1024 interrupts, each represented by b bits. Minus ilog2(8) converts
> >>  * this to a byte address.
> > 
> > So I'm guessting this is a rewrite of ilog2( (b * 1024) / 8), but I'm
> 
> Yes, same thing.
> 
> > stupid enough to not understand our use of logarithms here.  Can someone
> > remind me whatever I forgot at CS 101?
> 
> I'm bad at explaining that kind of things, so let me just quote
> Wikipedia (https://en.wikipedia.org/wiki/Binary_logarithm):
> 
> "The number of digits (bits) in the binary representation of a positive
> integer n is the integral part of 1 + log2?n"
> 
> Is that what you were missing?
> 

yeah, duh, I feel stupid now.  Mind if we add this "note to
Christoffer's brain" to the comment:

"Since n bits can address a maximum of N=n^2 values, we get the number of
bits required to address a number of values by applying log2(N).  With
N being 1024 * b, we get  that ilog(b) + ilog2(1024) is the number..."


> > 
> >>  */
> >>
> >>> +#define VGIC_ADDR_IRQ_MASK(b) GENMASK_ULL(ilog2(b) + ilog2(1024) - \
> >>> +					  ilog2(BITS_PER_BYTE) - 1, 0)
> >>
> >> /*
> >>  * Convert a base address into a base interrupt (each interrupt
> >>  * represented by <bits> bits. This assumes that <bits> is a power
> >>  * of two, that <addr> both part of a memory region aligned on a
> > 
> > did you mean '<addr> *is* both part of' ?
> 
> Indeed.
> 

Thanks,
-Christoffer
Andre Przywara May 18, 2016, 3:55 p.m. UTC | #6
Hi,

....

>> +
>> +/* generate a mask that covers 1024 interrupts with <b> bits per IRQ */
>> +#define VGIC_ADDR_IRQ_MASK(b) GENMASK_ULL(ilog2(b) + ilog2(1024) - \
>> +					  ilog2(BITS_PER_BYTE) - 1, 0)
>> +#define VGIC_ADDR_TO_INTID(addr, bits)  (((addr) & VGIC_ADDR_IRQ_MASK(bits)) * \
>> +					64 / (bits) / 8)
> 
> In the comment we end up adding here, can we also describe why
> (addr & <magic mask>) * <magic 64> / (bits) / <BITS_PER_BYTE OR BYTES_PER_ULL>
> gives us what we need, because I don't get it.

The reason is: we deal with 8 bits per byte, but have
bits-per-interrupts values bigger than 8. Doing the maths in floating
point arithmetic would work fine:

(float)(addr & mask) * (8.0 / bits_per_IRQ)

So would this comment make sense?

/*
 * Since we can have more than 8 bits per interrupt, we can't use
 * "8 / bpi" as a multiplicand directly, so we use a
 * fixed-point-arithmetic version of it tailored to cover at most 64
 * bits per IRQ.
 */

Cheers,
Andre.
Andre Przywara May 18, 2016, 4:46 p.m. UTC | #7
Hi,

>>> +
>>> +/* generate a mask that covers 1024 interrupts with <b> bits per IRQ */
>>
>> Hmmm. I'd appreciate some additional comments, specially when it comes
>> to the various restrictions. May I'd suggest something like:
>>
>> /*
>>  * Generate a mask that covers the number of bytes required to address
>>  * up to 1024 interrupts, each represented by <b> bits. This assumes
>>  * that <b> is a power of two.
>>  *
>>  * ilog2(b) + ilog2(1024) is the number of bits required to bit-address
>>  * 1024 interrupts, each represented by b bits. Minus ilog2(8) converts
>>  * this to a byte address.
> 
> So I'm guessting this is a rewrite of ilog2( (b * 1024) / 8), but I'm
> stupid enough to not understand our use of logarithms here.  Can someone
> remind me whatever I forgot at CS 101?

I guess it was more me not seeing the wood for the trees here:
Indeed doing the multiplication first and then calling ilog2 seems to
make more sense. Also I was thinking: Isn't
"GENMASK_ULL(ilog2(n) - 1, 0)" the same as "n - 1"?

So can't we just write:

#define VGIC_ADDR_IRQ_MASK(bpi) (((bpi) * 1024 / 8) - 1)

Proven by enumeration - over the values we use ;-)

I'd keep the first paragraph of Marc's comment above then, but we can
avoid mentioning Advanced Maths textbooks about binary logarithmic ;-)

Cheers,
Andre.

>>  */
>>
>>> +#define VGIC_ADDR_IRQ_MASK(b) GENMASK_ULL(ilog2(b) + ilog2(1024) - \
>>> +					  ilog2(BITS_PER_BYTE) - 1, 0)
>>
>> /*
>>  * Convert a base address into a base interrupt (each interrupt
>>  * represented by <bits> bits. This assumes that <bits> is a power
>>  * of two, that <addr> both part of a memory region aligned on a
> 
> did you mean '<addr> *is* both part of' ?
> 
>>  * <b> bits boundary, and itself aligned on that same boundary
>>  * (for regions that describe an interrupt with more than a single
>>  * byte of data).
>>  */
>>
> 
> In any case, thanks for the commentary, I was faily lost here.
>
Christoffer Dall May 18, 2016, 5:08 p.m. UTC | #8
On Wed, May 18, 2016 at 05:46:55PM +0100, Andre Przywara wrote:
> Hi,
> 
> >>> +
> >>> +/* generate a mask that covers 1024 interrupts with <b> bits per IRQ */
> >>
> >> Hmmm. I'd appreciate some additional comments, specially when it comes
> >> to the various restrictions. May I'd suggest something like:
> >>
> >> /*
> >>  * Generate a mask that covers the number of bytes required to address
> >>  * up to 1024 interrupts, each represented by <b> bits. This assumes
> >>  * that <b> is a power of two.
> >>  *
> >>  * ilog2(b) + ilog2(1024) is the number of bits required to bit-address
> >>  * 1024 interrupts, each represented by b bits. Minus ilog2(8) converts
> >>  * this to a byte address.
> > 
> > So I'm guessting this is a rewrite of ilog2( (b * 1024) / 8), but I'm
> > stupid enough to not understand our use of logarithms here.  Can someone
> > remind me whatever I forgot at CS 101?
> 
> I guess it was more me not seeing the wood for the trees here:
> Indeed doing the multiplication first and then calling ilog2 seems to
> make more sense. Also I was thinking: Isn't
> "GENMASK_ULL(ilog2(n) - 1, 0)" the same as "n - 1"?

if there's no integer rounding taking place with ilog2 (iow. n is a
power of 2) then yes, I believe it is.

> 
> So can't we just write:
> 
> #define VGIC_ADDR_IRQ_MASK(bpi) (((bpi) * 1024 / 8) - 1)

that certianly all of the sudden feels intuitive.

> 
> Proven by enumeration - over the values we use ;-)
> 
> I'd keep the first paragraph of Marc's comment above then, but we can
> avoid mentioning Advanced Maths textbooks about binary logarithmic ;-)
> 

Haha, you saved my day with that comment.  I feel slightly less idiotic
now, yes, let's call it advanced quantum math or something instead of CS
101 :)

-Christoffer

> 
> >>  */
> >>
> >>> +#define VGIC_ADDR_IRQ_MASK(b) GENMASK_ULL(ilog2(b) + ilog2(1024) - \
> >>> +					  ilog2(BITS_PER_BYTE) - 1, 0)
> >>
> >> /*
> >>  * Convert a base address into a base interrupt (each interrupt
> >>  * represented by <bits> bits. This assumes that <bits> is a power
> >>  * of two, that <addr> both part of a memory region aligned on a
> > 
> > did you mean '<addr> *is* both part of' ?
> > 
> >>  * <b> bits boundary, and itself aligned on that same boundary
> >>  * (for regions that describe an interrupt with more than a single
> >>  * byte of data).
> >>  */
> >>
> > 
> > In any case, thanks for the commentary, I was faily lost here.
> >
Christoffer Dall May 18, 2016, 6:06 p.m. UTC | #9
On Wed, May 18, 2016 at 04:55:45PM +0100, Andre Przywara wrote:
> Hi,
> 
> ....
> 
> >> +
> >> +/* generate a mask that covers 1024 interrupts with <b> bits per IRQ */
> >> +#define VGIC_ADDR_IRQ_MASK(b) GENMASK_ULL(ilog2(b) + ilog2(1024) - \
> >> +					  ilog2(BITS_PER_BYTE) - 1, 0)
> >> +#define VGIC_ADDR_TO_INTID(addr, bits)  (((addr) & VGIC_ADDR_IRQ_MASK(bits)) * \
> >> +					64 / (bits) / 8)
> > 
> > In the comment we end up adding here, can we also describe why
> > (addr & <magic mask>) * <magic 64> / (bits) / <BITS_PER_BYTE OR BYTES_PER_ULL>
> > gives us what we need, because I don't get it.
> 
> The reason is: we deal with 8 bits per byte, but have
> bits-per-interrupts values bigger than 8. Doing the maths in floating
> point arithmetic would work fine:
> 
> (float)(addr & mask) * (8.0 / bits_per_IRQ)
> 
> So would this comment make sense?
> 
> /*
>  * Since we can have more than 8 bits per interrupt, we can't use
>  * "8 / bpi" as a multiplicand directly, so we use a
>  * fixed-point-arithmetic version of it tailored to cover at most 64
>  * bits per IRQ.
>  */
> 

Something like this certainly helps, here's another version which is
easier for me to understand, but you can take your pick:

 /*
  * (addr & mask) gives us the byte offset for the INT ID, so we want to
  * divide this with 'bytes per irq' to get the INT ID, which is given
  * by '(bits) / 8'.  But we do this with fixed-point-arithmetic and
  * take advantage of the fact that division by a fraction equals
  * multiplication with the inverted fraction, and scale up both the
  * numerator and denominator with 8 to support at most 64 bits per IRQ:
  */

At least I think we all agree that the approach works by now.

-Christoffer
diff mbox

Patch

diff --git a/include/kvm/vgic/vgic.h b/include/kvm/vgic/vgic.h
index f663288..ff3f9c2 100644
--- a/include/kvm/vgic/vgic.h
+++ b/include/kvm/vgic/vgic.h
@@ -106,6 +106,16 @@  struct vgic_irq {
 	enum vgic_irq_config config;	/* Level or edge */
 };
 
+struct vgic_register_region;
+
+struct vgic_io_device {
+	gpa_t base_addr;
+	struct kvm_vcpu *redist_vcpu;
+	const struct vgic_register_region *regions;
+	int nr_regions;
+	struct kvm_io_device dev;
+};
+
 struct vgic_dist {
 	bool			in_kernel;
 	bool			ready;
@@ -132,6 +142,9 @@  struct vgic_dist {
 	bool			enabled;
 
 	struct vgic_irq		*spis;
+
+	struct vgic_io_device	dist_iodev;
+	struct vgic_io_device	*redist_iodevs;
 };
 
 struct vgic_v2_cpu_if {
diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
new file mode 100644
index 0000000..012b82b
--- /dev/null
+++ b/virt/kvm/arm/vgic/vgic-mmio.c
@@ -0,0 +1,184 @@ 
+/*
+ * VGIC MMIO handling functions
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/bitops.h>
+#include <linux/bsearch.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <kvm/iodev.h>
+#include <kvm/arm_vgic.h>
+
+#include "vgic.h"
+#include "vgic-mmio.h"
+
+unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu,
+				 gpa_t addr, unsigned int len)
+{
+	return 0;
+}
+
+unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu,
+				 gpa_t addr, unsigned int len)
+{
+	return -1UL;
+}
+
+void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr,
+			unsigned int len, unsigned long val)
+{
+	/* Ignore */
+}
+
+static int match_region(const void *key, const void *elt)
+{
+	const unsigned int offset = (unsigned long)key;
+	const struct vgic_register_region *region = elt;
+
+	if (offset < region->reg_offset)
+		return -1;
+
+	if (offset >= region->reg_offset + region->len)
+		return 1;
+
+	return 0;
+}
+
+/* Find the proper register handler entry given a certain address offset. */
+static const struct vgic_register_region *
+vgic_find_mmio_region(const struct vgic_register_region *region, int nr_regions,
+		      unsigned int offset)
+{
+	return bsearch((void *)(uintptr_t)offset, region, nr_regions,
+		       sizeof(region[0]), match_region);
+}
+
+/*
+ * kvm_mmio_read_buf() returns a value in a format where it can be converted
+ * to a byte array and be directly observed as the guest wanted it to appear
+ * in memory if it had done the store itself, which is LE for the GIC, as the
+ * guest knows the GIC is always LE.
+ *
+ * We convert this value to the CPUs native format to deal with it as a data
+ * value.
+ */
+unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len)
+{
+	unsigned long data = kvm_mmio_read_buf(val, len);
+
+	switch (len) {
+	case 1:
+		return data;
+	case 2:
+		return le16_to_cpu(data);
+	case 4:
+		return le32_to_cpu(data);
+	default:
+		return le64_to_cpu(data);
+	}
+}
+
+/*
+ * kvm_mmio_write_buf() expects a value in a format such that if converted to
+ * a byte array it is observed as the guest would see it if it could perform
+ * the load directly.  Since the GIC is LE, and the guest knows this, the
+ * guest expects a value in little endian format.
+ *
+ * We convert the data value from the CPUs native format to LE so that the
+ * value is returned in the proper format.
+ */
+void vgic_data_host_to_mmio_bus(void *buf, unsigned int len,
+				unsigned long data)
+{
+	switch (len) {
+	case 1:
+		break;
+	case 2:
+		data = cpu_to_le16(data);
+		break;
+	case 4:
+		data = cpu_to_le32(data);
+		break;
+	default:
+		data = cpu_to_le64(data);
+	}
+
+	kvm_mmio_write_buf(buf, len, data);
+}
+
+static
+struct vgic_io_device *kvm_to_vgic_iodev(const struct kvm_io_device *dev)
+{
+	return container_of(dev, struct vgic_io_device, dev);
+}
+
+static bool check_region(const struct vgic_register_region *region,
+			 gpa_t addr, int len)
+{
+	if ((region->access_flags & VGIC_ACCESS_8bit) && len == 1)
+		return true;
+	if ((region->access_flags & VGIC_ACCESS_32bit) &&
+	    len == sizeof(u32) && !(addr & 3))
+		return true;
+	if ((region->access_flags & VGIC_ACCESS_64bit) &&
+	    len == sizeof(u64) && !(addr & 7))
+		return true;
+
+	return false;
+}
+
+static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
+			      gpa_t addr, int len, void *val)
+{
+	struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev);
+	const struct vgic_register_region *region;
+	struct kvm_vcpu *r_vcpu;
+	unsigned long data;
+
+	region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions,
+				       addr - iodev->base_addr);
+	if (!region || !check_region(region, addr, len)) {
+		memset(val, 0, len);
+		return 0;
+	}
+
+	r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu;
+	data = region->read(r_vcpu, addr, len);
+	vgic_data_host_to_mmio_bus(val, len, data);
+	return 0;
+}
+
+static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
+			       gpa_t addr, int len, const void *val)
+{
+	struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev);
+	const struct vgic_register_region *region;
+	struct kvm_vcpu *r_vcpu;
+	unsigned long data = vgic_data_mmio_bus_to_host(val, len);
+
+	region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions,
+				       addr - iodev->base_addr);
+	if (!region)
+		return 0;
+
+	if (!check_region(region, addr, len))
+		return 0;
+
+	r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu;
+	region->write(r_vcpu, addr, len, data);
+	return 0;
+}
+
+struct kvm_io_device_ops kvm_io_gic_ops = {
+	.read = dispatch_mmio_read,
+	.write = dispatch_mmio_write,
+};
diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h
new file mode 100644
index 0000000..855b1db
--- /dev/null
+++ b/virt/kvm/arm/vgic/vgic-mmio.h
@@ -0,0 +1,87 @@ 
+/*
+ * Copyright (C) 2015, 2016 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __KVM_ARM_VGIC_MMIO_H__
+#define __KVM_ARM_VGIC_MMIO_H__
+
+struct vgic_register_region {
+	unsigned int reg_offset;
+	unsigned int len;
+	unsigned int bits_per_irq;
+	unsigned int access_flags;
+	unsigned long (*read)(struct kvm_vcpu *vcpu, gpa_t addr,
+			      unsigned int len);
+	void (*write)(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len,
+		      unsigned long val);
+};
+
+extern struct kvm_io_device_ops kvm_io_gic_ops;
+
+#define VGIC_ACCESS_8bit	1
+#define VGIC_ACCESS_32bit	2
+#define VGIC_ACCESS_64bit	4
+
+/* generate a mask that covers 1024 interrupts with <b> bits per IRQ */
+#define VGIC_ADDR_IRQ_MASK(b) GENMASK_ULL(ilog2(b) + ilog2(1024) - \
+					  ilog2(BITS_PER_BYTE) - 1, 0)
+#define VGIC_ADDR_TO_INTID(addr, bits)  (((addr) & VGIC_ADDR_IRQ_MASK(bits)) * \
+					64 / (bits) / 8)
+
+/*
+ * Some VGIC registers store per-IRQ information, with a different number
+ * of bits per IRQ. For those registers this macro is used.
+ * The _WITH_LENGTH version instantiates registers with a fixed length
+ * and is mutually exclusive with the _PER_IRQ version.
+ */
+#define REGISTER_DESC_WITH_BITS_PER_IRQ(off, rd, wr, bpi, acc)		\
+	{								\
+		.reg_offset = off,					\
+		.bits_per_irq = bpi,					\
+		.len = bpi * 1024 / 8,					\
+		.access_flags = acc,					\
+		.read = rd,						\
+		.write = wr,						\
+	}
+
+#define REGISTER_DESC_WITH_LENGTH(off, rd, wr, length, acc)		\
+	{								\
+		.reg_offset = off,					\
+		.bits_per_irq = 0,					\
+		.len = length,						\
+		.access_flags = acc,					\
+		.read = rd,						\
+		.write = wr,						\
+	}
+
+int kvm_vgic_register_mmio_region(struct kvm *kvm, struct kvm_vcpu *vcpu,
+				  struct vgic_register_region *reg_desc,
+				  struct vgic_io_device *region,
+				  int nr_irqs, bool offset_private);
+
+unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len);
+
+void vgic_data_host_to_mmio_bus(void *buf, unsigned int len,
+				unsigned long data);
+
+unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu,
+				 gpa_t addr, unsigned int len);
+
+unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu,
+				 gpa_t addr, unsigned int len);
+
+void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr,
+			unsigned int len, unsigned long val);
+
+#endif