Message ID | 1463392481-26583-23-git-send-email-andre.przywara@arm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 16/05/16 10:53, Andre Przywara wrote: > From: Marc Zyngier <marc.zyngier@arm.com> > > Add an MMIO handling framework to the VGIC emulation: > Each register is described by its offset, size (or number of bits per > IRQ, if applicable) and the read/write handler functions. We provide > initialization macros to describe each GIC register later easily. > > Separate dispatch functions for read and write accesses are connected > to the kvm_io_bus framework and binary-search for the responsible > register handler based on the offset address within the region. > We convert the incoming data (referenced by a pointer) to the host's > endianess and use pass-by-value to hand the data over to the actual > handler functions. > > The register handler prototype and the endianess conversion are > courtesy of Christoffer Dall. > > Signed-off-by: Marc Zyngier <marc.zyngier@arm.com> > Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org> > Signed-off-by: Andre Przywara <andre.przywara@arm.com> > --- > Changelog RFC..v1: > - rework MMIO dispatching to use only one kvm_io_bus device > - document purpose of register region macros > - rename "this" parameter to "dev" > - change IGROUPR to be RAO (returning 1 => Group1 IRQs) > > Changelog v1 .. v2: > * MASSIVE rework: > - store register_region pointer in kvm_io_bus linked struct > - replace write_mask_xxx functions with extract_bytes() implementation > - change handler functions' prototypes to take and return unsigned long > - use binary search to find matching register handler > - convert endianess of input data in dispatch_mmio_xxx functions > - improve readability of register initializer macros > - remove any GICv2/GICv3 specific functions from vgic-mmio.c > - rename file from vgic_mmio.c to vgic-mmio.c > > Changelog v2 .. v3: > - replace inclusion of vgic/vgic.h with arm_vgic.h > > Changelog v3 .. v4: > - add IRQ number accessor macro > - check access width in dispatcher > - treat non-covered MMIO addresses as RAZ/WI > - remove extract_bytes() (re-introduced as static later in the series) > > include/kvm/vgic/vgic.h | 13 +++ > virt/kvm/arm/vgic/vgic-mmio.c | 184 ++++++++++++++++++++++++++++++++++++++++++ > virt/kvm/arm/vgic/vgic-mmio.h | 87 ++++++++++++++++++++ > 3 files changed, 284 insertions(+) > create mode 100644 virt/kvm/arm/vgic/vgic-mmio.c > create mode 100644 virt/kvm/arm/vgic/vgic-mmio.h > > diff --git a/include/kvm/vgic/vgic.h b/include/kvm/vgic/vgic.h > index f663288..ff3f9c2 100644 > --- a/include/kvm/vgic/vgic.h > +++ b/include/kvm/vgic/vgic.h > @@ -106,6 +106,16 @@ struct vgic_irq { > enum vgic_irq_config config; /* Level or edge */ > }; > > +struct vgic_register_region; > + > +struct vgic_io_device { > + gpa_t base_addr; > + struct kvm_vcpu *redist_vcpu; > + const struct vgic_register_region *regions; > + int nr_regions; > + struct kvm_io_device dev; > +}; > + > struct vgic_dist { > bool in_kernel; > bool ready; > @@ -132,6 +142,9 @@ struct vgic_dist { > bool enabled; > > struct vgic_irq *spis; > + > + struct vgic_io_device dist_iodev; > + struct vgic_io_device *redist_iodevs; > }; > > struct vgic_v2_cpu_if { > diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c > new file mode 100644 > index 0000000..012b82b > --- /dev/null > +++ b/virt/kvm/arm/vgic/vgic-mmio.c > @@ -0,0 +1,184 @@ > +/* > + * VGIC MMIO handling functions > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License version 2 as > + * published by the Free Software Foundation. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + */ > + > +#include <linux/bitops.h> > +#include <linux/bsearch.h> > +#include <linux/kvm.h> > +#include <linux/kvm_host.h> > +#include <kvm/iodev.h> > +#include <kvm/arm_vgic.h> > + > +#include "vgic.h" > +#include "vgic-mmio.h" > + > +unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu, > + gpa_t addr, unsigned int len) > +{ > + return 0; > +} > + > +unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu, > + gpa_t addr, unsigned int len) > +{ > + return -1UL; > +} > + > +void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr, > + unsigned int len, unsigned long val) > +{ > + /* Ignore */ > +} > + > +static int match_region(const void *key, const void *elt) > +{ > + const unsigned int offset = (unsigned long)key; > + const struct vgic_register_region *region = elt; > + > + if (offset < region->reg_offset) > + return -1; > + > + if (offset >= region->reg_offset + region->len) > + return 1; > + > + return 0; > +} > + > +/* Find the proper register handler entry given a certain address offset. */ > +static const struct vgic_register_region * > +vgic_find_mmio_region(const struct vgic_register_region *region, int nr_regions, > + unsigned int offset) > +{ > + return bsearch((void *)(uintptr_t)offset, region, nr_regions, > + sizeof(region[0]), match_region); > +} > + > +/* > + * kvm_mmio_read_buf() returns a value in a format where it can be converted > + * to a byte array and be directly observed as the guest wanted it to appear > + * in memory if it had done the store itself, which is LE for the GIC, as the > + * guest knows the GIC is always LE. > + * > + * We convert this value to the CPUs native format to deal with it as a data > + * value. > + */ > +unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len) > +{ > + unsigned long data = kvm_mmio_read_buf(val, len); > + > + switch (len) { > + case 1: > + return data; > + case 2: > + return le16_to_cpu(data); > + case 4: > + return le32_to_cpu(data); > + default: > + return le64_to_cpu(data); > + } > +} > + > +/* > + * kvm_mmio_write_buf() expects a value in a format such that if converted to > + * a byte array it is observed as the guest would see it if it could perform > + * the load directly. Since the GIC is LE, and the guest knows this, the > + * guest expects a value in little endian format. > + * > + * We convert the data value from the CPUs native format to LE so that the > + * value is returned in the proper format. > + */ > +void vgic_data_host_to_mmio_bus(void *buf, unsigned int len, > + unsigned long data) > +{ > + switch (len) { > + case 1: > + break; > + case 2: > + data = cpu_to_le16(data); > + break; > + case 4: > + data = cpu_to_le32(data); > + break; > + default: > + data = cpu_to_le64(data); > + } > + > + kvm_mmio_write_buf(buf, len, data); > +} > + > +static > +struct vgic_io_device *kvm_to_vgic_iodev(const struct kvm_io_device *dev) > +{ > + return container_of(dev, struct vgic_io_device, dev); > +} > + > +static bool check_region(const struct vgic_register_region *region, > + gpa_t addr, int len) > +{ > + if ((region->access_flags & VGIC_ACCESS_8bit) && len == 1) > + return true; > + if ((region->access_flags & VGIC_ACCESS_32bit) && > + len == sizeof(u32) && !(addr & 3)) > + return true; > + if ((region->access_flags & VGIC_ACCESS_64bit) && > + len == sizeof(u64) && !(addr & 7)) > + return true; > + > + return false; > +} > + > +static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, > + gpa_t addr, int len, void *val) > +{ > + struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev); > + const struct vgic_register_region *region; > + struct kvm_vcpu *r_vcpu; > + unsigned long data; > + > + region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions, > + addr - iodev->base_addr); > + if (!region || !check_region(region, addr, len)) { > + memset(val, 0, len); > + return 0; > + } > + > + r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu; > + data = region->read(r_vcpu, addr, len); > + vgic_data_host_to_mmio_bus(val, len, data); > + return 0; > +} > + > +static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, > + gpa_t addr, int len, const void *val) > +{ > + struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev); > + const struct vgic_register_region *region; > + struct kvm_vcpu *r_vcpu; > + unsigned long data = vgic_data_mmio_bus_to_host(val, len); > + > + region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions, > + addr - iodev->base_addr); > + if (!region) > + return 0; > + > + if (!check_region(region, addr, len)) > + return 0; > + > + r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu; > + region->write(r_vcpu, addr, len, data); > + return 0; > +} > + > +struct kvm_io_device_ops kvm_io_gic_ops = { > + .read = dispatch_mmio_read, > + .write = dispatch_mmio_write, > +}; > diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h > new file mode 100644 > index 0000000..855b1db > --- /dev/null > +++ b/virt/kvm/arm/vgic/vgic-mmio.h > @@ -0,0 +1,87 @@ > +/* > + * Copyright (C) 2015, 2016 ARM Ltd. > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License version 2 as > + * published by the Free Software Foundation. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License > + * along with this program. If not, see <http://www.gnu.org/licenses/>. > + */ > +#ifndef __KVM_ARM_VGIC_MMIO_H__ > +#define __KVM_ARM_VGIC_MMIO_H__ > + > +struct vgic_register_region { > + unsigned int reg_offset; > + unsigned int len; > + unsigned int bits_per_irq; > + unsigned int access_flags; > + unsigned long (*read)(struct kvm_vcpu *vcpu, gpa_t addr, > + unsigned int len); > + void (*write)(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, > + unsigned long val); > +}; > + > +extern struct kvm_io_device_ops kvm_io_gic_ops; > + > +#define VGIC_ACCESS_8bit 1 > +#define VGIC_ACCESS_32bit 2 > +#define VGIC_ACCESS_64bit 4 > + > +/* generate a mask that covers 1024 interrupts with <b> bits per IRQ */ Hmmm. I'd appreciate some additional comments, specially when it comes to the various restrictions. May I'd suggest something like: /* * Generate a mask that covers the number of bytes required to address * up to 1024 interrupts, each represented by <b> bits. This assumes * that <b> is a power of two. * * ilog2(b) + ilog2(1024) is the number of bits required to bit-address * 1024 interrupts, each represented by b bits. Minus ilog2(8) converts * this to a byte address. */ > +#define VGIC_ADDR_IRQ_MASK(b) GENMASK_ULL(ilog2(b) + ilog2(1024) - \ > + ilog2(BITS_PER_BYTE) - 1, 0) /* * Convert a base address into a base interrupt (each interrupt * represented by <bits> bits. This assumes that <bits> is a power * of two, that <addr> both part of a memory region aligned on a * <b> bits boundary, and itself aligned on that same boundary * (for regions that describe an interrupt with more than a single * byte of data). */ > +#define VGIC_ADDR_TO_INTID(addr, bits) (((addr) & VGIC_ADDR_IRQ_MASK(bits)) * \ > + 64 / (bits) / 8) > + > +/* > + * Some VGIC registers store per-IRQ information, with a different number > + * of bits per IRQ. For those registers this macro is used. > + * The _WITH_LENGTH version instantiates registers with a fixed length > + * and is mutually exclusive with the _PER_IRQ version. > + */ > +#define REGISTER_DESC_WITH_BITS_PER_IRQ(off, rd, wr, bpi, acc) \ > + { \ > + .reg_offset = off, \ > + .bits_per_irq = bpi, \ > + .len = bpi * 1024 / 8, \ > + .access_flags = acc, \ > + .read = rd, \ > + .write = wr, \ > + } > + > +#define REGISTER_DESC_WITH_LENGTH(off, rd, wr, length, acc) \ > + { \ > + .reg_offset = off, \ > + .bits_per_irq = 0, \ > + .len = length, \ > + .access_flags = acc, \ > + .read = rd, \ > + .write = wr, \ > + } > + > +int kvm_vgic_register_mmio_region(struct kvm *kvm, struct kvm_vcpu *vcpu, > + struct vgic_register_region *reg_desc, > + struct vgic_io_device *region, > + int nr_irqs, bool offset_private); > + > +unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len); > + > +void vgic_data_host_to_mmio_bus(void *buf, unsigned int len, > + unsigned long data); > + > +unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu, > + gpa_t addr, unsigned int len); > + > +unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu, > + gpa_t addr, unsigned int len); > + > +void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr, > + unsigned int len, unsigned long val); > + > +#endif > Thanks, M.
On Tue, May 17, 2016 at 02:33:36PM +0100, Marc Zyngier wrote: > On 16/05/16 10:53, Andre Przywara wrote: > > From: Marc Zyngier <marc.zyngier@arm.com> > > > > Add an MMIO handling framework to the VGIC emulation: > > Each register is described by its offset, size (or number of bits per > > IRQ, if applicable) and the read/write handler functions. We provide > > initialization macros to describe each GIC register later easily. > > > > Separate dispatch functions for read and write accesses are connected > > to the kvm_io_bus framework and binary-search for the responsible > > register handler based on the offset address within the region. > > We convert the incoming data (referenced by a pointer) to the host's > > endianess and use pass-by-value to hand the data over to the actual > > handler functions. > > > > The register handler prototype and the endianess conversion are > > courtesy of Christoffer Dall. > > > > Signed-off-by: Marc Zyngier <marc.zyngier@arm.com> > > Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org> > > Signed-off-by: Andre Przywara <andre.przywara@arm.com> > > --- > > Changelog RFC..v1: > > - rework MMIO dispatching to use only one kvm_io_bus device > > - document purpose of register region macros > > - rename "this" parameter to "dev" > > - change IGROUPR to be RAO (returning 1 => Group1 IRQs) > > > > Changelog v1 .. v2: > > * MASSIVE rework: > > - store register_region pointer in kvm_io_bus linked struct > > - replace write_mask_xxx functions with extract_bytes() implementation > > - change handler functions' prototypes to take and return unsigned long > > - use binary search to find matching register handler > > - convert endianess of input data in dispatch_mmio_xxx functions > > - improve readability of register initializer macros > > - remove any GICv2/GICv3 specific functions from vgic-mmio.c > > - rename file from vgic_mmio.c to vgic-mmio.c > > > > Changelog v2 .. v3: > > - replace inclusion of vgic/vgic.h with arm_vgic.h > > > > Changelog v3 .. v4: > > - add IRQ number accessor macro > > - check access width in dispatcher > > - treat non-covered MMIO addresses as RAZ/WI > > - remove extract_bytes() (re-introduced as static later in the series) > > > > include/kvm/vgic/vgic.h | 13 +++ > > virt/kvm/arm/vgic/vgic-mmio.c | 184 ++++++++++++++++++++++++++++++++++++++++++ > > virt/kvm/arm/vgic/vgic-mmio.h | 87 ++++++++++++++++++++ > > 3 files changed, 284 insertions(+) > > create mode 100644 virt/kvm/arm/vgic/vgic-mmio.c > > create mode 100644 virt/kvm/arm/vgic/vgic-mmio.h > > > > diff --git a/include/kvm/vgic/vgic.h b/include/kvm/vgic/vgic.h > > index f663288..ff3f9c2 100644 > > --- a/include/kvm/vgic/vgic.h > > +++ b/include/kvm/vgic/vgic.h > > @@ -106,6 +106,16 @@ struct vgic_irq { > > enum vgic_irq_config config; /* Level or edge */ > > }; > > > > +struct vgic_register_region; > > + > > +struct vgic_io_device { > > + gpa_t base_addr; > > + struct kvm_vcpu *redist_vcpu; > > + const struct vgic_register_region *regions; > > + int nr_regions; > > + struct kvm_io_device dev; > > +}; > > + > > struct vgic_dist { > > bool in_kernel; > > bool ready; > > @@ -132,6 +142,9 @@ struct vgic_dist { > > bool enabled; > > > > struct vgic_irq *spis; > > + > > + struct vgic_io_device dist_iodev; > > + struct vgic_io_device *redist_iodevs; > > }; > > > > struct vgic_v2_cpu_if { > > diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c > > new file mode 100644 > > index 0000000..012b82b > > --- /dev/null > > +++ b/virt/kvm/arm/vgic/vgic-mmio.c > > @@ -0,0 +1,184 @@ > > +/* > > + * VGIC MMIO handling functions > > + * > > + * This program is free software; you can redistribute it and/or modify > > + * it under the terms of the GNU General Public License version 2 as > > + * published by the Free Software Foundation. > > + * > > + * This program is distributed in the hope that it will be useful, > > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > > + * GNU General Public License for more details. > > + */ > > + > > +#include <linux/bitops.h> > > +#include <linux/bsearch.h> > > +#include <linux/kvm.h> > > +#include <linux/kvm_host.h> > > +#include <kvm/iodev.h> > > +#include <kvm/arm_vgic.h> > > + > > +#include "vgic.h" > > +#include "vgic-mmio.h" > > + > > +unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu, > > + gpa_t addr, unsigned int len) > > +{ > > + return 0; > > +} > > + > > +unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu, > > + gpa_t addr, unsigned int len) > > +{ > > + return -1UL; > > +} > > + > > +void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr, > > + unsigned int len, unsigned long val) > > +{ > > + /* Ignore */ > > +} > > + > > +static int match_region(const void *key, const void *elt) > > +{ > > + const unsigned int offset = (unsigned long)key; > > + const struct vgic_register_region *region = elt; > > + > > + if (offset < region->reg_offset) > > + return -1; > > + > > + if (offset >= region->reg_offset + region->len) > > + return 1; > > + > > + return 0; > > +} > > + > > +/* Find the proper register handler entry given a certain address offset. */ > > +static const struct vgic_register_region * > > +vgic_find_mmio_region(const struct vgic_register_region *region, int nr_regions, > > + unsigned int offset) > > +{ > > + return bsearch((void *)(uintptr_t)offset, region, nr_regions, > > + sizeof(region[0]), match_region); > > +} > > + > > +/* > > + * kvm_mmio_read_buf() returns a value in a format where it can be converted > > + * to a byte array and be directly observed as the guest wanted it to appear > > + * in memory if it had done the store itself, which is LE for the GIC, as the > > + * guest knows the GIC is always LE. > > + * > > + * We convert this value to the CPUs native format to deal with it as a data > > + * value. > > + */ > > +unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len) > > +{ > > + unsigned long data = kvm_mmio_read_buf(val, len); > > + > > + switch (len) { > > + case 1: > > + return data; > > + case 2: > > + return le16_to_cpu(data); > > + case 4: > > + return le32_to_cpu(data); > > + default: > > + return le64_to_cpu(data); > > + } > > +} > > + > > +/* > > + * kvm_mmio_write_buf() expects a value in a format such that if converted to > > + * a byte array it is observed as the guest would see it if it could perform > > + * the load directly. Since the GIC is LE, and the guest knows this, the > > + * guest expects a value in little endian format. > > + * > > + * We convert the data value from the CPUs native format to LE so that the > > + * value is returned in the proper format. > > + */ > > +void vgic_data_host_to_mmio_bus(void *buf, unsigned int len, > > + unsigned long data) > > +{ > > + switch (len) { > > + case 1: > > + break; > > + case 2: > > + data = cpu_to_le16(data); > > + break; > > + case 4: > > + data = cpu_to_le32(data); > > + break; > > + default: > > + data = cpu_to_le64(data); > > + } > > + > > + kvm_mmio_write_buf(buf, len, data); > > +} > > + > > +static > > +struct vgic_io_device *kvm_to_vgic_iodev(const struct kvm_io_device *dev) > > +{ > > + return container_of(dev, struct vgic_io_device, dev); > > +} > > + > > +static bool check_region(const struct vgic_register_region *region, > > + gpa_t addr, int len) > > +{ > > + if ((region->access_flags & VGIC_ACCESS_8bit) && len == 1) > > + return true; > > + if ((region->access_flags & VGIC_ACCESS_32bit) && > > + len == sizeof(u32) && !(addr & 3)) > > + return true; > > + if ((region->access_flags & VGIC_ACCESS_64bit) && > > + len == sizeof(u64) && !(addr & 7)) > > + return true; > > + > > + return false; > > +} > > + > > +static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, > > + gpa_t addr, int len, void *val) > > +{ > > + struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev); > > + const struct vgic_register_region *region; > > + struct kvm_vcpu *r_vcpu; > > + unsigned long data; > > + > > + region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions, > > + addr - iodev->base_addr); > > + if (!region || !check_region(region, addr, len)) { > > + memset(val, 0, len); > > + return 0; > > + } > > + > > + r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu; > > + data = region->read(r_vcpu, addr, len); > > + vgic_data_host_to_mmio_bus(val, len, data); > > + return 0; > > +} > > + > > +static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, > > + gpa_t addr, int len, const void *val) > > +{ > > + struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev); > > + const struct vgic_register_region *region; > > + struct kvm_vcpu *r_vcpu; > > + unsigned long data = vgic_data_mmio_bus_to_host(val, len); > > + > > + region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions, > > + addr - iodev->base_addr); > > + if (!region) > > + return 0; > > + > > + if (!check_region(region, addr, len)) > > + return 0; > > + > > + r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu; > > + region->write(r_vcpu, addr, len, data); > > + return 0; > > +} > > + > > +struct kvm_io_device_ops kvm_io_gic_ops = { > > + .read = dispatch_mmio_read, > > + .write = dispatch_mmio_write, > > +}; > > diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h > > new file mode 100644 > > index 0000000..855b1db > > --- /dev/null > > +++ b/virt/kvm/arm/vgic/vgic-mmio.h > > @@ -0,0 +1,87 @@ > > +/* > > + * Copyright (C) 2015, 2016 ARM Ltd. > > + * > > + * This program is free software; you can redistribute it and/or modify > > + * it under the terms of the GNU General Public License version 2 as > > + * published by the Free Software Foundation. > > + * > > + * This program is distributed in the hope that it will be useful, > > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > > + * GNU General Public License for more details. > > + * > > + * You should have received a copy of the GNU General Public License > > + * along with this program. If not, see <http://www.gnu.org/licenses/>. > > + */ > > +#ifndef __KVM_ARM_VGIC_MMIO_H__ > > +#define __KVM_ARM_VGIC_MMIO_H__ > > + > > +struct vgic_register_region { > > + unsigned int reg_offset; > > + unsigned int len; > > + unsigned int bits_per_irq; > > + unsigned int access_flags; > > + unsigned long (*read)(struct kvm_vcpu *vcpu, gpa_t addr, > > + unsigned int len); > > + void (*write)(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, > > + unsigned long val); > > +}; > > + > > +extern struct kvm_io_device_ops kvm_io_gic_ops; > > + > > +#define VGIC_ACCESS_8bit 1 > > +#define VGIC_ACCESS_32bit 2 > > +#define VGIC_ACCESS_64bit 4 > > + > > +/* generate a mask that covers 1024 interrupts with <b> bits per IRQ */ > > Hmmm. I'd appreciate some additional comments, specially when it comes > to the various restrictions. May I'd suggest something like: > > /* > * Generate a mask that covers the number of bytes required to address > * up to 1024 interrupts, each represented by <b> bits. This assumes > * that <b> is a power of two. > * > * ilog2(b) + ilog2(1024) is the number of bits required to bit-address > * 1024 interrupts, each represented by b bits. Minus ilog2(8) converts > * this to a byte address. So I'm guessting this is a rewrite of ilog2( (b * 1024) / 8), but I'm stupid enough to not understand our use of logarithms here. Can someone remind me whatever I forgot at CS 101? > */ > > > +#define VGIC_ADDR_IRQ_MASK(b) GENMASK_ULL(ilog2(b) + ilog2(1024) - \ > > + ilog2(BITS_PER_BYTE) - 1, 0) > > /* > * Convert a base address into a base interrupt (each interrupt > * represented by <bits> bits. This assumes that <bits> is a power > * of two, that <addr> both part of a memory region aligned on a did you mean '<addr> *is* both part of' ? > * <b> bits boundary, and itself aligned on that same boundary > * (for regions that describe an interrupt with more than a single > * byte of data). > */ > In any case, thanks for the commentary, I was faily lost here. -Christoffer > > +#define VGIC_ADDR_TO_INTID(addr, bits) (((addr) & VGIC_ADDR_IRQ_MASK(bits)) * \ > > + 64 / (bits) / 8) > > + > > +/* > > + * Some VGIC registers store per-IRQ information, with a different number > > + * of bits per IRQ. For those registers this macro is used. > > + * The _WITH_LENGTH version instantiates registers with a fixed length > > + * and is mutually exclusive with the _PER_IRQ version. > > + */ > > +#define REGISTER_DESC_WITH_BITS_PER_IRQ(off, rd, wr, bpi, acc) \ > > + { \ > > + .reg_offset = off, \ > > + .bits_per_irq = bpi, \ > > + .len = bpi * 1024 / 8, \ > > + .access_flags = acc, \ > > + .read = rd, \ > > + .write = wr, \ > > + } > > + > > +#define REGISTER_DESC_WITH_LENGTH(off, rd, wr, length, acc) \ > > + { \ > > + .reg_offset = off, \ > > + .bits_per_irq = 0, \ > > + .len = length, \ > > + .access_flags = acc, \ > > + .read = rd, \ > > + .write = wr, \ > > + } > > + > > +int kvm_vgic_register_mmio_region(struct kvm *kvm, struct kvm_vcpu *vcpu, > > + struct vgic_register_region *reg_desc, > > + struct vgic_io_device *region, > > + int nr_irqs, bool offset_private); > > + > > +unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len); > > + > > +void vgic_data_host_to_mmio_bus(void *buf, unsigned int len, > > + unsigned long data); > > + > > +unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu, > > + gpa_t addr, unsigned int len); > > + > > +unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu, > > + gpa_t addr, unsigned int len); > > + > > +void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr, > > + unsigned int len, unsigned long val); > > + > > +#endif > > > > Thanks, > > M. > -- > Jazz is not dead. It just smells funny...
On Mon, May 16, 2016 at 10:53:10AM +0100, Andre Przywara wrote: > From: Marc Zyngier <marc.zyngier@arm.com> > > Add an MMIO handling framework to the VGIC emulation: > Each register is described by its offset, size (or number of bits per > IRQ, if applicable) and the read/write handler functions. We provide > initialization macros to describe each GIC register later easily. > > Separate dispatch functions for read and write accesses are connected > to the kvm_io_bus framework and binary-search for the responsible > register handler based on the offset address within the region. > We convert the incoming data (referenced by a pointer) to the host's > endianess and use pass-by-value to hand the data over to the actual > handler functions. > > The register handler prototype and the endianess conversion are > courtesy of Christoffer Dall. > > Signed-off-by: Marc Zyngier <marc.zyngier@arm.com> > Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org> > Signed-off-by: Andre Przywara <andre.przywara@arm.com> > --- > Changelog RFC..v1: > - rework MMIO dispatching to use only one kvm_io_bus device > - document purpose of register region macros > - rename "this" parameter to "dev" > - change IGROUPR to be RAO (returning 1 => Group1 IRQs) > > Changelog v1 .. v2: > * MASSIVE rework: > - store register_region pointer in kvm_io_bus linked struct > - replace write_mask_xxx functions with extract_bytes() implementation > - change handler functions' prototypes to take and return unsigned long > - use binary search to find matching register handler > - convert endianess of input data in dispatch_mmio_xxx functions > - improve readability of register initializer macros > - remove any GICv2/GICv3 specific functions from vgic-mmio.c > - rename file from vgic_mmio.c to vgic-mmio.c > > Changelog v2 .. v3: > - replace inclusion of vgic/vgic.h with arm_vgic.h > > Changelog v3 .. v4: > - add IRQ number accessor macro > - check access width in dispatcher > - treat non-covered MMIO addresses as RAZ/WI > - remove extract_bytes() (re-introduced as static later in the series) > > include/kvm/vgic/vgic.h | 13 +++ > virt/kvm/arm/vgic/vgic-mmio.c | 184 ++++++++++++++++++++++++++++++++++++++++++ > virt/kvm/arm/vgic/vgic-mmio.h | 87 ++++++++++++++++++++ > 3 files changed, 284 insertions(+) > create mode 100644 virt/kvm/arm/vgic/vgic-mmio.c > create mode 100644 virt/kvm/arm/vgic/vgic-mmio.h > > diff --git a/include/kvm/vgic/vgic.h b/include/kvm/vgic/vgic.h > index f663288..ff3f9c2 100644 > --- a/include/kvm/vgic/vgic.h > +++ b/include/kvm/vgic/vgic.h > @@ -106,6 +106,16 @@ struct vgic_irq { > enum vgic_irq_config config; /* Level or edge */ > }; > > +struct vgic_register_region; > + > +struct vgic_io_device { > + gpa_t base_addr; > + struct kvm_vcpu *redist_vcpu; > + const struct vgic_register_region *regions; > + int nr_regions; > + struct kvm_io_device dev; > +}; > + > struct vgic_dist { > bool in_kernel; > bool ready; > @@ -132,6 +142,9 @@ struct vgic_dist { > bool enabled; > > struct vgic_irq *spis; > + > + struct vgic_io_device dist_iodev; > + struct vgic_io_device *redist_iodevs; > }; > > struct vgic_v2_cpu_if { > diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c > new file mode 100644 > index 0000000..012b82b > --- /dev/null > +++ b/virt/kvm/arm/vgic/vgic-mmio.c > @@ -0,0 +1,184 @@ > +/* > + * VGIC MMIO handling functions > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License version 2 as > + * published by the Free Software Foundation. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + */ > + > +#include <linux/bitops.h> > +#include <linux/bsearch.h> > +#include <linux/kvm.h> > +#include <linux/kvm_host.h> > +#include <kvm/iodev.h> > +#include <kvm/arm_vgic.h> > + > +#include "vgic.h" > +#include "vgic-mmio.h" > + > +unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu, > + gpa_t addr, unsigned int len) > +{ > + return 0; > +} > + > +unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu, > + gpa_t addr, unsigned int len) > +{ > + return -1UL; > +} > + > +void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr, > + unsigned int len, unsigned long val) > +{ > + /* Ignore */ > +} > + > +static int match_region(const void *key, const void *elt) > +{ > + const unsigned int offset = (unsigned long)key; > + const struct vgic_register_region *region = elt; > + > + if (offset < region->reg_offset) > + return -1; > + > + if (offset >= region->reg_offset + region->len) > + return 1; > + > + return 0; > +} > + > +/* Find the proper register handler entry given a certain address offset. */ > +static const struct vgic_register_region * > +vgic_find_mmio_region(const struct vgic_register_region *region, int nr_regions, > + unsigned int offset) > +{ > + return bsearch((void *)(uintptr_t)offset, region, nr_regions, > + sizeof(region[0]), match_region); > +} > + > +/* > + * kvm_mmio_read_buf() returns a value in a format where it can be converted > + * to a byte array and be directly observed as the guest wanted it to appear > + * in memory if it had done the store itself, which is LE for the GIC, as the > + * guest knows the GIC is always LE. > + * > + * We convert this value to the CPUs native format to deal with it as a data > + * value. > + */ > +unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len) > +{ > + unsigned long data = kvm_mmio_read_buf(val, len); > + > + switch (len) { > + case 1: > + return data; > + case 2: > + return le16_to_cpu(data); > + case 4: > + return le32_to_cpu(data); > + default: > + return le64_to_cpu(data); > + } > +} > + > +/* > + * kvm_mmio_write_buf() expects a value in a format such that if converted to > + * a byte array it is observed as the guest would see it if it could perform > + * the load directly. Since the GIC is LE, and the guest knows this, the > + * guest expects a value in little endian format. > + * > + * We convert the data value from the CPUs native format to LE so that the > + * value is returned in the proper format. > + */ > +void vgic_data_host_to_mmio_bus(void *buf, unsigned int len, > + unsigned long data) > +{ > + switch (len) { > + case 1: > + break; > + case 2: > + data = cpu_to_le16(data); > + break; > + case 4: > + data = cpu_to_le32(data); > + break; > + default: > + data = cpu_to_le64(data); > + } > + > + kvm_mmio_write_buf(buf, len, data); > +} > + > +static > +struct vgic_io_device *kvm_to_vgic_iodev(const struct kvm_io_device *dev) > +{ > + return container_of(dev, struct vgic_io_device, dev); > +} > + > +static bool check_region(const struct vgic_register_region *region, > + gpa_t addr, int len) > +{ > + if ((region->access_flags & VGIC_ACCESS_8bit) && len == 1) > + return true; > + if ((region->access_flags & VGIC_ACCESS_32bit) && > + len == sizeof(u32) && !(addr & 3)) > + return true; > + if ((region->access_flags & VGIC_ACCESS_64bit) && > + len == sizeof(u64) && !(addr & 7)) > + return true; > + > + return false; > +} > + > +static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, > + gpa_t addr, int len, void *val) > +{ > + struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev); > + const struct vgic_register_region *region; > + struct kvm_vcpu *r_vcpu; > + unsigned long data; > + > + region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions, > + addr - iodev->base_addr); > + if (!region || !check_region(region, addr, len)) { > + memset(val, 0, len); > + return 0; > + } > + > + r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu; > + data = region->read(r_vcpu, addr, len); > + vgic_data_host_to_mmio_bus(val, len, data); > + return 0; > +} > + > +static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, > + gpa_t addr, int len, const void *val) > +{ > + struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev); > + const struct vgic_register_region *region; > + struct kvm_vcpu *r_vcpu; > + unsigned long data = vgic_data_mmio_bus_to_host(val, len); > + > + region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions, > + addr - iodev->base_addr); > + if (!region) > + return 0; > + > + if (!check_region(region, addr, len)) > + return 0; > + > + r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu; > + region->write(r_vcpu, addr, len, data); > + return 0; > +} > + > +struct kvm_io_device_ops kvm_io_gic_ops = { > + .read = dispatch_mmio_read, > + .write = dispatch_mmio_write, > +}; > diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h > new file mode 100644 > index 0000000..855b1db > --- /dev/null > +++ b/virt/kvm/arm/vgic/vgic-mmio.h > @@ -0,0 +1,87 @@ > +/* > + * Copyright (C) 2015, 2016 ARM Ltd. > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of the GNU General Public License version 2 as > + * published by the Free Software Foundation. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License > + * along with this program. If not, see <http://www.gnu.org/licenses/>. > + */ > +#ifndef __KVM_ARM_VGIC_MMIO_H__ > +#define __KVM_ARM_VGIC_MMIO_H__ > + > +struct vgic_register_region { > + unsigned int reg_offset; > + unsigned int len; > + unsigned int bits_per_irq; > + unsigned int access_flags; > + unsigned long (*read)(struct kvm_vcpu *vcpu, gpa_t addr, > + unsigned int len); > + void (*write)(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, > + unsigned long val); > +}; > + > +extern struct kvm_io_device_ops kvm_io_gic_ops; > + > +#define VGIC_ACCESS_8bit 1 > +#define VGIC_ACCESS_32bit 2 > +#define VGIC_ACCESS_64bit 4 > + > +/* generate a mask that covers 1024 interrupts with <b> bits per IRQ */ > +#define VGIC_ADDR_IRQ_MASK(b) GENMASK_ULL(ilog2(b) + ilog2(1024) - \ > + ilog2(BITS_PER_BYTE) - 1, 0) > +#define VGIC_ADDR_TO_INTID(addr, bits) (((addr) & VGIC_ADDR_IRQ_MASK(bits)) * \ > + 64 / (bits) / 8) In the comment we end up adding here, can we also describe why (addr & <magic mask>) * <magic 64> / (bits) / <BITS_PER_BYTE OR BYTES_PER_ULL> gives us what we need, because I don't get it. Except for this magic formula, which I trust you and Marc have verified and proved: Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org> > + > +/* > + * Some VGIC registers store per-IRQ information, with a different number > + * of bits per IRQ. For those registers this macro is used. > + * The _WITH_LENGTH version instantiates registers with a fixed length > + * and is mutually exclusive with the _PER_IRQ version. > + */ > +#define REGISTER_DESC_WITH_BITS_PER_IRQ(off, rd, wr, bpi, acc) \ > + { \ > + .reg_offset = off, \ > + .bits_per_irq = bpi, \ > + .len = bpi * 1024 / 8, \ > + .access_flags = acc, \ > + .read = rd, \ > + .write = wr, \ > + } > + > +#define REGISTER_DESC_WITH_LENGTH(off, rd, wr, length, acc) \ > + { \ > + .reg_offset = off, \ > + .bits_per_irq = 0, \ > + .len = length, \ > + .access_flags = acc, \ > + .read = rd, \ > + .write = wr, \ > + } > + > +int kvm_vgic_register_mmio_region(struct kvm *kvm, struct kvm_vcpu *vcpu, > + struct vgic_register_region *reg_desc, > + struct vgic_io_device *region, > + int nr_irqs, bool offset_private); > + > +unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len); > + > +void vgic_data_host_to_mmio_bus(void *buf, unsigned int len, > + unsigned long data); > + > +unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu, > + gpa_t addr, unsigned int len); > + > +unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu, > + gpa_t addr, unsigned int len); > + > +void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr, > + unsigned int len, unsigned long val); > + > +#endif > -- > 2.8.2 > > -- > To unsubscribe from this list: send the line "unsubscribe kvm" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html
On 18/05/16 13:25, Christoffer Dall wrote: > On Tue, May 17, 2016 at 02:33:36PM +0100, Marc Zyngier wrote: >> On 16/05/16 10:53, Andre Przywara wrote: >>> From: Marc Zyngier <marc.zyngier@arm.com> >>> >>> Add an MMIO handling framework to the VGIC emulation: >>> Each register is described by its offset, size (or number of bits per >>> IRQ, if applicable) and the read/write handler functions. We provide >>> initialization macros to describe each GIC register later easily. >>> >>> Separate dispatch functions for read and write accesses are connected >>> to the kvm_io_bus framework and binary-search for the responsible >>> register handler based on the offset address within the region. >>> We convert the incoming data (referenced by a pointer) to the host's >>> endianess and use pass-by-value to hand the data over to the actual >>> handler functions. >>> >>> The register handler prototype and the endianess conversion are >>> courtesy of Christoffer Dall. >>> >>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com> >>> Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org> >>> Signed-off-by: Andre Przywara <andre.przywara@arm.com> >>> --- >>> Changelog RFC..v1: >>> - rework MMIO dispatching to use only one kvm_io_bus device >>> - document purpose of register region macros >>> - rename "this" parameter to "dev" >>> - change IGROUPR to be RAO (returning 1 => Group1 IRQs) >>> >>> Changelog v1 .. v2: >>> * MASSIVE rework: >>> - store register_region pointer in kvm_io_bus linked struct >>> - replace write_mask_xxx functions with extract_bytes() implementation >>> - change handler functions' prototypes to take and return unsigned long >>> - use binary search to find matching register handler >>> - convert endianess of input data in dispatch_mmio_xxx functions >>> - improve readability of register initializer macros >>> - remove any GICv2/GICv3 specific functions from vgic-mmio.c >>> - rename file from vgic_mmio.c to vgic-mmio.c >>> >>> Changelog v2 .. v3: >>> - replace inclusion of vgic/vgic.h with arm_vgic.h >>> >>> Changelog v3 .. v4: >>> - add IRQ number accessor macro >>> - check access width in dispatcher >>> - treat non-covered MMIO addresses as RAZ/WI >>> - remove extract_bytes() (re-introduced as static later in the series) >>> >>> include/kvm/vgic/vgic.h | 13 +++ >>> virt/kvm/arm/vgic/vgic-mmio.c | 184 ++++++++++++++++++++++++++++++++++++++++++ >>> virt/kvm/arm/vgic/vgic-mmio.h | 87 ++++++++++++++++++++ >>> 3 files changed, 284 insertions(+) >>> create mode 100644 virt/kvm/arm/vgic/vgic-mmio.c >>> create mode 100644 virt/kvm/arm/vgic/vgic-mmio.h >>> >>> diff --git a/include/kvm/vgic/vgic.h b/include/kvm/vgic/vgic.h >>> index f663288..ff3f9c2 100644 >>> --- a/include/kvm/vgic/vgic.h >>> +++ b/include/kvm/vgic/vgic.h >>> @@ -106,6 +106,16 @@ struct vgic_irq { >>> enum vgic_irq_config config; /* Level or edge */ >>> }; >>> >>> +struct vgic_register_region; >>> + >>> +struct vgic_io_device { >>> + gpa_t base_addr; >>> + struct kvm_vcpu *redist_vcpu; >>> + const struct vgic_register_region *regions; >>> + int nr_regions; >>> + struct kvm_io_device dev; >>> +}; >>> + >>> struct vgic_dist { >>> bool in_kernel; >>> bool ready; >>> @@ -132,6 +142,9 @@ struct vgic_dist { >>> bool enabled; >>> >>> struct vgic_irq *spis; >>> + >>> + struct vgic_io_device dist_iodev; >>> + struct vgic_io_device *redist_iodevs; >>> }; >>> >>> struct vgic_v2_cpu_if { >>> diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c >>> new file mode 100644 >>> index 0000000..012b82b >>> --- /dev/null >>> +++ b/virt/kvm/arm/vgic/vgic-mmio.c >>> @@ -0,0 +1,184 @@ >>> +/* >>> + * VGIC MMIO handling functions >>> + * >>> + * This program is free software; you can redistribute it and/or modify >>> + * it under the terms of the GNU General Public License version 2 as >>> + * published by the Free Software Foundation. >>> + * >>> + * This program is distributed in the hope that it will be useful, >>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of >>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >>> + * GNU General Public License for more details. >>> + */ >>> + >>> +#include <linux/bitops.h> >>> +#include <linux/bsearch.h> >>> +#include <linux/kvm.h> >>> +#include <linux/kvm_host.h> >>> +#include <kvm/iodev.h> >>> +#include <kvm/arm_vgic.h> >>> + >>> +#include "vgic.h" >>> +#include "vgic-mmio.h" >>> + >>> +unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu, >>> + gpa_t addr, unsigned int len) >>> +{ >>> + return 0; >>> +} >>> + >>> +unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu, >>> + gpa_t addr, unsigned int len) >>> +{ >>> + return -1UL; >>> +} >>> + >>> +void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr, >>> + unsigned int len, unsigned long val) >>> +{ >>> + /* Ignore */ >>> +} >>> + >>> +static int match_region(const void *key, const void *elt) >>> +{ >>> + const unsigned int offset = (unsigned long)key; >>> + const struct vgic_register_region *region = elt; >>> + >>> + if (offset < region->reg_offset) >>> + return -1; >>> + >>> + if (offset >= region->reg_offset + region->len) >>> + return 1; >>> + >>> + return 0; >>> +} >>> + >>> +/* Find the proper register handler entry given a certain address offset. */ >>> +static const struct vgic_register_region * >>> +vgic_find_mmio_region(const struct vgic_register_region *region, int nr_regions, >>> + unsigned int offset) >>> +{ >>> + return bsearch((void *)(uintptr_t)offset, region, nr_regions, >>> + sizeof(region[0]), match_region); >>> +} >>> + >>> +/* >>> + * kvm_mmio_read_buf() returns a value in a format where it can be converted >>> + * to a byte array and be directly observed as the guest wanted it to appear >>> + * in memory if it had done the store itself, which is LE for the GIC, as the >>> + * guest knows the GIC is always LE. >>> + * >>> + * We convert this value to the CPUs native format to deal with it as a data >>> + * value. >>> + */ >>> +unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len) >>> +{ >>> + unsigned long data = kvm_mmio_read_buf(val, len); >>> + >>> + switch (len) { >>> + case 1: >>> + return data; >>> + case 2: >>> + return le16_to_cpu(data); >>> + case 4: >>> + return le32_to_cpu(data); >>> + default: >>> + return le64_to_cpu(data); >>> + } >>> +} >>> + >>> +/* >>> + * kvm_mmio_write_buf() expects a value in a format such that if converted to >>> + * a byte array it is observed as the guest would see it if it could perform >>> + * the load directly. Since the GIC is LE, and the guest knows this, the >>> + * guest expects a value in little endian format. >>> + * >>> + * We convert the data value from the CPUs native format to LE so that the >>> + * value is returned in the proper format. >>> + */ >>> +void vgic_data_host_to_mmio_bus(void *buf, unsigned int len, >>> + unsigned long data) >>> +{ >>> + switch (len) { >>> + case 1: >>> + break; >>> + case 2: >>> + data = cpu_to_le16(data); >>> + break; >>> + case 4: >>> + data = cpu_to_le32(data); >>> + break; >>> + default: >>> + data = cpu_to_le64(data); >>> + } >>> + >>> + kvm_mmio_write_buf(buf, len, data); >>> +} >>> + >>> +static >>> +struct vgic_io_device *kvm_to_vgic_iodev(const struct kvm_io_device *dev) >>> +{ >>> + return container_of(dev, struct vgic_io_device, dev); >>> +} >>> + >>> +static bool check_region(const struct vgic_register_region *region, >>> + gpa_t addr, int len) >>> +{ >>> + if ((region->access_flags & VGIC_ACCESS_8bit) && len == 1) >>> + return true; >>> + if ((region->access_flags & VGIC_ACCESS_32bit) && >>> + len == sizeof(u32) && !(addr & 3)) >>> + return true; >>> + if ((region->access_flags & VGIC_ACCESS_64bit) && >>> + len == sizeof(u64) && !(addr & 7)) >>> + return true; >>> + >>> + return false; >>> +} >>> + >>> +static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, >>> + gpa_t addr, int len, void *val) >>> +{ >>> + struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev); >>> + const struct vgic_register_region *region; >>> + struct kvm_vcpu *r_vcpu; >>> + unsigned long data; >>> + >>> + region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions, >>> + addr - iodev->base_addr); >>> + if (!region || !check_region(region, addr, len)) { >>> + memset(val, 0, len); >>> + return 0; >>> + } >>> + >>> + r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu; >>> + data = region->read(r_vcpu, addr, len); >>> + vgic_data_host_to_mmio_bus(val, len, data); >>> + return 0; >>> +} >>> + >>> +static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, >>> + gpa_t addr, int len, const void *val) >>> +{ >>> + struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev); >>> + const struct vgic_register_region *region; >>> + struct kvm_vcpu *r_vcpu; >>> + unsigned long data = vgic_data_mmio_bus_to_host(val, len); >>> + >>> + region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions, >>> + addr - iodev->base_addr); >>> + if (!region) >>> + return 0; >>> + >>> + if (!check_region(region, addr, len)) >>> + return 0; >>> + >>> + r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu; >>> + region->write(r_vcpu, addr, len, data); >>> + return 0; >>> +} >>> + >>> +struct kvm_io_device_ops kvm_io_gic_ops = { >>> + .read = dispatch_mmio_read, >>> + .write = dispatch_mmio_write, >>> +}; >>> diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h >>> new file mode 100644 >>> index 0000000..855b1db >>> --- /dev/null >>> +++ b/virt/kvm/arm/vgic/vgic-mmio.h >>> @@ -0,0 +1,87 @@ >>> +/* >>> + * Copyright (C) 2015, 2016 ARM Ltd. >>> + * >>> + * This program is free software; you can redistribute it and/or modify >>> + * it under the terms of the GNU General Public License version 2 as >>> + * published by the Free Software Foundation. >>> + * >>> + * This program is distributed in the hope that it will be useful, >>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of >>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >>> + * GNU General Public License for more details. >>> + * >>> + * You should have received a copy of the GNU General Public License >>> + * along with this program. If not, see <http://www.gnu.org/licenses/>. >>> + */ >>> +#ifndef __KVM_ARM_VGIC_MMIO_H__ >>> +#define __KVM_ARM_VGIC_MMIO_H__ >>> + >>> +struct vgic_register_region { >>> + unsigned int reg_offset; >>> + unsigned int len; >>> + unsigned int bits_per_irq; >>> + unsigned int access_flags; >>> + unsigned long (*read)(struct kvm_vcpu *vcpu, gpa_t addr, >>> + unsigned int len); >>> + void (*write)(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, >>> + unsigned long val); >>> +}; >>> + >>> +extern struct kvm_io_device_ops kvm_io_gic_ops; >>> + >>> +#define VGIC_ACCESS_8bit 1 >>> +#define VGIC_ACCESS_32bit 2 >>> +#define VGIC_ACCESS_64bit 4 >>> + >>> +/* generate a mask that covers 1024 interrupts with <b> bits per IRQ */ >> >> Hmmm. I'd appreciate some additional comments, specially when it comes >> to the various restrictions. May I'd suggest something like: >> >> /* >> * Generate a mask that covers the number of bytes required to address >> * up to 1024 interrupts, each represented by <b> bits. This assumes >> * that <b> is a power of two. >> * >> * ilog2(b) + ilog2(1024) is the number of bits required to bit-address >> * 1024 interrupts, each represented by b bits. Minus ilog2(8) converts >> * this to a byte address. > > So I'm guessting this is a rewrite of ilog2( (b * 1024) / 8), but I'm Yes, same thing. > stupid enough to not understand our use of logarithms here. Can someone > remind me whatever I forgot at CS 101? I'm bad at explaining that kind of things, so let me just quote Wikipedia (https://en.wikipedia.org/wiki/Binary_logarithm): "The number of digits (bits) in the binary representation of a positive integer n is the integral part of 1 + log2?n" Is that what you were missing? > >> */ >> >>> +#define VGIC_ADDR_IRQ_MASK(b) GENMASK_ULL(ilog2(b) + ilog2(1024) - \ >>> + ilog2(BITS_PER_BYTE) - 1, 0) >> >> /* >> * Convert a base address into a base interrupt (each interrupt >> * represented by <bits> bits. This assumes that <bits> is a power >> * of two, that <addr> both part of a memory region aligned on a > > did you mean '<addr> *is* both part of' ? Indeed. Thanks, M.
On Wed, May 18, 2016 at 03:12:33PM +0100, Marc Zyngier wrote: > On 18/05/16 13:25, Christoffer Dall wrote: > > On Tue, May 17, 2016 at 02:33:36PM +0100, Marc Zyngier wrote: > >> On 16/05/16 10:53, Andre Przywara wrote: > >>> From: Marc Zyngier <marc.zyngier@arm.com> > >>> > >>> Add an MMIO handling framework to the VGIC emulation: > >>> Each register is described by its offset, size (or number of bits per > >>> IRQ, if applicable) and the read/write handler functions. We provide > >>> initialization macros to describe each GIC register later easily. > >>> > >>> Separate dispatch functions for read and write accesses are connected > >>> to the kvm_io_bus framework and binary-search for the responsible > >>> register handler based on the offset address within the region. > >>> We convert the incoming data (referenced by a pointer) to the host's > >>> endianess and use pass-by-value to hand the data over to the actual > >>> handler functions. > >>> > >>> The register handler prototype and the endianess conversion are > >>> courtesy of Christoffer Dall. > >>> > >>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com> > >>> Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org> > >>> Signed-off-by: Andre Przywara <andre.przywara@arm.com> > >>> --- > >>> Changelog RFC..v1: > >>> - rework MMIO dispatching to use only one kvm_io_bus device > >>> - document purpose of register region macros > >>> - rename "this" parameter to "dev" > >>> - change IGROUPR to be RAO (returning 1 => Group1 IRQs) > >>> > >>> Changelog v1 .. v2: > >>> * MASSIVE rework: > >>> - store register_region pointer in kvm_io_bus linked struct > >>> - replace write_mask_xxx functions with extract_bytes() implementation > >>> - change handler functions' prototypes to take and return unsigned long > >>> - use binary search to find matching register handler > >>> - convert endianess of input data in dispatch_mmio_xxx functions > >>> - improve readability of register initializer macros > >>> - remove any GICv2/GICv3 specific functions from vgic-mmio.c > >>> - rename file from vgic_mmio.c to vgic-mmio.c > >>> > >>> Changelog v2 .. v3: > >>> - replace inclusion of vgic/vgic.h with arm_vgic.h > >>> > >>> Changelog v3 .. v4: > >>> - add IRQ number accessor macro > >>> - check access width in dispatcher > >>> - treat non-covered MMIO addresses as RAZ/WI > >>> - remove extract_bytes() (re-introduced as static later in the series) > >>> > >>> include/kvm/vgic/vgic.h | 13 +++ > >>> virt/kvm/arm/vgic/vgic-mmio.c | 184 ++++++++++++++++++++++++++++++++++++++++++ > >>> virt/kvm/arm/vgic/vgic-mmio.h | 87 ++++++++++++++++++++ > >>> 3 files changed, 284 insertions(+) > >>> create mode 100644 virt/kvm/arm/vgic/vgic-mmio.c > >>> create mode 100644 virt/kvm/arm/vgic/vgic-mmio.h > >>> > >>> diff --git a/include/kvm/vgic/vgic.h b/include/kvm/vgic/vgic.h > >>> index f663288..ff3f9c2 100644 > >>> --- a/include/kvm/vgic/vgic.h > >>> +++ b/include/kvm/vgic/vgic.h > >>> @@ -106,6 +106,16 @@ struct vgic_irq { > >>> enum vgic_irq_config config; /* Level or edge */ > >>> }; > >>> > >>> +struct vgic_register_region; > >>> + > >>> +struct vgic_io_device { > >>> + gpa_t base_addr; > >>> + struct kvm_vcpu *redist_vcpu; > >>> + const struct vgic_register_region *regions; > >>> + int nr_regions; > >>> + struct kvm_io_device dev; > >>> +}; > >>> + > >>> struct vgic_dist { > >>> bool in_kernel; > >>> bool ready; > >>> @@ -132,6 +142,9 @@ struct vgic_dist { > >>> bool enabled; > >>> > >>> struct vgic_irq *spis; > >>> + > >>> + struct vgic_io_device dist_iodev; > >>> + struct vgic_io_device *redist_iodevs; > >>> }; > >>> > >>> struct vgic_v2_cpu_if { > >>> diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c > >>> new file mode 100644 > >>> index 0000000..012b82b > >>> --- /dev/null > >>> +++ b/virt/kvm/arm/vgic/vgic-mmio.c > >>> @@ -0,0 +1,184 @@ > >>> +/* > >>> + * VGIC MMIO handling functions > >>> + * > >>> + * This program is free software; you can redistribute it and/or modify > >>> + * it under the terms of the GNU General Public License version 2 as > >>> + * published by the Free Software Foundation. > >>> + * > >>> + * This program is distributed in the hope that it will be useful, > >>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of > >>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > >>> + * GNU General Public License for more details. > >>> + */ > >>> + > >>> +#include <linux/bitops.h> > >>> +#include <linux/bsearch.h> > >>> +#include <linux/kvm.h> > >>> +#include <linux/kvm_host.h> > >>> +#include <kvm/iodev.h> > >>> +#include <kvm/arm_vgic.h> > >>> + > >>> +#include "vgic.h" > >>> +#include "vgic-mmio.h" > >>> + > >>> +unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu, > >>> + gpa_t addr, unsigned int len) > >>> +{ > >>> + return 0; > >>> +} > >>> + > >>> +unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu, > >>> + gpa_t addr, unsigned int len) > >>> +{ > >>> + return -1UL; > >>> +} > >>> + > >>> +void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr, > >>> + unsigned int len, unsigned long val) > >>> +{ > >>> + /* Ignore */ > >>> +} > >>> + > >>> +static int match_region(const void *key, const void *elt) > >>> +{ > >>> + const unsigned int offset = (unsigned long)key; > >>> + const struct vgic_register_region *region = elt; > >>> + > >>> + if (offset < region->reg_offset) > >>> + return -1; > >>> + > >>> + if (offset >= region->reg_offset + region->len) > >>> + return 1; > >>> + > >>> + return 0; > >>> +} > >>> + > >>> +/* Find the proper register handler entry given a certain address offset. */ > >>> +static const struct vgic_register_region * > >>> +vgic_find_mmio_region(const struct vgic_register_region *region, int nr_regions, > >>> + unsigned int offset) > >>> +{ > >>> + return bsearch((void *)(uintptr_t)offset, region, nr_regions, > >>> + sizeof(region[0]), match_region); > >>> +} > >>> + > >>> +/* > >>> + * kvm_mmio_read_buf() returns a value in a format where it can be converted > >>> + * to a byte array and be directly observed as the guest wanted it to appear > >>> + * in memory if it had done the store itself, which is LE for the GIC, as the > >>> + * guest knows the GIC is always LE. > >>> + * > >>> + * We convert this value to the CPUs native format to deal with it as a data > >>> + * value. > >>> + */ > >>> +unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len) > >>> +{ > >>> + unsigned long data = kvm_mmio_read_buf(val, len); > >>> + > >>> + switch (len) { > >>> + case 1: > >>> + return data; > >>> + case 2: > >>> + return le16_to_cpu(data); > >>> + case 4: > >>> + return le32_to_cpu(data); > >>> + default: > >>> + return le64_to_cpu(data); > >>> + } > >>> +} > >>> + > >>> +/* > >>> + * kvm_mmio_write_buf() expects a value in a format such that if converted to > >>> + * a byte array it is observed as the guest would see it if it could perform > >>> + * the load directly. Since the GIC is LE, and the guest knows this, the > >>> + * guest expects a value in little endian format. > >>> + * > >>> + * We convert the data value from the CPUs native format to LE so that the > >>> + * value is returned in the proper format. > >>> + */ > >>> +void vgic_data_host_to_mmio_bus(void *buf, unsigned int len, > >>> + unsigned long data) > >>> +{ > >>> + switch (len) { > >>> + case 1: > >>> + break; > >>> + case 2: > >>> + data = cpu_to_le16(data); > >>> + break; > >>> + case 4: > >>> + data = cpu_to_le32(data); > >>> + break; > >>> + default: > >>> + data = cpu_to_le64(data); > >>> + } > >>> + > >>> + kvm_mmio_write_buf(buf, len, data); > >>> +} > >>> + > >>> +static > >>> +struct vgic_io_device *kvm_to_vgic_iodev(const struct kvm_io_device *dev) > >>> +{ > >>> + return container_of(dev, struct vgic_io_device, dev); > >>> +} > >>> + > >>> +static bool check_region(const struct vgic_register_region *region, > >>> + gpa_t addr, int len) > >>> +{ > >>> + if ((region->access_flags & VGIC_ACCESS_8bit) && len == 1) > >>> + return true; > >>> + if ((region->access_flags & VGIC_ACCESS_32bit) && > >>> + len == sizeof(u32) && !(addr & 3)) > >>> + return true; > >>> + if ((region->access_flags & VGIC_ACCESS_64bit) && > >>> + len == sizeof(u64) && !(addr & 7)) > >>> + return true; > >>> + > >>> + return false; > >>> +} > >>> + > >>> +static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, > >>> + gpa_t addr, int len, void *val) > >>> +{ > >>> + struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev); > >>> + const struct vgic_register_region *region; > >>> + struct kvm_vcpu *r_vcpu; > >>> + unsigned long data; > >>> + > >>> + region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions, > >>> + addr - iodev->base_addr); > >>> + if (!region || !check_region(region, addr, len)) { > >>> + memset(val, 0, len); > >>> + return 0; > >>> + } > >>> + > >>> + r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu; > >>> + data = region->read(r_vcpu, addr, len); > >>> + vgic_data_host_to_mmio_bus(val, len, data); > >>> + return 0; > >>> +} > >>> + > >>> +static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, > >>> + gpa_t addr, int len, const void *val) > >>> +{ > >>> + struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev); > >>> + const struct vgic_register_region *region; > >>> + struct kvm_vcpu *r_vcpu; > >>> + unsigned long data = vgic_data_mmio_bus_to_host(val, len); > >>> + > >>> + region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions, > >>> + addr - iodev->base_addr); > >>> + if (!region) > >>> + return 0; > >>> + > >>> + if (!check_region(region, addr, len)) > >>> + return 0; > >>> + > >>> + r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu; > >>> + region->write(r_vcpu, addr, len, data); > >>> + return 0; > >>> +} > >>> + > >>> +struct kvm_io_device_ops kvm_io_gic_ops = { > >>> + .read = dispatch_mmio_read, > >>> + .write = dispatch_mmio_write, > >>> +}; > >>> diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h > >>> new file mode 100644 > >>> index 0000000..855b1db > >>> --- /dev/null > >>> +++ b/virt/kvm/arm/vgic/vgic-mmio.h > >>> @@ -0,0 +1,87 @@ > >>> +/* > >>> + * Copyright (C) 2015, 2016 ARM Ltd. > >>> + * > >>> + * This program is free software; you can redistribute it and/or modify > >>> + * it under the terms of the GNU General Public License version 2 as > >>> + * published by the Free Software Foundation. > >>> + * > >>> + * This program is distributed in the hope that it will be useful, > >>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of > >>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > >>> + * GNU General Public License for more details. > >>> + * > >>> + * You should have received a copy of the GNU General Public License > >>> + * along with this program. If not, see <http://www.gnu.org/licenses/>. > >>> + */ > >>> +#ifndef __KVM_ARM_VGIC_MMIO_H__ > >>> +#define __KVM_ARM_VGIC_MMIO_H__ > >>> + > >>> +struct vgic_register_region { > >>> + unsigned int reg_offset; > >>> + unsigned int len; > >>> + unsigned int bits_per_irq; > >>> + unsigned int access_flags; > >>> + unsigned long (*read)(struct kvm_vcpu *vcpu, gpa_t addr, > >>> + unsigned int len); > >>> + void (*write)(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, > >>> + unsigned long val); > >>> +}; > >>> + > >>> +extern struct kvm_io_device_ops kvm_io_gic_ops; > >>> + > >>> +#define VGIC_ACCESS_8bit 1 > >>> +#define VGIC_ACCESS_32bit 2 > >>> +#define VGIC_ACCESS_64bit 4 > >>> + > >>> +/* generate a mask that covers 1024 interrupts with <b> bits per IRQ */ > >> > >> Hmmm. I'd appreciate some additional comments, specially when it comes > >> to the various restrictions. May I'd suggest something like: > >> > >> /* > >> * Generate a mask that covers the number of bytes required to address > >> * up to 1024 interrupts, each represented by <b> bits. This assumes > >> * that <b> is a power of two. > >> * > >> * ilog2(b) + ilog2(1024) is the number of bits required to bit-address > >> * 1024 interrupts, each represented by b bits. Minus ilog2(8) converts > >> * this to a byte address. > > > > So I'm guessting this is a rewrite of ilog2( (b * 1024) / 8), but I'm > > Yes, same thing. > > > stupid enough to not understand our use of logarithms here. Can someone > > remind me whatever I forgot at CS 101? > > I'm bad at explaining that kind of things, so let me just quote > Wikipedia (https://en.wikipedia.org/wiki/Binary_logarithm): > > "The number of digits (bits) in the binary representation of a positive > integer n is the integral part of 1 + log2?n" > > Is that what you were missing? > yeah, duh, I feel stupid now. Mind if we add this "note to Christoffer's brain" to the comment: "Since n bits can address a maximum of N=n^2 values, we get the number of bits required to address a number of values by applying log2(N). With N being 1024 * b, we get that ilog(b) + ilog2(1024) is the number..." > > > >> */ > >> > >>> +#define VGIC_ADDR_IRQ_MASK(b) GENMASK_ULL(ilog2(b) + ilog2(1024) - \ > >>> + ilog2(BITS_PER_BYTE) - 1, 0) > >> > >> /* > >> * Convert a base address into a base interrupt (each interrupt > >> * represented by <bits> bits. This assumes that <bits> is a power > >> * of two, that <addr> both part of a memory region aligned on a > > > > did you mean '<addr> *is* both part of' ? > > Indeed. > Thanks, -Christoffer
Hi, .... >> + >> +/* generate a mask that covers 1024 interrupts with <b> bits per IRQ */ >> +#define VGIC_ADDR_IRQ_MASK(b) GENMASK_ULL(ilog2(b) + ilog2(1024) - \ >> + ilog2(BITS_PER_BYTE) - 1, 0) >> +#define VGIC_ADDR_TO_INTID(addr, bits) (((addr) & VGIC_ADDR_IRQ_MASK(bits)) * \ >> + 64 / (bits) / 8) > > In the comment we end up adding here, can we also describe why > (addr & <magic mask>) * <magic 64> / (bits) / <BITS_PER_BYTE OR BYTES_PER_ULL> > gives us what we need, because I don't get it. The reason is: we deal with 8 bits per byte, but have bits-per-interrupts values bigger than 8. Doing the maths in floating point arithmetic would work fine: (float)(addr & mask) * (8.0 / bits_per_IRQ) So would this comment make sense? /* * Since we can have more than 8 bits per interrupt, we can't use * "8 / bpi" as a multiplicand directly, so we use a * fixed-point-arithmetic version of it tailored to cover at most 64 * bits per IRQ. */ Cheers, Andre.
Hi, >>> + >>> +/* generate a mask that covers 1024 interrupts with <b> bits per IRQ */ >> >> Hmmm. I'd appreciate some additional comments, specially when it comes >> to the various restrictions. May I'd suggest something like: >> >> /* >> * Generate a mask that covers the number of bytes required to address >> * up to 1024 interrupts, each represented by <b> bits. This assumes >> * that <b> is a power of two. >> * >> * ilog2(b) + ilog2(1024) is the number of bits required to bit-address >> * 1024 interrupts, each represented by b bits. Minus ilog2(8) converts >> * this to a byte address. > > So I'm guessting this is a rewrite of ilog2( (b * 1024) / 8), but I'm > stupid enough to not understand our use of logarithms here. Can someone > remind me whatever I forgot at CS 101? I guess it was more me not seeing the wood for the trees here: Indeed doing the multiplication first and then calling ilog2 seems to make more sense. Also I was thinking: Isn't "GENMASK_ULL(ilog2(n) - 1, 0)" the same as "n - 1"? So can't we just write: #define VGIC_ADDR_IRQ_MASK(bpi) (((bpi) * 1024 / 8) - 1) Proven by enumeration - over the values we use ;-) I'd keep the first paragraph of Marc's comment above then, but we can avoid mentioning Advanced Maths textbooks about binary logarithmic ;-) Cheers, Andre. >> */ >> >>> +#define VGIC_ADDR_IRQ_MASK(b) GENMASK_ULL(ilog2(b) + ilog2(1024) - \ >>> + ilog2(BITS_PER_BYTE) - 1, 0) >> >> /* >> * Convert a base address into a base interrupt (each interrupt >> * represented by <bits> bits. This assumes that <bits> is a power >> * of two, that <addr> both part of a memory region aligned on a > > did you mean '<addr> *is* both part of' ? > >> * <b> bits boundary, and itself aligned on that same boundary >> * (for regions that describe an interrupt with more than a single >> * byte of data). >> */ >> > > In any case, thanks for the commentary, I was faily lost here. >
On Wed, May 18, 2016 at 05:46:55PM +0100, Andre Przywara wrote: > Hi, > > >>> + > >>> +/* generate a mask that covers 1024 interrupts with <b> bits per IRQ */ > >> > >> Hmmm. I'd appreciate some additional comments, specially when it comes > >> to the various restrictions. May I'd suggest something like: > >> > >> /* > >> * Generate a mask that covers the number of bytes required to address > >> * up to 1024 interrupts, each represented by <b> bits. This assumes > >> * that <b> is a power of two. > >> * > >> * ilog2(b) + ilog2(1024) is the number of bits required to bit-address > >> * 1024 interrupts, each represented by b bits. Minus ilog2(8) converts > >> * this to a byte address. > > > > So I'm guessting this is a rewrite of ilog2( (b * 1024) / 8), but I'm > > stupid enough to not understand our use of logarithms here. Can someone > > remind me whatever I forgot at CS 101? > > I guess it was more me not seeing the wood for the trees here: > Indeed doing the multiplication first and then calling ilog2 seems to > make more sense. Also I was thinking: Isn't > "GENMASK_ULL(ilog2(n) - 1, 0)" the same as "n - 1"? if there's no integer rounding taking place with ilog2 (iow. n is a power of 2) then yes, I believe it is. > > So can't we just write: > > #define VGIC_ADDR_IRQ_MASK(bpi) (((bpi) * 1024 / 8) - 1) that certianly all of the sudden feels intuitive. > > Proven by enumeration - over the values we use ;-) > > I'd keep the first paragraph of Marc's comment above then, but we can > avoid mentioning Advanced Maths textbooks about binary logarithmic ;-) > Haha, you saved my day with that comment. I feel slightly less idiotic now, yes, let's call it advanced quantum math or something instead of CS 101 :) -Christoffer > > >> */ > >> > >>> +#define VGIC_ADDR_IRQ_MASK(b) GENMASK_ULL(ilog2(b) + ilog2(1024) - \ > >>> + ilog2(BITS_PER_BYTE) - 1, 0) > >> > >> /* > >> * Convert a base address into a base interrupt (each interrupt > >> * represented by <bits> bits. This assumes that <bits> is a power > >> * of two, that <addr> both part of a memory region aligned on a > > > > did you mean '<addr> *is* both part of' ? > > > >> * <b> bits boundary, and itself aligned on that same boundary > >> * (for regions that describe an interrupt with more than a single > >> * byte of data). > >> */ > >> > > > > In any case, thanks for the commentary, I was faily lost here. > >
On Wed, May 18, 2016 at 04:55:45PM +0100, Andre Przywara wrote: > Hi, > > .... > > >> + > >> +/* generate a mask that covers 1024 interrupts with <b> bits per IRQ */ > >> +#define VGIC_ADDR_IRQ_MASK(b) GENMASK_ULL(ilog2(b) + ilog2(1024) - \ > >> + ilog2(BITS_PER_BYTE) - 1, 0) > >> +#define VGIC_ADDR_TO_INTID(addr, bits) (((addr) & VGIC_ADDR_IRQ_MASK(bits)) * \ > >> + 64 / (bits) / 8) > > > > In the comment we end up adding here, can we also describe why > > (addr & <magic mask>) * <magic 64> / (bits) / <BITS_PER_BYTE OR BYTES_PER_ULL> > > gives us what we need, because I don't get it. > > The reason is: we deal with 8 bits per byte, but have > bits-per-interrupts values bigger than 8. Doing the maths in floating > point arithmetic would work fine: > > (float)(addr & mask) * (8.0 / bits_per_IRQ) > > So would this comment make sense? > > /* > * Since we can have more than 8 bits per interrupt, we can't use > * "8 / bpi" as a multiplicand directly, so we use a > * fixed-point-arithmetic version of it tailored to cover at most 64 > * bits per IRQ. > */ > Something like this certainly helps, here's another version which is easier for me to understand, but you can take your pick: /* * (addr & mask) gives us the byte offset for the INT ID, so we want to * divide this with 'bytes per irq' to get the INT ID, which is given * by '(bits) / 8'. But we do this with fixed-point-arithmetic and * take advantage of the fact that division by a fraction equals * multiplication with the inverted fraction, and scale up both the * numerator and denominator with 8 to support at most 64 bits per IRQ: */ At least I think we all agree that the approach works by now. -Christoffer
diff --git a/include/kvm/vgic/vgic.h b/include/kvm/vgic/vgic.h index f663288..ff3f9c2 100644 --- a/include/kvm/vgic/vgic.h +++ b/include/kvm/vgic/vgic.h @@ -106,6 +106,16 @@ struct vgic_irq { enum vgic_irq_config config; /* Level or edge */ }; +struct vgic_register_region; + +struct vgic_io_device { + gpa_t base_addr; + struct kvm_vcpu *redist_vcpu; + const struct vgic_register_region *regions; + int nr_regions; + struct kvm_io_device dev; +}; + struct vgic_dist { bool in_kernel; bool ready; @@ -132,6 +142,9 @@ struct vgic_dist { bool enabled; struct vgic_irq *spis; + + struct vgic_io_device dist_iodev; + struct vgic_io_device *redist_iodevs; }; struct vgic_v2_cpu_if { diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c new file mode 100644 index 0000000..012b82b --- /dev/null +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -0,0 +1,184 @@ +/* + * VGIC MMIO handling functions + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/bitops.h> +#include <linux/bsearch.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <kvm/iodev.h> +#include <kvm/arm_vgic.h> + +#include "vgic.h" +#include "vgic-mmio.h" + +unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + return 0; +} + +unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + return -1UL; +} + +void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len, unsigned long val) +{ + /* Ignore */ +} + +static int match_region(const void *key, const void *elt) +{ + const unsigned int offset = (unsigned long)key; + const struct vgic_register_region *region = elt; + + if (offset < region->reg_offset) + return -1; + + if (offset >= region->reg_offset + region->len) + return 1; + + return 0; +} + +/* Find the proper register handler entry given a certain address offset. */ +static const struct vgic_register_region * +vgic_find_mmio_region(const struct vgic_register_region *region, int nr_regions, + unsigned int offset) +{ + return bsearch((void *)(uintptr_t)offset, region, nr_regions, + sizeof(region[0]), match_region); +} + +/* + * kvm_mmio_read_buf() returns a value in a format where it can be converted + * to a byte array and be directly observed as the guest wanted it to appear + * in memory if it had done the store itself, which is LE for the GIC, as the + * guest knows the GIC is always LE. + * + * We convert this value to the CPUs native format to deal with it as a data + * value. + */ +unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len) +{ + unsigned long data = kvm_mmio_read_buf(val, len); + + switch (len) { + case 1: + return data; + case 2: + return le16_to_cpu(data); + case 4: + return le32_to_cpu(data); + default: + return le64_to_cpu(data); + } +} + +/* + * kvm_mmio_write_buf() expects a value in a format such that if converted to + * a byte array it is observed as the guest would see it if it could perform + * the load directly. Since the GIC is LE, and the guest knows this, the + * guest expects a value in little endian format. + * + * We convert the data value from the CPUs native format to LE so that the + * value is returned in the proper format. + */ +void vgic_data_host_to_mmio_bus(void *buf, unsigned int len, + unsigned long data) +{ + switch (len) { + case 1: + break; + case 2: + data = cpu_to_le16(data); + break; + case 4: + data = cpu_to_le32(data); + break; + default: + data = cpu_to_le64(data); + } + + kvm_mmio_write_buf(buf, len, data); +} + +static +struct vgic_io_device *kvm_to_vgic_iodev(const struct kvm_io_device *dev) +{ + return container_of(dev, struct vgic_io_device, dev); +} + +static bool check_region(const struct vgic_register_region *region, + gpa_t addr, int len) +{ + if ((region->access_flags & VGIC_ACCESS_8bit) && len == 1) + return true; + if ((region->access_flags & VGIC_ACCESS_32bit) && + len == sizeof(u32) && !(addr & 3)) + return true; + if ((region->access_flags & VGIC_ACCESS_64bit) && + len == sizeof(u64) && !(addr & 7)) + return true; + + return false; +} + +static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, + gpa_t addr, int len, void *val) +{ + struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev); + const struct vgic_register_region *region; + struct kvm_vcpu *r_vcpu; + unsigned long data; + + region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions, + addr - iodev->base_addr); + if (!region || !check_region(region, addr, len)) { + memset(val, 0, len); + return 0; + } + + r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu; + data = region->read(r_vcpu, addr, len); + vgic_data_host_to_mmio_bus(val, len, data); + return 0; +} + +static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, + gpa_t addr, int len, const void *val) +{ + struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev); + const struct vgic_register_region *region; + struct kvm_vcpu *r_vcpu; + unsigned long data = vgic_data_mmio_bus_to_host(val, len); + + region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions, + addr - iodev->base_addr); + if (!region) + return 0; + + if (!check_region(region, addr, len)) + return 0; + + r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu; + region->write(r_vcpu, addr, len, data); + return 0; +} + +struct kvm_io_device_ops kvm_io_gic_ops = { + .read = dispatch_mmio_read, + .write = dispatch_mmio_write, +}; diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h new file mode 100644 index 0000000..855b1db --- /dev/null +++ b/virt/kvm/arm/vgic/vgic-mmio.h @@ -0,0 +1,87 @@ +/* + * Copyright (C) 2015, 2016 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __KVM_ARM_VGIC_MMIO_H__ +#define __KVM_ARM_VGIC_MMIO_H__ + +struct vgic_register_region { + unsigned int reg_offset; + unsigned int len; + unsigned int bits_per_irq; + unsigned int access_flags; + unsigned long (*read)(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len); + void (*write)(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, + unsigned long val); +}; + +extern struct kvm_io_device_ops kvm_io_gic_ops; + +#define VGIC_ACCESS_8bit 1 +#define VGIC_ACCESS_32bit 2 +#define VGIC_ACCESS_64bit 4 + +/* generate a mask that covers 1024 interrupts with <b> bits per IRQ */ +#define VGIC_ADDR_IRQ_MASK(b) GENMASK_ULL(ilog2(b) + ilog2(1024) - \ + ilog2(BITS_PER_BYTE) - 1, 0) +#define VGIC_ADDR_TO_INTID(addr, bits) (((addr) & VGIC_ADDR_IRQ_MASK(bits)) * \ + 64 / (bits) / 8) + +/* + * Some VGIC registers store per-IRQ information, with a different number + * of bits per IRQ. For those registers this macro is used. + * The _WITH_LENGTH version instantiates registers with a fixed length + * and is mutually exclusive with the _PER_IRQ version. + */ +#define REGISTER_DESC_WITH_BITS_PER_IRQ(off, rd, wr, bpi, acc) \ + { \ + .reg_offset = off, \ + .bits_per_irq = bpi, \ + .len = bpi * 1024 / 8, \ + .access_flags = acc, \ + .read = rd, \ + .write = wr, \ + } + +#define REGISTER_DESC_WITH_LENGTH(off, rd, wr, length, acc) \ + { \ + .reg_offset = off, \ + .bits_per_irq = 0, \ + .len = length, \ + .access_flags = acc, \ + .read = rd, \ + .write = wr, \ + } + +int kvm_vgic_register_mmio_region(struct kvm *kvm, struct kvm_vcpu *vcpu, + struct vgic_register_region *reg_desc, + struct vgic_io_device *region, + int nr_irqs, bool offset_private); + +unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len); + +void vgic_data_host_to_mmio_bus(void *buf, unsigned int len, + unsigned long data); + +unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len); + +unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len); + +void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len, unsigned long val); + +#endif