Message ID | 20121001091426.49503.94722.stgit@ubuntu (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Mon, Oct 01, 2012 at 10:14:26AM +0100, Christoffer Dall wrote: > From: Marc Zyngier <marc.zyngier@arm.com> > > Add the init code for the hypervisor, the virtual machine, and > the virtual CPUs. > > An interrupt handler is also wired to allow the VGIC maintenance > interrupts, used to deal with level triggered interrupts and LR > underflows. > > Signed-off-by: Marc Zyngier <marc.zyngier@arm.com> > Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com> > --- [...] > diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c > index b52d4c2..fc2a138 100644 > --- a/arch/arm/kvm/vgic.c > +++ b/arch/arm/kvm/vgic.c > @@ -20,7 +20,14 @@ > #include <linux/kvm_host.h> > #include <linux/interrupt.h> > #include <linux/io.h> > +#include <linux/of.h> > +#include <linux/of_address.h> > +#include <linux/of_irq.h> > + > #include <asm/kvm_emulate.h> > +#include <asm/hardware/gic.h> > +#include <asm/kvm_arm.h> > +#include <asm/kvm_mmu.h> > > /* > * How the whole thing works (courtesy of Christoffer Dall): > @@ -61,6 +68,13 @@ > /* Temporary hacks, need to be provided by userspace emulation */ > #define VGIC_DIST_BASE 0x2c001000 > #define VGIC_DIST_SIZE 0x1000 > +#define VGIC_CPU_BASE 0x2c002000 > +#define VGIC_CPU_SIZE 0x2000 We really don't want the physical memory map for the guest hardwired in the kernel. Please find a way to parameterise this from userspace. Will
On Tue, 2 Oct 2012 10:24:13 +0100, Will Deacon <will.deacon@arm.com> wrote: > On Mon, Oct 01, 2012 at 10:14:26AM +0100, Christoffer Dall wrote: >> From: Marc Zyngier <marc.zyngier@arm.com> >> >> Add the init code for the hypervisor, the virtual machine, and >> the virtual CPUs. >> >> An interrupt handler is also wired to allow the VGIC maintenance >> interrupts, used to deal with level triggered interrupts and LR >> underflows. >> >> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com> >> Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com> >> --- > > [...] > >> diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c >> index b52d4c2..fc2a138 100644 >> --- a/arch/arm/kvm/vgic.c >> +++ b/arch/arm/kvm/vgic.c >> @@ -20,7 +20,14 @@ >> #include <linux/kvm_host.h> >> #include <linux/interrupt.h> >> #include <linux/io.h> >> +#include <linux/of.h> >> +#include <linux/of_address.h> >> +#include <linux/of_irq.h> >> + >> #include <asm/kvm_emulate.h> >> +#include <asm/hardware/gic.h> >> +#include <asm/kvm_arm.h> >> +#include <asm/kvm_mmu.h> >> >> /* >> * How the whole thing works (courtesy of Christoffer Dall): >> @@ -61,6 +68,13 @@ >> /* Temporary hacks, need to be provided by userspace emulation */ >> #define VGIC_DIST_BASE 0x2c001000 >> #define VGIC_DIST_SIZE 0x1000 >> +#define VGIC_CPU_BASE 0x2c002000 >> +#define VGIC_CPU_SIZE 0x2000 > > We really don't want the physical memory map for the guest hardwired in the > kernel. Please find a way to parameterise this from userspace. Yes, this is a known problem. KVM doesn't offer a standard way of passing the address of an interrupt controller (none of the other architectures have it memory mapped). We probably need a separate ioctl for that... M.
On Oct 2, 2012, at 6:25 AM, Marc Zyngier <marc.zyngier@arm.com> wrote: > On Tue, 2 Oct 2012 10:24:13 +0100, Will Deacon <will.deacon@arm.com> > wrote: >> On Mon, Oct 01, 2012 at 10:14:26AM +0100, Christoffer Dall wrote: >>> From: Marc Zyngier <marc.zyngier@arm.com> >>> >>> Add the init code for the hypervisor, the virtual machine, and >>> the virtual CPUs. >>> >>> An interrupt handler is also wired to allow the VGIC maintenance >>> interrupts, used to deal with level triggered interrupts and LR >>> underflows. >>> >>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com> >>> Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com> >>> --- >> >> [...] >> >>> diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c >>> index b52d4c2..fc2a138 100644 >>> --- a/arch/arm/kvm/vgic.c >>> +++ b/arch/arm/kvm/vgic.c >>> @@ -20,7 +20,14 @@ >>> #include <linux/kvm_host.h> >>> #include <linux/interrupt.h> >>> #include <linux/io.h> >>> +#include <linux/of.h> >>> +#include <linux/of_address.h> >>> +#include <linux/of_irq.h> >>> + >>> #include <asm/kvm_emulate.h> >>> +#include <asm/hardware/gic.h> >>> +#include <asm/kvm_arm.h> >>> +#include <asm/kvm_mmu.h> >>> >>> /* >>> * How the whole thing works (courtesy of Christoffer Dall): >>> @@ -61,6 +68,13 @@ >>> /* Temporary hacks, need to be provided by userspace emulation */ >>> #define VGIC_DIST_BASE 0x2c001000 >>> #define VGIC_DIST_SIZE 0x1000 >>> +#define VGIC_CPU_BASE 0x2c002000 >>> +#define VGIC_CPU_SIZE 0x2000 >> >> We really don't want the physical memory map for the guest hardwired in > the >> kernel. Please find a way to parameterise this from userspace. > > Yes, this is a known problem. KVM doesn't offer a standard way of passing > the address of an interrupt controller (none of the other architectures > have it memory mapped). > > We probably need a separate ioctl Thoughts on how to make this API flexible enough? 
Can we somehow provide a device tree to the host kernel, which would be the same device tree the guest uses, which may also describe virtio features, or is this completely sci fi? -Christoffer
On 2 October 2012 18:55, Christoffer Dall <c.dall@virtualopensystems.com> wrote: > On Oct 2, 2012, at 6:25 AM, Marc Zyngier <marc.zyngier@arm.com> wrote: >> On Tue, 2 Oct 2012 10:24:13 +0100, Will Deacon <will.deacon@arm.com> >>> We really don't want the physical memory map for the guest hardwired in >> the >>> kernel. Please find a way to parameterise this from userspace. >> >> Yes, this is a known problem. KVM doesn't offer a standard way of passing >> the address of an interrupt controller (none of the other architectures >> have it memory mapped). >> >> We probably need a separate ioctl > > Thoughts on how to make this API flexible enough? > > Can we somehow provide a device tree to the host kernel, which would > be the same device tree the guest uses, which may also describe virtio > features, or is this completely sci fi? I'm not really in favour of trying to shoehorn device trees in here (among other things, the virtual machine we create should be the actual machine matching the hardware, not something randomly generated from the device tree. Also requiring userspace to manufacture a device tree from scratch is kind of awkward: there's no reason the guest has to be using one, and it's a lot of effort to go to to pass a single address into the kernel...) We probably want to be passing in the "base of the cpu-internal peripherals", rather than "base of the GIC" specifically. For the A15 these are the same thing, but that's not inherent [compare the A9 which has more devices at fixed offsets from a configurable base address]. On hardware this is done by having an input signal that's sampled at reset that tells the CPU where the peripherals are; is there an equivalent of that for any other CPU properties that we have already in the KVM API? -- PMM
On Tue, Oct 02, 2012 at 07:31:43PM +0100, Peter Maydell wrote: > We probably want to be passing in the "base of the cpu-internal > peripherals", rather than "base of the GIC" specifically. For the > A15 these are the same thing, but that's not inherent [compare the > A9 which has more devices at fixed offsets from a configurable > base address]. If you do that, userspace will need a way to probe the emulated CPU so that it knows exactly which set of peripherals there are and which ones it needs to emulate. This feels pretty nasty, given that the vgic is handled more or less completely by the kernel-side of things. Will
On 2 October 2012 20:28, Will Deacon <will.deacon@arm.com> wrote: > On Tue, Oct 02, 2012 at 07:31:43PM +0100, Peter Maydell wrote: >> We probably want to be passing in the "base of the cpu-internal >> peripherals", rather than "base of the GIC" specifically. For the >> A15 these are the same thing, but that's not inherent [compare the >> A9 which has more devices at fixed offsets from a configurable >> base address]. > > If you do that, userspace will need a way to probe the emulated CPU so > that it knows exactly which set of peripherals there are and which ones it > needs to emulate. This feels pretty nasty, given that the vgic is handled > more or less completely by the kernel-side of things. Userspace knows what the emulated CPU is because it tells the kernel which CPU to provide -- the kernel can say "yes" or "no" but it can't provide a different CPU to the one we ask for, or one with bits missing... -- PMM
On Tue, Oct 02, 2012 at 08:45:54PM +0100, Peter Maydell wrote: > On 2 October 2012 20:28, Will Deacon <will.deacon@arm.com> wrote: > > On Tue, Oct 02, 2012 at 07:31:43PM +0100, Peter Maydell wrote: > >> We probably want to be passing in the "base of the cpu-internal > >> peripherals", rather than "base of the GIC" specifically. For the > >> A15 these are the same thing, but that's not inherent [compare the > >> A9 which has more devices at fixed offsets from a configurable > >> base address]. > > > > If you do that, userspace will need a way to probe the emulated CPU so > > that it knows exactly which set of peripherals there are and which ones it > > needs to emulate. This feels pretty nasty, given that the vgic is handled > > more or less completely by the kernel-side of things. > > Userspace knows what the emulated CPU is because it tells the > kernel which CPU to provide -- the kernel can say "yes" or "no" but > it can't provide a different CPU to the one we ask for, or > one with bits missing... Aha, ok, I didn't realise that's how it works. Does userspace just pass the CPUID or is there an identifier provided by kvm? /me jumps back into the code. Thanks, Will
On Wed, Oct 3, 2012 at 6:02 AM, Will Deacon <will.deacon@arm.com> wrote: > On Tue, Oct 02, 2012 at 08:45:54PM +0100, Peter Maydell wrote: >> On 2 October 2012 20:28, Will Deacon <will.deacon@arm.com> wrote: >> > On Tue, Oct 02, 2012 at 07:31:43PM +0100, Peter Maydell wrote: >> >> We probably want to be passing in the "base of the cpu-internal >> >> peripherals", rather than "base of the GIC" specifically. For the >> >> A15 these are the same thing, but that's not inherent [compare the >> >> A9 which has more devices at fixed offsets from a configurable >> >> base address]. >> > >> > If you do that, userspace will need a way to probe the emulated CPU so >> > that it knows exactly which set of peripherals there are and which ones it >> > needs to emulate. This feels pretty nasty, given that the vgic is handled >> > more or less completely by the kernel-side of things. >> >> Userspace knows what the emulated CPU is because it tells the >> kernel which CPU to provide -- the kernel can say "yes" or "no" but >> it can't provide a different CPU to the one we ask for, or >> one with bits missing... > > Aha, ok, I didn't realise that's how it works. Does userspace just pass the > CPUID or is there an identifier provided by kvm? > > /me jumps back into the code. > Userspace provides an identifier (0 for Cortex-A15). This changed in the last patch series, so as to only have one (public and internal) identifier used to index into the array of core-specific coprocessor handlings.
diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h index 9740f1f..c8327f3 100644 --- a/arch/arm/include/asm/kvm_vgic.h +++ b/arch/arm/include/asm/kvm_vgic.h @@ -238,6 +238,9 @@ struct kvm_run; struct kvm_exit_mmio; #ifdef CONFIG_KVM_ARM_VGIC +int kvm_vgic_hyp_init(void); +int kvm_vgic_init(struct kvm *kvm); +void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu); void kvm_vgic_sync_to_cpu(struct kvm_vcpu *vcpu); void kvm_vgic_sync_from_cpu(struct kvm_vcpu *vcpu); int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index be593220..f88fd18 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -61,6 +61,8 @@ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); static u8 kvm_next_vmid; static DEFINE_SPINLOCK(kvm_vmid_lock); +static bool vgic_present; + static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu) { BUG_ON(preemptible()); @@ -185,6 +187,8 @@ int kvm_dev_ioctl_check_extension(long ext) switch (ext) { #ifdef CONFIG_KVM_ARM_VGIC case KVM_CAP_IRQCHIP: + r = vgic_present; + break; #endif case KVM_CAP_USER_MEMORY: case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: @@ -992,8 +996,8 @@ static int init_hyp_mode(void) * Init HYP view of VGIC */ err = kvm_vgic_hyp_init(); - if (err) - goto out_free_mappings; + if (!err) + vgic_present = true; return 0; out_free_vfp: diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c index b52d4c2..fc2a138 100644 --- a/arch/arm/kvm/vgic.c +++ b/arch/arm/kvm/vgic.c @@ -20,7 +20,14 @@ #include <linux/kvm_host.h> #include <linux/interrupt.h> #include <linux/io.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> + #include <asm/kvm_emulate.h> +#include <asm/hardware/gic.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_mmu.h> /* * How the whole thing works (courtesy of Christoffer Dall): @@ -61,6 +68,13 @@ /* Temporary hacks, need to be provided by userspace emulation */ #define VGIC_DIST_BASE 0x2c001000 #define 
VGIC_DIST_SIZE 0x1000 +#define VGIC_CPU_BASE 0x2c002000 +#define VGIC_CPU_SIZE 0x2000 + +/* Virtual control interface base address */ +static void __iomem *vgic_vctrl_base; + +static struct device_node *vgic_node; #define ACCESS_READ_VALUE (1 << 0) #define ACCESS_READ_RAZ (0 << 0) @@ -908,3 +922,188 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, return 0; } + +static irqreturn_t vgic_maintenance_handler(int irq, void *data) +{ + struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)data; + struct vgic_dist *dist; + struct vgic_cpu *vgic_cpu; + + if (WARN(!vcpu, + "VGIC interrupt on CPU %d with no vcpu\n", smp_processor_id())) + return IRQ_HANDLED; + + vgic_cpu = &vcpu->arch.vgic_cpu; + dist = &vcpu->kvm->arch.vgic; + kvm_debug("MISR = %08x\n", vgic_cpu->vgic_misr); + + /* + * We do not need to take the distributor lock here, since the only + * action we perform is clearing the irq_active_bit for an EOIed + * level interrupt. There is a potential race with + * the queuing of an interrupt in __kvm_sync_to_cpu(), where we check + * if the interrupt is already active. Two possibilities: + * + * - The queuing is occuring on the same vcpu: cannot happen, as we're + * already in the context of this vcpu, and executing the handler + * - The interrupt has been migrated to another vcpu, and we ignore + * this interrupt for this run. Big deal. It is still pending though, + * and will get considered when this vcpu exits. + */ + if (vgic_cpu->vgic_misr & VGIC_MISR_EOI) { + /* + * Some level interrupts have been EOIed. Clear their + * active bit. 
+ */ + int lr, irq; + + for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_eisr, + vgic_cpu->nr_lr) { + irq = vgic_cpu->vgic_lr[lr] & VGIC_LR_VIRTUALID; + + vgic_bitmap_set_irq_val(&dist->irq_active, + vcpu->vcpu_id, irq, 0); + vgic_cpu->vgic_lr[lr] &= ~VGIC_LR_EOI; + writel_relaxed(vgic_cpu->vgic_lr[lr], + dist->vctrl_base + GICH_LR0 + (lr << 2)); + } + } + + if (vgic_cpu->vgic_misr & VGIC_MISR_U) { + vgic_cpu->vgic_hcr &= ~VGIC_HCR_UIE; + writel_relaxed(vgic_cpu->vgic_hcr, dist->vctrl_base + GICH_HCR); + } + + return IRQ_HANDLED; +} + +void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + u32 reg; + int i; + + if (!irqchip_in_kernel(vcpu->kvm)) + return; + + for (i = 0; i < VGIC_NR_IRQS; i++) { + if (i < 16) + vgic_bitmap_set_irq_val(&dist->irq_enabled, + vcpu->vcpu_id, i, 1); + if (i < 32) + vgic_bitmap_set_irq_val(&dist->irq_cfg, + vcpu->vcpu_id, i, 1); + + vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY; + } + + BUG_ON(!vcpu->kvm->arch.vgic.vctrl_base); + reg = readl_relaxed(vcpu->kvm->arch.vgic.vctrl_base + GICH_VTR); + vgic_cpu->nr_lr = (reg & 0x1f) + 1; + + reg = readl_relaxed(vcpu->kvm->arch.vgic.vctrl_base + GICH_VMCR); + vgic_cpu->vgic_vmcr = reg | (0x1f << 27); /* Priority */ + + vgic_cpu->vgic_hcr |= VGIC_HCR_EN; /* Get the show on the road... 
*/ +} + +static void vgic_init_maintenance_interrupt(void *info) +{ + unsigned int *irqp = info; + + enable_percpu_irq(*irqp, 0); +} + +int kvm_vgic_hyp_init(void) +{ + int ret; + unsigned int irq; + struct resource vctrl_res; + + vgic_node = of_find_compatible_node(NULL, NULL, "arm,cortex-a15-gic"); + if (!vgic_node) + return -ENODEV; + + irq = irq_of_parse_and_map(vgic_node, 0); + if (!irq) + return -ENXIO; + + ret = request_percpu_irq(irq, vgic_maintenance_handler, + "vgic", kvm_get_running_vcpus()); + if (ret) { + kvm_err("Cannot register interrupt %d\n", irq); + return ret; + } + + ret = of_address_to_resource(vgic_node, 2, &vctrl_res); + if (ret) { + kvm_err("Cannot obtain VCTRL resource\n"); + goto out_free_irq; + } + + vgic_vctrl_base = of_iomap(vgic_node, 2); + if (!vgic_vctrl_base) { + kvm_err("Cannot ioremap VCTRL\n"); + ret = -ENOMEM; + goto out_free_irq; + } + + ret = create_hyp_io_mappings(vgic_vctrl_base, + vgic_vctrl_base + resource_size(&vctrl_res), + vctrl_res.start); + if (ret) { + kvm_err("Cannot map VCTRL into hyp\n"); + goto out_unmap; + } + + kvm_info("%s@%llx IRQ%d\n", vgic_node->name, vctrl_res.start, irq); + on_each_cpu(vgic_init_maintenance_interrupt, &irq, 1); + + return 0; + +out_unmap: + iounmap(vgic_vctrl_base); +out_free_irq: + free_percpu_irq(irq, kvm_get_running_vcpus()); + + return ret; +} + +int kvm_vgic_init(struct kvm *kvm) +{ + int ret, i; + struct resource vcpu_res; + + mutex_lock(&kvm->lock); + + if (of_address_to_resource(vgic_node, 3, &vcpu_res)) { + kvm_err("Cannot obtain VCPU resource\n"); + ret = -ENXIO; + goto out; + } + + if (atomic_read(&kvm->online_vcpus) || kvm->arch.vgic.vctrl_base) { + ret = -EEXIST; + goto out; + } + + spin_lock_init(&kvm->arch.vgic.lock); + kvm->arch.vgic.vctrl_base = vgic_vctrl_base; + kvm->arch.vgic.vgic_dist_base = VGIC_DIST_BASE; + kvm->arch.vgic.vgic_dist_size = VGIC_DIST_SIZE; + + ret = kvm_phys_addr_ioremap(kvm, VGIC_CPU_BASE, + vcpu_res.start, VGIC_CPU_SIZE); + if (ret) { + 
kvm_err("Unable to remap VGIC CPU to VCPU\n"); + goto out; + } + + for (i = 32; i < VGIC_NR_IRQS; i += 4) + vgic_set_target_reg(kvm, 0, i); + +out: + mutex_unlock(&kvm->lock); + return ret; +}