[kvmarm,2/2] KVM: ARM: Defer parts of the vgic init until first KVM_RUN

On Sat, Oct 20, 2012 at 6:27 AM, Marc Zyngier <marc.zyngier@arm.com> wrote:
> On Sat, 20 Oct 2012 00:14:42 -0400, Christoffer Dall
> <c.dall@virtualopensystems.com> wrote:
>> The vgic virtual cpu and emulated distributor interfaces must
>> be mapped at a given physical address in the guest.  This address is
>> provided through the KVM_SET_DEVICE_ADDRESS ioctl, which happens after
>> the KVM_CREATE_IRQCHIP ioctl is called, but before the first VCPU is
>> excuted thorugh KVM_RUN.  We create the vgic on KVM_CREATE_IRQCHIP, but
>> query kvm_vgic_ready(kvm), which checks if the vgic.vctrl_base field has
>> been set, before we execute a VCPU, and if it has not been set, we call
>> kvm_vgic_init, which takes care of the remaining setup.
>>
>> We use the IS_VGIC_ADDR_UNDEF() macro, which compares to the
>> VGIC_ADDR_UNDEF constant, to check if an address has been set;  it's
>> unlikely that a device will sit on address 0, but since this is a part
>> of main kernel boot procedure if this stuff is enabled in the config,
>> I'm being paranoid.
>>
>> The distributor and vcpu base addresses used to be a per-host setting
>> global for all VMs, but this is not a requirement and when we want to
>> emulate several boards on a single host, we need the flexibility of
>> storing these guest addresses on a per-VM basis.
>>
>> Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
>> ---
>>  arch/arm/include/asm/kvm_vgic.h |   21 ++++++++--
>>  arch/arm/kvm/arm.c              |   10 ++++-
>>  arch/arm/kvm/vgic.c             |   82
>>  +++++++++++++++++++++++++++------------
>>  3 files changed, 84 insertions(+), 29 deletions(-)
>>
>> diff --git a/arch/arm/include/asm/kvm_vgic.h
>> b/arch/arm/include/asm/kvm_vgic.h
>> index a688132..2de167f 100644
>> --- a/arch/arm/include/asm/kvm_vgic.h
>> +++ b/arch/arm/include/asm/kvm_vgic.h
>> @@ -154,13 +154,14 @@ static inline void vgic_bytemap_set_irq_val(struct
>> vgic_bytemap *x,
>>  struct vgic_dist {
>>  #ifdef CONFIG_KVM_ARM_VGIC
>>       spinlock_t              lock;
>> +     bool                    ready;
>>
>>       /* Virtual control interface mapping */
>>       void __iomem            *vctrl_base;
>>
>> -     /* Distributor mapping in the guest */
>> -     unsigned long           vgic_dist_base;
>> -     unsigned long           vgic_dist_size;
>> +     /* Distributor and vcpu interface mapping in the guest */
>> +     phys_addr_t             vgic_dist_base;
>> +     phys_addr_t             vgic_cpu_base;
>>
>>       /* Distributor enabled */
>>       u32                     enabled;
>> @@ -243,6 +244,7 @@ struct kvm_exit_mmio;
>>  #ifdef CONFIG_KVM_ARM_VGIC
>>  int kvm_vgic_hyp_init(void);
>>  int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr);
>> +int kvm_vgic_create(struct kvm *kvm);
>>  int kvm_vgic_init(struct kvm *kvm);
>>  void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu);
>>  void kvm_vgic_sync_to_cpu(struct kvm_vcpu *vcpu);
>> @@ -252,8 +254,9 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid,
>> unsigned int irq_num,
>>  int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
>>  bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
>>                     struct kvm_exit_mmio *mmio);
>> +bool irqchip_in_kernel(struct kvm *kvm);
>>
>> -#define irqchip_in_kernel(k) (!!((k)->arch.vgic.vctrl_base))
>> +#define vgic_initialized(k)  ((k)->arch.vgic.ready)
>>  #define
>>  vgic_active_irq(v)   (atomic_read(&(v)->arch.vgic_cpu.irq_active_count)
> ==
>>  0)
>>
>>  #else
>> @@ -267,6 +270,11 @@ static inline int kvm_vgic_set_addr(struct kvm
> *kvm,
>> unsigned long type, u64 add
>>       return 0;
>>  }
>>
>> +static inline int kvm_vgic_create(struct kvm *kvm)
>> +{
>> +     return 0;
>> +}
>> +
>>  static inline int kvm_vgic_init(struct kvm *kvm)
>>  {
>>       return 0;
>> @@ -298,6 +306,11 @@ static inline int irqchip_in_kernel(struct kvm
> *kvm)
>>       return 0;
>>  }
>>
>> +static inline bool kvm_vgic_initialized(struct kvm *kvm)
>> +{
>> +     return true;
>> +}
>> +
>>  static inline int vgic_active_irq(struct kvm_vcpu *vcpu)
>>  {
>>       return 0;
>> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
>> index 282794e..d64783e 100644
>> --- a/arch/arm/kvm/arm.c
>> +++ b/arch/arm/kvm/arm.c
>> @@ -636,6 +636,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu,
>> struct kvm_run *run)
>>       if (unlikely(vcpu->arch.target < 0))
>>               return -ENOEXEC;
>>
>> +     /* Initalize the VGIC before running the vcpu */
>> +     if (unlikely(irqchip_in_kernel(vcpu->kvm) &&
>> +                  !vgic_initialized(vcpu->kvm))) {
>> +             ret = kvm_vgic_init(vcpu->kvm);
>> +             if (ret)
>> +                     return ret;
>> +     }
>> +
>>       if (run->exit_reason == KVM_EXIT_MMIO) {
>>               ret = kvm_handle_mmio_return(vcpu, vcpu->run);
>>               if (ret)
>> @@ -889,7 +897,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
>>  #ifdef CONFIG_KVM_ARM_VGIC
>>       case KVM_CREATE_IRQCHIP: {
>>               if (vgic_present)
>> -                     return kvm_vgic_init(kvm);
>> +                     return kvm_vgic_create(kvm);
>>               else
>>                       return -EINVAL;
>>       }
>> diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c
>> index d63b7f8..fa591db 100644
>> --- a/arch/arm/kvm/vgic.c
>> +++ b/arch/arm/kvm/vgic.c
>> @@ -65,12 +65,17 @@
>>   *   interrupt line to be sampled again.
>>   */
>>
>> -/* Temporary hacks, need to be provided by userspace emulation */
>> -#define VGIC_DIST_BASE               0x2c001000
>> +#define VGIC_ADDR_UNDEF              (-1)
>> +#define IS_VGIC_ADDR_UNDEF(_x)  ((_x) == (typeof(_x))VGIC_ADDR_UNDEF)
>
> This is an awkward construct. You should really know the type of what
> you're checking. Consider:

I don't think it's awkward, but a pretty common construct. I can't
know which type I check when it's used for both a pointer and a
phys_addr_t.

>
> #define VGIC_ADDR_UNDEF ((phys_addr_t)(~0UL)
> #define IS_VGIC_ADDR_UNDEF(x) ((x) == VGIC_ADDR_UNDEF)
>
>> +
>> +
>>  #define VGIC_DIST_SIZE               0x1000
>>  #define VGIC_CPU_BASE                0x2c002000
>>  #define VGIC_CPU_SIZE                0x2000
>>
>> +/* Physical address of vgic virtual cpu interface */
>> +static phys_addr_t vgic_vcpu_base;
>> +
>>  /* Virtual control interface base address */
>>  static void __iomem *vgic_vctrl_base;
>>
>> @@ -538,7 +543,7 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct
>> kvm_run *run, struct kvm_exi
>>
>>       if (!irqchip_in_kernel(vcpu->kvm) ||
>>           mmio->phys_addr < base ||
>> -         (mmio->phys_addr + mmio->len) > (base + dist->vgic_dist_size))
>> +         (mmio->phys_addr + mmio->len) > (base + VGIC_DIST_SIZE))
>>               return false;
>>
>>       range = find_matching_range(vgic_ranges, mmio, base);
>> @@ -1027,7 +1032,7 @@ void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
>>               vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY;
>>       }
>>
>> -     BUG_ON(!vcpu->kvm->arch.vgic.vctrl_base);
>> +     BUG_ON(IS_VGIC_ADDR_UNDEF(vcpu->kvm->arch.vgic.vctrl_base));
>>       reg = readl_relaxed(vcpu->kvm->arch.vgic.vctrl_base + GICH_VTR);
>>       vgic_cpu->nr_lr = (reg & 0x1f) + 1;
>>
>> @@ -1101,29 +1106,23 @@ out_free_irq:
>>
>>  int kvm_vgic_init(struct kvm *kvm)
>>  {
>> -     int ret, i;
>> -     struct resource vcpu_res;
>> +     int ret = 0, i;
>>
>>       mutex_lock(&kvm->lock);
>>
>> -     if (of_address_to_resource(vgic_node, 3, &vcpu_res)) {
>> -             kvm_err("Cannot obtain VCPU resource\n");
>> -             ret = -ENXIO;
>> +     if (vgic_initialized(kvm))
>>               goto out;
>> -     }
>>
>> -     if (atomic_read(&kvm->online_vcpus) || kvm->arch.vgic.vctrl_base) {
>> -             ret = -EEXIST;
>> +     if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) ||
>> +         IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) {
>> +             kvm_err("Need to set vgic cpu and dist addresses first\n");
>> +             ret = -ENXIO;
>>               goto out;
>>       }
>>
>> -     spin_lock_init(&kvm->arch.vgic.lock);
>> -     kvm->arch.vgic.vctrl_base = vgic_vctrl_base;
>> -     kvm->arch.vgic.vgic_dist_base = VGIC_DIST_BASE;
>> -     kvm->arch.vgic.vgic_dist_size = VGIC_DIST_SIZE;
>> +     ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base,
>> +                                 vgic_vcpu_base, VGIC_CPU_SIZE);
>>
>> -     ret = kvm_phys_addr_ioremap(kvm, VGIC_CPU_BASE,
>> -                                 vcpu_res.start, VGIC_CPU_SIZE);
>>       if (ret) {
>>               kvm_err("Unable to remap VGIC CPU to VCPU\n");
>>               goto out;
>> @@ -1132,12 +1131,45 @@ int kvm_vgic_init(struct kvm *kvm)
>>       for (i = 32; i < VGIC_NR_IRQS; i += 4)
>>               vgic_set_target_reg(kvm, 0, i);
>>
>> +     kvm_timer_init(kvm);
>> +     kvm->arch.vgic.ready = true;
>>  out:
>>       mutex_unlock(&kvm->lock);
>> +     return ret;
>> +}
>>
>> -     if (!ret)
>> -             kvm_timer_init(kvm);
>> +bool irqchip_in_kernel(struct kvm *kvm)
>> +{
>> +     return !(IS_VGIC_ADDR_UNDEF(vgic_vcpu_base));
>
> Erm... This check is supposed VM specific. Here, you check something that
> will be initialized once and for all when the first VM is run. Use one of
> the vgic *guest* base addresses instead.
>

Ouch, right you are.

(This whole scheme of checking various base addresses for whether
something is initialized or not seems quite brittle, but I don't want
to rework it before a merge)

>> +}
>>
>> +int kvm_vgic_create(struct kvm *kvm)
>> +{
>> +     int ret;
>> +     struct resource vcpu_res;
>> +
>> +     mutex_lock(&kvm->lock);
>> +
>> +     if (of_address_to_resource(vgic_node, 3, &vcpu_res)) {
>> +             kvm_err("Cannot obtain VCPU resource\n");
>> +             ret = -ENXIO;
>> +             goto out;
>> +     }
>
> This function is called on every VM creation. Consider moving this lookup
> and the vgic_vcpu_base assignment to kvm_vgic_hyp_init().
>
>> +     if (atomic_read(&kvm->online_vcpus) || kvm->arch.vgic.vctrl_base) {
>> +             ret = -EEXIST;
>> +             goto out;
>> +     }
>> +
>> +     spin_lock_init(&kvm->arch.vgic.lock);
>> +     kvm->arch.vgic.vctrl_base = vgic_vctrl_base;
>> +     kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
>> +     kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
>> +
>> +     vgic_vcpu_base = vcpu_res.start;
>> +     ret = 0;
>> +out:
>> +     mutex_unlock(&kvm->lock);
>>       return ret;
>>  }
>>
>> @@ -1151,12 +1183,14 @@ int kvm_vgic_set_addr(struct kvm *kvm, unsigned
>> long type, u64 addr)
>>       mutex_lock(&kvm->lock);
>>       switch (type) {
>>       case KVM_VGIC_V2_ADDR_TYPE_DIST:
>> -             if (addr != VGIC_DIST_BASE)
>> -                     return -EINVAL;
>> +             if (!IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base))
>> +                     return -EEXIST;
>> +             kvm->arch.vgic.vgic_dist_base = addr;
>>               break;
>>       case KVM_VGIC_V2_ADDR_TYPE_CPU:
>> -             if (addr != VGIC_CPU_BASE)
>> -                     return -EINVAL;
>> +             if (!IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base))
>> +                     return -EEXIST;
>> +             kvm->arch.vgic.vgic_cpu_base = addr;
>>               break;
>>       default:
>>               r = -ENODEV;
>
> We need some additional checks here about the validity of the address (at
> least page aligned, and non overlapping with memory or other in-kernel
> device models).
>

since we don't have any other in-kernel device models that are memory
mapped, we should cross that bridge when/if we get there. It will not
affect stability of the host kernel, merely not properly boot a guest
if you supply bogus overlapping io regions.

See patch diff:

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[kvmarm,2/2] KVM: ARM: Defer parts of the vgic init until first KVM_RUN

Commit Message

Patch