diff mbox

[08/12] KVM: arm/arm64: implement kvm_io_bus MMIO handling for the VGIC

Message ID 1426263012-22935-9-git-send-email-andre.przywara@arm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Andre Przywara March 13, 2015, 4:10 p.m. UTC
Currently we use a lot of VGIC specific code to do the MMIO
dispatching.
Use the previous reworks to add kvm_io_bus style MMIO handlers.

Those are not yet called by the MMIO abort handler, and the actual
VGIC emulator functions do not make use of them yet; they will be enabled
with the following patches.

Signed-off-by: Andre Przywara <andre.przywara@arm.com>
---
 include/kvm/arm_vgic.h |    9 ++++
 virt/kvm/arm/vgic.c    |  111 ++++++++++++++++++++++++++++++++++++++++++++++++
 virt/kvm/arm/vgic.h    |    7 +++
 3 files changed, 127 insertions(+)

Comments

Christoffer Dall March 14, 2015, 2:27 p.m. UTC | #1
On Fri, Mar 13, 2015 at 04:10:08PM +0000, Andre Przywara wrote:
> Currently we use a lot of VGIC specific code to do the MMIO
> dispatching.
> Use the previous reworks to add kvm_io_bus style MMIO handlers.
> 
> Those are not yet called by the MMIO abort handler, also the actual
> VGIC emulator function do not make use of it yet, but will be enabled
> with the following patches.
> 
> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
> ---
>  include/kvm/arm_vgic.h |    9 ++++
>  virt/kvm/arm/vgic.c    |  111 ++++++++++++++++++++++++++++++++++++++++++++++++
>  virt/kvm/arm/vgic.h    |    7 +++
>  3 files changed, 127 insertions(+)
> 
> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> index b81630b..4bfc6a3 100644
> --- a/include/kvm/arm_vgic.h
> +++ b/include/kvm/arm_vgic.h
> @@ -24,6 +24,7 @@
>  #include <linux/irqreturn.h>
>  #include <linux/spinlock.h>
>  #include <linux/types.h>
> +#include <kvm/iodev.h>
>  
>  #define VGIC_NR_IRQS_LEGACY	256
>  #define VGIC_NR_SGIS		16
> @@ -147,6 +148,14 @@ struct vgic_vm_ops {
>  	int	(*map_resources)(struct kvm *, const struct vgic_params *);
>  };
>  
> +struct vgic_io_device {
> +	gpa_t addr;
> +	int len;
> +	const struct vgic_io_range *reg_ranges;
> +	struct kvm_vcpu *redist_vcpu;
> +	struct kvm_io_device dev;
> +};
> +
>  struct vgic_dist {
>  	spinlock_t		lock;
>  	bool			in_kernel;
> diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
> index 7aae19b..71389b8 100644
> --- a/virt/kvm/arm/vgic.c
> +++ b/virt/kvm/arm/vgic.c
> @@ -32,6 +32,8 @@
>  #include <asm/kvm_arm.h>
>  #include <asm/kvm_mmu.h>
>  #include <trace/events/kvm.h>
> +#include <asm/kvm.h>
> +#include <kvm/iodev.h>
>  
>  /*
>   * How the whole thing works (courtesy of Christoffer Dall):
> @@ -774,6 +776,66 @@ bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run,
>  }
>  
>  /**
> + * vgic_handle_mmio_access - handle an in-kernel MMIO access
> + * This is called by the read/write KVM IO device wrappers below.
> + * @vcpu:	pointer to the vcpu performing the access
> + * @this:	pointer to the KVM IO device in charge
> + * @addr:	guest physical address of the access
> + * @len:	size of the access
> + * @val:	pointer to the data region
> + * @is_write:	read or write access
> + *
> + * returns true if the MMIO access could be performed
> + */
> +static int vgic_handle_mmio_access(struct kvm_vcpu *vcpu,
> +				   struct kvm_io_device *this, gpa_t addr,
> +				   int len, void *val, bool is_write)
> +{
> +	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
> +	struct vgic_io_device *iodev = container_of(this,
> +						    struct vgic_io_device, dev);
> +	struct kvm_run *run = vcpu->run;
> +	const struct vgic_io_range *range;
> +	struct kvm_exit_mmio mmio;
> +	bool updated_state;
> +	gpa_t offset;
> +
> +	offset = addr - iodev->addr;
> +	range = vgic_find_range(iodev->reg_ranges, len, offset);
> +	if (unlikely(!range || !range->handle_mmio)) {
> +		pr_warn("Unhandled access %d %08llx %d\n", is_write, addr, len);
> +		return -ENXIO;
> +	}
> +
> +	mmio.phys_addr = addr;
> +	mmio.len = len;
> +	mmio.is_write = is_write;
> +	if (is_write)
> +		memcpy(mmio.data, val, len);
> +	mmio.private = iodev->redist_vcpu;
> +
> +	spin_lock(&dist->lock);
> +	offset -= range->base;
> +	if (vgic_validate_access(dist, range, offset)) {
> +		updated_state = call_range_handler(vcpu, &mmio, offset, range);
> +		if (!is_write)
> +			memcpy(val, mmio.data, len);
> +	} else {
> +		if (!is_write)
> +			memset(val, 0, len);
> +		updated_state = false;
> +	}
> +	spin_unlock(&dist->lock);
> +	kvm_prepare_mmio(run, &mmio);

we're not the only user of kvm_exit_mmio I believe, so we could rename
this to vgic_io as well and you could change the mmio.data array to be a
void *val pointer, which just gets set to the pointer passed into this
function (which I think points to the kvm_run structs data array) and
you can avoid all these memcopies, right?

> +	kvm_handle_mmio_return(vcpu, run);
> +
> +	if (updated_state)
> +		vgic_kick_vcpus(vcpu->kvm);
> +
> +	return 0;
> +}
> +
> +/**
>   * vgic_handle_mmio - handle an in-kernel MMIO access for the GIC emulation
>   * @vcpu:      pointer to the vcpu performing the access
>   * @run:       pointer to the kvm_run structure
> @@ -797,6 +859,55 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
>  	return vcpu->kvm->arch.vgic.vm_ops.handle_mmio(vcpu, run, mmio);
>  }
>  
> +static int vgic_handle_mmio_read(struct kvm_vcpu *vcpu,
> +				 struct kvm_io_device *this,
> +				 gpa_t addr, int len, void *val)
> +{
> +	return vgic_handle_mmio_access(vcpu, this, addr, len, val, false);
> +}
> +
> +static int vgic_handle_mmio_write(struct kvm_vcpu *vcpu,
> +				  struct kvm_io_device *this,
> +				  gpa_t addr, int len, const void *val)
> +{
> +	return vgic_handle_mmio_access(vcpu, this, addr, len, (void *)val,
> +				       true);
> +}
> +
> +struct kvm_io_device_ops vgic_io_ops = {
> +	.read	= vgic_handle_mmio_read,
> +	.write	= vgic_handle_mmio_write,
> +};
> +

can you add kdocs to this exported function?

> +int vgic_register_kvm_io_dev(struct kvm *kvm, gpa_t base, int len,
> +			     const struct vgic_io_range *ranges,
> +			     int redist_id,

nit: consider renaming to redist_vcpu_id

> +			     struct vgic_io_device *iodev)
> +{
> +	struct kvm_vcpu *vcpu = NULL;
> +	int ret;
> +
> +	if (redist_id >= 0)
> +		vcpu = kvm_get_vcpu(kvm, redist_id);
> +
> +	iodev->addr		= base;
> +	iodev->len		= len;
> +	iodev->reg_ranges	= ranges;
> +	iodev->redist_vcpu	= vcpu;
> +
> +	kvm_iodevice_init(&iodev->dev, &vgic_io_ops);
> +
> +	mutex_lock(&kvm->slots_lock);
> +
> +	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, base, len,
> +				      &iodev->dev);
> +	mutex_unlock(&kvm->slots_lock);
> +	if (ret < 0)
> +		return ret;
> +
> +	return 0;

kvm_io_bus_register_dev returns either 0 or -ERRNO, so you can just
return ret here.

> +}
> +
>  static int vgic_nr_shared_irqs(struct vgic_dist *dist)
>  {
>  	return dist->nr_irqs - VGIC_NR_PRIVATE_IRQS;
> diff --git a/virt/kvm/arm/vgic.h b/virt/kvm/arm/vgic.h
> index ffafb15..f2063a7 100644
> --- a/virt/kvm/arm/vgic.h
> +++ b/virt/kvm/arm/vgic.h
> @@ -20,6 +20,8 @@
>  #ifndef __KVM_VGIC_H__
>  #define __KVM_VGIC_H__
>  
> +#include <kvm/iodev.h>
> +
>  #define VGIC_ADDR_UNDEF		(-1)
>  #define IS_VGIC_ADDR_UNDEF(_x)  ((_x) == VGIC_ADDR_UNDEF)
>  
> @@ -82,6 +84,11 @@ struct vgic_io_range {
>  			    phys_addr_t offset);
>  };
>  
> +int vgic_register_kvm_io_dev(struct kvm *kvm, gpa_t base, int len,
> +			     const struct vgic_io_range *ranges,
> +			     int redist_id,
> +			     struct vgic_io_device *iodev);
> +
>  static inline bool is_in_range(phys_addr_t addr, unsigned long len,
>  			       phys_addr_t baseaddr, unsigned long size)
>  {
> -- 
> 1.7.9.5
>
Andre Przywara March 19, 2015, 3:44 p.m. UTC | #2
Hej Christoffer,

On 14/03/15 14:27, Christoffer Dall wrote:
> On Fri, Mar 13, 2015 at 04:10:08PM +0000, Andre Przywara wrote:
>> Currently we use a lot of VGIC specific code to do the MMIO
>> dispatching.
>> Use the previous reworks to add kvm_io_bus style MMIO handlers.
>>
>> Those are not yet called by the MMIO abort handler, also the actual
>> VGIC emulator function do not make use of it yet, but will be enabled
>> with the following patches.
>>
>> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
>> ---
>>  include/kvm/arm_vgic.h |    9 ++++
>>  virt/kvm/arm/vgic.c    |  111 ++++++++++++++++++++++++++++++++++++++++++++++++
>>  virt/kvm/arm/vgic.h    |    7 +++
>>  3 files changed, 127 insertions(+)
>>
>> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
>> index b81630b..4bfc6a3 100644
>> --- a/include/kvm/arm_vgic.h
>> +++ b/include/kvm/arm_vgic.h
>> @@ -24,6 +24,7 @@
>>  #include <linux/irqreturn.h>
>>  #include <linux/spinlock.h>
>>  #include <linux/types.h>
>> +#include <kvm/iodev.h>
>>
>>  #define VGIC_NR_IRQS_LEGACY 256
>>  #define VGIC_NR_SGIS                16
>> @@ -147,6 +148,14 @@ struct vgic_vm_ops {
>>      int     (*map_resources)(struct kvm *, const struct vgic_params *);
>>  };
>>
>> +struct vgic_io_device {
>> +    gpa_t addr;
>> +    int len;
>> +    const struct vgic_io_range *reg_ranges;
>> +    struct kvm_vcpu *redist_vcpu;
>> +    struct kvm_io_device dev;
>> +};
>> +
>>  struct vgic_dist {
>>      spinlock_t              lock;
>>      bool                    in_kernel;
>> diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
>> index 7aae19b..71389b8 100644
>> --- a/virt/kvm/arm/vgic.c
>> +++ b/virt/kvm/arm/vgic.c
>> @@ -32,6 +32,8 @@
>>  #include <asm/kvm_arm.h>
>>  #include <asm/kvm_mmu.h>
>>  #include <trace/events/kvm.h>
>> +#include <asm/kvm.h>
>> +#include <kvm/iodev.h>
>>
>>  /*
>>   * How the whole thing works (courtesy of Christoffer Dall):
>> @@ -774,6 +776,66 @@ bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run,
>>  }
>>
>>  /**
>> + * vgic_handle_mmio_access - handle an in-kernel MMIO access
>> + * This is called by the read/write KVM IO device wrappers below.
>> + * @vcpu:   pointer to the vcpu performing the access
>> + * @this:   pointer to the KVM IO device in charge
>> + * @addr:   guest physical address of the access
>> + * @len:    size of the access
>> + * @val:    pointer to the data region
>> + * @is_write:       read or write access
>> + *
>> + * returns true if the MMIO access could be performed
>> + */
>> +static int vgic_handle_mmio_access(struct kvm_vcpu *vcpu,
>> +                               struct kvm_io_device *this, gpa_t addr,
>> +                               int len, void *val, bool is_write)
>> +{
>> +    struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
>> +    struct vgic_io_device *iodev = container_of(this,
>> +                                                struct vgic_io_device, dev);
>> +    struct kvm_run *run = vcpu->run;
>> +    const struct vgic_io_range *range;
>> +    struct kvm_exit_mmio mmio;
>> +    bool updated_state;
>> +    gpa_t offset;
>> +
>> +    offset = addr - iodev->addr;
>> +    range = vgic_find_range(iodev->reg_ranges, len, offset);
>> +    if (unlikely(!range || !range->handle_mmio)) {
>> +            pr_warn("Unhandled access %d %08llx %d\n", is_write, addr, len);
>> +            return -ENXIO;
>> +    }
>> +
>> +    mmio.phys_addr = addr;
>> +    mmio.len = len;
>> +    mmio.is_write = is_write;
>> +    if (is_write)
>> +            memcpy(mmio.data, val, len);
>> +    mmio.private = iodev->redist_vcpu;
>> +
>> +    spin_lock(&dist->lock);
>> +    offset -= range->base;
>> +    if (vgic_validate_access(dist, range, offset)) {
>> +            updated_state = call_range_handler(vcpu, &mmio, offset, range);
>> +            if (!is_write)
>> +                    memcpy(val, mmio.data, len);
>> +    } else {
>> +            if (!is_write)
>> +                    memset(val, 0, len);
>> +            updated_state = false;
>> +    }
>> +    spin_unlock(&dist->lock);
>> +    kvm_prepare_mmio(run, &mmio);
>
> we're not the only user of kvm_exit_mmio I believe, so we could rename

(assuming you mean we _are_ the only user here, which I can acknowledge)

> this to vgic_io as well and you could change the mmio.data array to be a
> void *val pointer, which just gets set to the pointer passed into this
> function (which I think points to the kvm_run structs data array) and
> you can avoid all these memcopies, right?

That sounds indeed tempting, but the comment on the struct kvm_exit_mmio
declaration reads:
/*
 * The in-kernel MMIO emulation code wants to use a copy of run->mmio,
 * which is an anonymous type. Use our own type instead.
 */
How I understand this the structure was introduced to _not_ use the same
memory, but use a copy instead. Do you remember any reason for this? And
in how far is this type anonymous? It's even in an uapi header.

Briefly looking at the code we do quite some memcpy on the way.
I am about to go all the way down into that ARM MMIO handling cave now
to check this (Marc, if I am not showing up again after some hours,
please come and rescue me ;-)

Cheers,
Andre.

-- IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium.  Thank you.

ARM Limited, Registered office 110 Fulbourn Road, Cambridge CB1 9NJ, Registered in England & Wales, Company No:  2557590
ARM Holdings plc, Registered office 110 Fulbourn Road, Cambridge CB1 9NJ, Registered in England & Wales, Company No:  2548782
Andre Przywara March 20, 2015, 12:40 p.m. UTC | #3
On 03/19/2015 03:44 PM, Andre Przywara wrote:
> Hej Christoffer,
> 

[ ... ]

>>> +static int vgic_handle_mmio_access(struct kvm_vcpu *vcpu,
>>> +				   struct kvm_io_device *this, gpa_t addr,
>>> +				   int len, void *val, bool is_write)
>>> +{
>>> +	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
>>> +	struct vgic_io_device *iodev = container_of(this,
>>> +						    struct vgic_io_device, dev);
>>> +	struct kvm_run *run = vcpu->run;
>>> +	const struct vgic_io_range *range;
>>> +	struct kvm_exit_mmio mmio;
>>> +	bool updated_state;
>>> +	gpa_t offset;
>>> +
>>> +	offset = addr - iodev->addr;
>>> +	range = vgic_find_range(iodev->reg_ranges, len, offset);
>>> +	if (unlikely(!range || !range->handle_mmio)) {
>>> +		pr_warn("Unhandled access %d %08llx %d\n", is_write, addr, len);
>>> +		return -ENXIO;
>>> +	}
>>> +
>>> +	mmio.phys_addr = addr;
>>> +	mmio.len = len;
>>> +	mmio.is_write = is_write;
>>> +	if (is_write)
>>> +		memcpy(mmio.data, val, len);
>>> +	mmio.private = iodev->redist_vcpu;
>>> +
>>> +	spin_lock(&dist->lock);
>>> +	offset -= range->base;
>>> +	if (vgic_validate_access(dist, range, offset)) {
>>> +		updated_state = call_range_handler(vcpu, &mmio, offset, range);
>>> +		if (!is_write)
>>> +			memcpy(val, mmio.data, len);
>>> +	} else {
>>> +		if (!is_write)
>>> +			memset(val, 0, len);
>>> +		updated_state = false;
>>> +	}
>>> +	spin_unlock(&dist->lock);
>>> +	kvm_prepare_mmio(run, &mmio);
>>
>> we're not the only user of kvm_exit_mmio I believe, so we could rename
> 
> (assuming you mean we _are_ the only user here, which I can acknowledge)
> 
>> this to vgic_io as well and you could change the mmio.data array to be a
>> void *val pointer, which just gets set to the pointer passed into this
>> function (which I think points to the kvm_run structs data array) and
>> you can avoid all these memcopies, right?
> 
> That sounds indeed tempting, but the comment on the struct kvm_exit_mmio
> declaration reads:
> /*
>  * The in-kernel MMIO emulation code wants to use a copy of run->mmio,
>  * which is an anonymous type. Use our own type instead.
>  */
> How I understand this the structure was introduced to _not_ use the same
> memory, but use a copy instead. Do you remember any reason for this? And
> in how far is this type anonymous? It's even in an uapi header.
> 
> Briefly looking at the code we do quite some memcpy on the way.
> I am about to go all the way down into that ARM MMIO handling cave now
> to check this (Marc, if I am not showing up again after some hours,
> please come and rescue me ;-)

So, I feel that there is quite some unneeded copying and masking on the
way, but a real fix would be quite invasive and needs quite some testing
and review. I don't feel like rushing this into a v2 of this series.
I quickly did what you proposed (replacing memcpy by pointer
assignment), and that seems to work, but I don't have many chances of
testing this this weekend, since I am on the road. Also I have to dig
out my cross-endian test scripts first. So not sure if you want to take
the risk with this series.
I changed the other minor points you mentioned in the review though, so
do you want to have a "v1.5" or how do we proceed from here?

Cheers,
Andre.
Christoffer Dall March 20, 2015, 2:24 p.m. UTC | #4
On Thu, Mar 19, 2015 at 03:44:51PM +0000, Andre Przywara wrote:
> Hej Christoffer,
> 
> On 14/03/15 14:27, Christoffer Dall wrote:
> > On Fri, Mar 13, 2015 at 04:10:08PM +0000, Andre Przywara wrote:
> >> Currently we use a lot of VGIC specific code to do the MMIO
> >> dispatching.
> >> Use the previous reworks to add kvm_io_bus style MMIO handlers.
> >>
> >> Those are not yet called by the MMIO abort handler, also the actual
> >> VGIC emulator function do not make use of it yet, but will be enabled
> >> with the following patches.
> >>
> >> Signed-off-by: Andre Przywara <andre.przywara@arm.com>
> >> ---
> >>  include/kvm/arm_vgic.h |    9 ++++
> >>  virt/kvm/arm/vgic.c    |  111 ++++++++++++++++++++++++++++++++++++++++++++++++
> >>  virt/kvm/arm/vgic.h    |    7 +++
> >>  3 files changed, 127 insertions(+)
> >>
> >> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> >> index b81630b..4bfc6a3 100644
> >> --- a/include/kvm/arm_vgic.h
> >> +++ b/include/kvm/arm_vgic.h
> >> @@ -24,6 +24,7 @@
> >>  #include <linux/irqreturn.h>
> >>  #include <linux/spinlock.h>
> >>  #include <linux/types.h>
> >> +#include <kvm/iodev.h>
> >>
> >>  #define VGIC_NR_IRQS_LEGACY 256
> >>  #define VGIC_NR_SGIS                16
> >> @@ -147,6 +148,14 @@ struct vgic_vm_ops {
> >>      int     (*map_resources)(struct kvm *, const struct vgic_params *);
> >>  };
> >>
> >> +struct vgic_io_device {
> >> +    gpa_t addr;
> >> +    int len;
> >> +    const struct vgic_io_range *reg_ranges;
> >> +    struct kvm_vcpu *redist_vcpu;
> >> +    struct kvm_io_device dev;
> >> +};
> >> +
> >>  struct vgic_dist {
> >>      spinlock_t              lock;
> >>      bool                    in_kernel;
> >> diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
> >> index 7aae19b..71389b8 100644
> >> --- a/virt/kvm/arm/vgic.c
> >> +++ b/virt/kvm/arm/vgic.c
> >> @@ -32,6 +32,8 @@
> >>  #include <asm/kvm_arm.h>
> >>  #include <asm/kvm_mmu.h>
> >>  #include <trace/events/kvm.h>
> >> +#include <asm/kvm.h>
> >> +#include <kvm/iodev.h>
> >>
> >>  /*
> >>   * How the whole thing works (courtesy of Christoffer Dall):
> >> @@ -774,6 +776,66 @@ bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run,
> >>  }
> >>
> >>  /**
> >> + * vgic_handle_mmio_access - handle an in-kernel MMIO access
> >> + * This is called by the read/write KVM IO device wrappers below.
> >> + * @vcpu:   pointer to the vcpu performing the access
> >> + * @this:   pointer to the KVM IO device in charge
> >> + * @addr:   guest physical address of the access
> >> + * @len:    size of the access
> >> + * @val:    pointer to the data region
> >> + * @is_write:       read or write access
> >> + *
> >> + * returns true if the MMIO access could be performed
> >> + */
> >> +static int vgic_handle_mmio_access(struct kvm_vcpu *vcpu,
> >> +                               struct kvm_io_device *this, gpa_t addr,
> >> +                               int len, void *val, bool is_write)
> >> +{
> >> +    struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
> >> +    struct vgic_io_device *iodev = container_of(this,
> >> +                                                struct vgic_io_device, dev);
> >> +    struct kvm_run *run = vcpu->run;
> >> +    const struct vgic_io_range *range;
> >> +    struct kvm_exit_mmio mmio;
> >> +    bool updated_state;
> >> +    gpa_t offset;
> >> +
> >> +    offset = addr - iodev->addr;
> >> +    range = vgic_find_range(iodev->reg_ranges, len, offset);
> >> +    if (unlikely(!range || !range->handle_mmio)) {
> >> +            pr_warn("Unhandled access %d %08llx %d\n", is_write, addr, len);
> >> +            return -ENXIO;
> >> +    }
> >> +
> >> +    mmio.phys_addr = addr;
> >> +    mmio.len = len;
> >> +    mmio.is_write = is_write;
> >> +    if (is_write)
> >> +            memcpy(mmio.data, val, len);
> >> +    mmio.private = iodev->redist_vcpu;
> >> +
> >> +    spin_lock(&dist->lock);
> >> +    offset -= range->base;
> >> +    if (vgic_validate_access(dist, range, offset)) {
> >> +            updated_state = call_range_handler(vcpu, &mmio, offset, range);
> >> +            if (!is_write)
> >> +                    memcpy(val, mmio.data, len);
> >> +    } else {
> >> +            if (!is_write)
> >> +                    memset(val, 0, len);
> >> +            updated_state = false;
> >> +    }
> >> +    spin_unlock(&dist->lock);
> >> +    kvm_prepare_mmio(run, &mmio);
> >
> > we're not the only user of kvm_exit_mmio I believe, so we could rename
> 
> (assuming you mean we _are_ the only user here, which I can acknowledge)
> 

yes, I think I wanted to write "now", not "not".

> > this to vgic_io as well and you could change the mmio.data array to be a
> > void *val pointer, which just gets set to the pointer passed into this
> > function (which I think points to the kvm_run structs data array) and
> > you can avoid all these memcopies, right?
> 
> That sounds indeed tempting, but the comment on the struct kvm_exit_mmio
> declaration reads:
> /*
>  * The in-kernel MMIO emulation code wants to use a copy of run->mmio,
>  * which is an anonymous type. Use our own type instead.
>  */
> How I understand this the structure was introduced to _not_ use the same
> memory, but use a copy instead. Do you remember any reason for this? And
> in how far is this type anonymous? It's even in an uapi header.

the kvm exit api is designed without regard for how anything in-kernel
works.  Since the work we were doing was leveraging some of the handling
that pointed to the anonymous struct from user-space, we just copied its
definition.

> 
> Briefly looking at the code we do quite some memcpy on the way.
> I am about to go all the way down into that ARM MMIO handling cave now
> to check this (Marc, if I am not showing up again after some hours,
> please come and rescue me ;-)
> 

For the purposes of this series, the struct is just private vgic
parameter passing now.  You just use the struct kvm_exit_mmio pointer
inside the vgic code to call the range handlers.  You don't have to do
that, you can just define your own struct, call it vgic_mmio_params or
whatever, and instead of it having a data array, it now has a data
pointer.

What I think you'll find is that our need for struct kvm_exit_mmio then
goes away, because it is superseded by what the kvm dev io bus thingy
now uses, and with some clever pattern matching the code should be
fairly trivial, with the worst part being changing the pointer type in
some places.

If it blows up and I missed something, we can leave this for a future
optimization, but it doesn't sound all that scary to me.

Thanks,
-Christoffer
Christoffer Dall March 20, 2015, 2:25 p.m. UTC | #5
On Fri, Mar 20, 2015 at 12:40:02PM +0000, Andre Przywara wrote:
> On 03/19/2015 03:44 PM, Andre Przywara wrote:
> > Hej Christoffer,
> > 
> 
> [ ... ]
> 
> >>> +static int vgic_handle_mmio_access(struct kvm_vcpu *vcpu,
> >>> +				   struct kvm_io_device *this, gpa_t addr,
> >>> +				   int len, void *val, bool is_write)
> >>> +{
> >>> +	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
> >>> +	struct vgic_io_device *iodev = container_of(this,
> >>> +						    struct vgic_io_device, dev);
> >>> +	struct kvm_run *run = vcpu->run;
> >>> +	const struct vgic_io_range *range;
> >>> +	struct kvm_exit_mmio mmio;
> >>> +	bool updated_state;
> >>> +	gpa_t offset;
> >>> +
> >>> +	offset = addr - iodev->addr;
> >>> +	range = vgic_find_range(iodev->reg_ranges, len, offset);
> >>> +	if (unlikely(!range || !range->handle_mmio)) {
> >>> +		pr_warn("Unhandled access %d %08llx %d\n", is_write, addr, len);
> >>> +		return -ENXIO;
> >>> +	}
> >>> +
> >>> +	mmio.phys_addr = addr;
> >>> +	mmio.len = len;
> >>> +	mmio.is_write = is_write;
> >>> +	if (is_write)
> >>> +		memcpy(mmio.data, val, len);
> >>> +	mmio.private = iodev->redist_vcpu;
> >>> +
> >>> +	spin_lock(&dist->lock);
> >>> +	offset -= range->base;
> >>> +	if (vgic_validate_access(dist, range, offset)) {
> >>> +		updated_state = call_range_handler(vcpu, &mmio, offset, range);
> >>> +		if (!is_write)
> >>> +			memcpy(val, mmio.data, len);
> >>> +	} else {
> >>> +		if (!is_write)
> >>> +			memset(val, 0, len);
> >>> +		updated_state = false;
> >>> +	}
> >>> +	spin_unlock(&dist->lock);
> >>> +	kvm_prepare_mmio(run, &mmio);
> >>
> >> we're not the only user of kvm_exit_mmio I believe, so we could rename
> > 
> > (assuming you mean we _are_ the only user here, which I can acknowledge)
> > 
> >> this to vgic_io as well and you could change the mmio.data array to be a
> >> void *val pointer, which just gets set to the pointer passed into this
> >> function (which I think points to the kvm_run structs data array) and
> >> you can avoid all these memcopies, right?
> > 
> > That sounds indeed tempting, but the comment on the struct kvm_exit_mmio
> > declaration reads:
> > /*
> >  * The in-kernel MMIO emulation code wants to use a copy of run->mmio,
> >  * which is an anonymous type. Use our own type instead.
> >  */
> > How I understand this the structure was introduced to _not_ use the same
> > memory, but use a copy instead. Do you remember any reason for this? And
> > in how far is this type anonymous? It's even in an uapi header.
> > 
> > Briefly looking at the code we do quite some memcpy on the way.
> > I am about to go all the way down into that ARM MMIO handling cave now
> > to check this (Marc, if I am not showing up again after some hours,
> > please come and rescue me ;-)
> 
> So, I feel that there is quite some unneeded copying and masking on the
> way, but a real fix would be quite invasive and needs quite some testing
> and review. I don't feel like rushing this into a v2 of this series.
> I quickly did what you proposed (replacing memcpy by pointer
> assignment), and that seems to work, but I don't have many chances of
> testing this this weekend, since I am on the road. Also I have to dig
> out my cross-endian test scripts first. So not sure if you want to take
> the risk with this series.
> I changed the other minor points you mentioned in the review though, so
> do you want to have a "v1.5" or how do we proceed from here?
> 
Why is it so invasive?  See my previous mail.  If you can convince me
that we're touching something truly nasty (code snippet?) then ok, we
can take a version without the cleanup and cleanup later.

If you're out of time, send me whatever you have, and we'll see if I fix
it up further or just leave it for the future.

Thanks,
-Christoffer
diff mbox

Patch

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index b81630b..4bfc6a3 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -24,6 +24,7 @@ 
 #include <linux/irqreturn.h>
 #include <linux/spinlock.h>
 #include <linux/types.h>
+#include <kvm/iodev.h>
 
 #define VGIC_NR_IRQS_LEGACY	256
 #define VGIC_NR_SGIS		16
@@ -147,6 +148,14 @@  struct vgic_vm_ops {
 	int	(*map_resources)(struct kvm *, const struct vgic_params *);
 };
 
+struct vgic_io_device {
+	gpa_t addr;
+	int len;
+	const struct vgic_io_range *reg_ranges;
+	struct kvm_vcpu *redist_vcpu;
+	struct kvm_io_device dev;
+};
+
 struct vgic_dist {
 	spinlock_t		lock;
 	bool			in_kernel;
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 7aae19b..71389b8 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -32,6 +32,8 @@ 
 #include <asm/kvm_arm.h>
 #include <asm/kvm_mmu.h>
 #include <trace/events/kvm.h>
+#include <asm/kvm.h>
+#include <kvm/iodev.h>
 
 /*
  * How the whole thing works (courtesy of Christoffer Dall):
@@ -774,6 +776,66 @@  bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run,
 }
 
 /**
+ * vgic_handle_mmio_access - handle an in-kernel MMIO access
+ * This is called by the read/write KVM IO device wrappers below.
+ * @vcpu:	pointer to the vcpu performing the access
+ * @this:	pointer to the KVM IO device in charge
+ * @addr:	guest physical address of the access
+ * @len:	size of the access
+ * @val:	pointer to the data region
+ * @is_write:	read or write access
+ *
+ * returns true if the MMIO access could be performed
+ */
+static int vgic_handle_mmio_access(struct kvm_vcpu *vcpu,
+				   struct kvm_io_device *this, gpa_t addr,
+				   int len, void *val, bool is_write)
+{
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+	struct vgic_io_device *iodev = container_of(this,
+						    struct vgic_io_device, dev);
+	struct kvm_run *run = vcpu->run;
+	const struct vgic_io_range *range;
+	struct kvm_exit_mmio mmio;
+	bool updated_state;
+	gpa_t offset;
+
+	offset = addr - iodev->addr;
+	range = vgic_find_range(iodev->reg_ranges, len, offset);
+	if (unlikely(!range || !range->handle_mmio)) {
+		pr_warn("Unhandled access %d %08llx %d\n", is_write, addr, len);
+		return -ENXIO;
+	}
+
+	mmio.phys_addr = addr;
+	mmio.len = len;
+	mmio.is_write = is_write;
+	if (is_write)
+		memcpy(mmio.data, val, len);
+	mmio.private = iodev->redist_vcpu;
+
+	spin_lock(&dist->lock);
+	offset -= range->base;
+	if (vgic_validate_access(dist, range, offset)) {
+		updated_state = call_range_handler(vcpu, &mmio, offset, range);
+		if (!is_write)
+			memcpy(val, mmio.data, len);
+	} else {
+		if (!is_write)
+			memset(val, 0, len);
+		updated_state = false;
+	}
+	spin_unlock(&dist->lock);
+	kvm_prepare_mmio(run, &mmio);
+	kvm_handle_mmio_return(vcpu, run);
+
+	if (updated_state)
+		vgic_kick_vcpus(vcpu->kvm);
+
+	return 0;
+}
+
+/**
  * vgic_handle_mmio - handle an in-kernel MMIO access for the GIC emulation
  * @vcpu:      pointer to the vcpu performing the access
  * @run:       pointer to the kvm_run structure
@@ -797,6 +859,55 @@  bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
 	return vcpu->kvm->arch.vgic.vm_ops.handle_mmio(vcpu, run, mmio);
 }
 
+static int vgic_handle_mmio_read(struct kvm_vcpu *vcpu,
+				 struct kvm_io_device *this,
+				 gpa_t addr, int len, void *val)
+{
+	return vgic_handle_mmio_access(vcpu, this, addr, len, val, false);
+}
+
+static int vgic_handle_mmio_write(struct kvm_vcpu *vcpu,
+				  struct kvm_io_device *this,
+				  gpa_t addr, int len, const void *val)
+{
+	return vgic_handle_mmio_access(vcpu, this, addr, len, (void *)val,
+				       true);
+}
+
+struct kvm_io_device_ops vgic_io_ops = {
+	.read	= vgic_handle_mmio_read,
+	.write	= vgic_handle_mmio_write,
+};
+
+int vgic_register_kvm_io_dev(struct kvm *kvm, gpa_t base, int len,
+			     const struct vgic_io_range *ranges,
+			     int redist_id,
+			     struct vgic_io_device *iodev)
+{
+	struct kvm_vcpu *vcpu = NULL;
+	int ret;
+
+	if (redist_id >= 0)
+		vcpu = kvm_get_vcpu(kvm, redist_id);
+
+	iodev->addr		= base;
+	iodev->len		= len;
+	iodev->reg_ranges	= ranges;
+	iodev->redist_vcpu	= vcpu;
+
+	kvm_iodevice_init(&iodev->dev, &vgic_io_ops);
+
+	mutex_lock(&kvm->slots_lock);
+
+	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, base, len,
+				      &iodev->dev);
+	mutex_unlock(&kvm->slots_lock);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
 static int vgic_nr_shared_irqs(struct vgic_dist *dist)
 {
 	return dist->nr_irqs - VGIC_NR_PRIVATE_IRQS;
diff --git a/virt/kvm/arm/vgic.h b/virt/kvm/arm/vgic.h
index ffafb15..f2063a7 100644
--- a/virt/kvm/arm/vgic.h
+++ b/virt/kvm/arm/vgic.h
@@ -20,6 +20,8 @@ 
 #ifndef __KVM_VGIC_H__
 #define __KVM_VGIC_H__
 
+#include <kvm/iodev.h>
+
 #define VGIC_ADDR_UNDEF		(-1)
 #define IS_VGIC_ADDR_UNDEF(_x)  ((_x) == VGIC_ADDR_UNDEF)
 
@@ -82,6 +84,11 @@  struct vgic_io_range {
 			    phys_addr_t offset);
 };
 
+int vgic_register_kvm_io_dev(struct kvm *kvm, gpa_t base, int len,
+			     const struct vgic_io_range *ranges,
+			     int redist_id,
+			     struct vgic_io_device *iodev);
+
 static inline bool is_in_range(phys_addr_t addr, unsigned long len,
 			       phys_addr_t baseaddr, unsigned long size)
 {