diff mbox series

[v2,11/16] KVM: introduce a 'mmap' method for KVM devices

Message ID 20190222112840.25000-12-clg@kaod.org (mailing list archive)
State New, archived
Headers show
Series KVM: PPC: Book3S HV: add XIVE native exploitation mode | expand

Commit Message

Cédric Le Goater Feb. 22, 2019, 11:28 a.m. UTC
Some KVM devices will want to handle special mappings related to the
underlying HW. For instance, the XIVE interrupt controller of the
POWER9 processor has MMIO pages for thread interrupt management and
for interrupt source control that need to be exposed to the guest when
the OS has the required support.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 include/linux/kvm_host.h |  1 +
 virt/kvm/kvm_main.c      | 11 +++++++++++
 2 files changed, 12 insertions(+)

Comments

David Gibson Feb. 25, 2019, 3:33 a.m. UTC | #1
On Fri, Feb 22, 2019 at 12:28:35PM +0100, Cédric Le Goater wrote:
> Some KVM devices will want to handle special mappings related to the
> underlying HW. For instance, the XIVE interrupt controller of the
> POWER9 processor has MMIO pages for thread interrupt management and
> for interrupt source control that need to be exposed to the guest when
> the OS has the required support.
> 
> Signed-off-by: Cédric Le Goater <clg@kaod.org>

Ah, when I suggested mmap() on the base device fd, I hadn't realized
there wasn't a facility for that yet.

Have you discussed this with Paolo?  We'll need some core KVM buy-in
to merge this.

> ---
>  include/linux/kvm_host.h |  1 +
>  virt/kvm/kvm_main.c      | 11 +++++++++++
>  2 files changed, 12 insertions(+)
> 
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index c38cc5eb7e73..cbf81487b69f 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -1223,6 +1223,7 @@ struct kvm_device_ops {
>  	int (*has_attr)(struct kvm_device *dev, struct kvm_device_attr *attr);
>  	long (*ioctl)(struct kvm_device *dev, unsigned int ioctl,
>  		      unsigned long arg);
> +	int (*mmap)(struct kvm_device *dev, struct vm_area_struct *vma);
>  };
>  
>  void kvm_device_get(struct kvm_device *dev);
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 585845203db8..84717d8cb5e4 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -2878,6 +2878,16 @@ static long kvm_vcpu_compat_ioctl(struct file *filp,
>  }
>  #endif
>  
> +static int kvm_device_mmap(struct file *filp, struct vm_area_struct *vma)
> +{
> +	struct kvm_device *dev = filp->private_data;
> +
> +	if (dev->ops->mmap)
> +		return dev->ops->mmap(dev, vma);
> +
> +	return -ENODEV;
> +}
> +
>  static int kvm_device_ioctl_attr(struct kvm_device *dev,
>  				 int (*accessor)(struct kvm_device *dev,
>  						 struct kvm_device_attr *attr),
> @@ -2927,6 +2937,7 @@ static const struct file_operations kvm_device_fops = {
>  	.unlocked_ioctl = kvm_device_ioctl,
>  	.release = kvm_device_release,
>  	KVM_COMPAT(kvm_device_ioctl),
> +	.mmap = kvm_device_mmap,
>  };
>  
>  struct kvm_device *kvm_device_from_filp(struct file *filp)
Cédric Le Goater Feb. 25, 2019, 10:57 a.m. UTC | #2
Hello Paolo,

On 2/25/19 4:33 AM, David Gibson wrote:
> On Fri, Feb 22, 2019 at 12:28:35PM +0100, Cédric Le Goater wrote:
>> Some KVM devices will want to handle special mappings related to the
>> underlying HW. For instance, the XIVE interrupt controller of the
>> POWER9 processor has MMIO pages for thread interrupt management and
>> for interrupt source control that need to be exposed to the guest when
>> the OS has the required support.
>>
>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> 
> Ah, when I suggested mmap() on the base device fd, I hadn't realized
> there wasn't a facility for that yet.
> 
> Have you discussed this with Paolo?  

Not yet.

> We'll need some core KVM buy in to merge this.

Here is an extension of the KVM device to allow special mappings,
which we need in order to support the POWER9 XIVE interrupt
controller.

There are two MMIO regions we need to expose to the guest:

 1. the HW MMIO controlling the interrupt presenter registers (TIMA)
 2. the HW MMIO of the interrupt sources, used for interrupt management (ESB)

The TIMA could have been exposed with a page offset in the vCPU mapping
but, as it only makes sense when the XIVE interrupt mode is active, we
chose to use the KVM device fd directly for that. Is that OK?

An alternate solution is to use a device ioctl to allocate an anon fd
and do the mapping, but that seems like extra fuss for the same result. 

Thanks,

C. 

>> ---
>>  include/linux/kvm_host.h |  1 +
>>  virt/kvm/kvm_main.c      | 11 +++++++++++
>>  2 files changed, 12 insertions(+)
>>
>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> index c38cc5eb7e73..cbf81487b69f 100644
>> --- a/include/linux/kvm_host.h
>> +++ b/include/linux/kvm_host.h
>> @@ -1223,6 +1223,7 @@ struct kvm_device_ops {
>>  	int (*has_attr)(struct kvm_device *dev, struct kvm_device_attr *attr);
>>  	long (*ioctl)(struct kvm_device *dev, unsigned int ioctl,
>>  		      unsigned long arg);
>> +	int (*mmap)(struct kvm_device *dev, struct vm_area_struct *vma);
>>  };
>>  
>>  void kvm_device_get(struct kvm_device *dev);
>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>> index 585845203db8..84717d8cb5e4 100644
>> --- a/virt/kvm/kvm_main.c
>> +++ b/virt/kvm/kvm_main.c
>> @@ -2878,6 +2878,16 @@ static long kvm_vcpu_compat_ioctl(struct file *filp,
>>  }
>>  #endif
>>  
>> +static int kvm_device_mmap(struct file *filp, struct vm_area_struct *vma)
>> +{
>> +	struct kvm_device *dev = filp->private_data;
>> +
>> +	if (dev->ops->mmap)
>> +		return dev->ops->mmap(dev, vma);
>> +
>> +	return -ENODEV;
>> +}
>> +
>>  static int kvm_device_ioctl_attr(struct kvm_device *dev,
>>  				 int (*accessor)(struct kvm_device *dev,
>>  						 struct kvm_device_attr *attr),
>> @@ -2927,6 +2937,7 @@ static const struct file_operations kvm_device_fops = {
>>  	.unlocked_ioctl = kvm_device_ioctl,
>>  	.release = kvm_device_release,
>>  	KVM_COMPAT(kvm_device_ioctl),
>> +	.mmap = kvm_device_mmap,
>>  };
>>  
>>  struct kvm_device *kvm_device_from_filp(struct file *filp)
>
Paolo Bonzini Feb. 26, 2019, 12:52 p.m. UTC | #3
On 25/02/19 11:57, Cédric Le Goater wrote:
> Hello Paolo,
> 
> On 2/25/19 4:33 AM, David Gibson wrote:
>> On Fri, Feb 22, 2019 at 12:28:35PM +0100, Cédric Le Goater wrote:
>>> Some KVM devices will want to handle special mappings related to the
>>> underlying HW. For instance, the XIVE interrupt controller of the
>>> POWER9 processor has MMIO pages for thread interrupt management and
>>> for interrupt source control that need to be exposed to the guest when
>>> the OS has the required support.
>>>
>>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
>>
>> Ah, when I suggested mmap() on the base device fd, I hadn't realized
>> there wasn't a facility for that yet.
>>
>> Have you discussed this with Paolo?  
> 
> Not yet.
> 
>> We'll need some core KVM buy in to merge this.
> 
> Here is an extension of the KVM device to allow special mappings.
> Something we would need for the support of the POWER9 XIVE interrupt 
> controller.
> 
> There are two MMIOs we need to expose to the guest : 
> 
>  1. HW MMIO controlling of the interrupt presenter registers (TIMA)
>  2. HW MMIO of the interrupt sources for interrupt management (ESB)
> 
> The TIMA could have been exposed with a page offset in the vCPU mapping
> but as it only makes sense when the XIVE interrupt mode is active, we 
> chose to use directly the KVM device fd for that. Is that ok ? 
> 
> An alternate solution is to use a device ioctl to allocate an anon fd
> and do the mapping, but that seems like extra fuss for the same result. 

It's okay, it's a natural extension to dev_ops - but thanks for asking
anyway. :)

Paolo
David Gibson Feb. 26, 2019, 11:22 p.m. UTC | #4
On Tue, Feb 26, 2019 at 01:52:39PM +0100, Paolo Bonzini wrote:
> On 25/02/19 11:57, Cédric Le Goater wrote:
> > Hello Paolo,
> > 
> > On 2/25/19 4:33 AM, David Gibson wrote:
> >> On Fri, Feb 22, 2019 at 12:28:35PM +0100, Cédric Le Goater wrote:
> >>> Some KVM devices will want to handle special mappings related to the
> >>> underlying HW. For instance, the XIVE interrupt controller of the
> >>> POWER9 processor has MMIO pages for thread interrupt management and
> >>> for interrupt source control that need to be exposed to the guest when
> >>> the OS has the required support.
> >>>
> >>> Signed-off-by: Cédric Le Goater <clg@kaod.org>
> >>
> >> Ah, when I suggested mmap() on the base device fd, I hadn't realized
> >> there wasn't a facility for that yet.
> >>
> >> Have you discussed this with Paolo?  
> > 
> > Not yet.
> > 
> >> We'll need some core KVM buy in to merge this.
> > 
> > Here is an extension of the KVM device to allow special mappings.
> > Something we would need for the support of the POWER9 XIVE interrupt 
> > controller.
> > 
> > There are two MMIOs we need to expose to the guest : 
> > 
> >  1. HW MMIO controlling of the interrupt presenter registers (TIMA)
> >  2. HW MMIO of the interrupt sources for interrupt management (ESB)
> > 
> > The TIMA could have been exposed with a page offset in the vCPU mapping
> > but as it only makes sense when the XIVE interrupt mode is active, we 
> > chose to use directly the KVM device fd for that. Is that ok ? 
> > 
> > An alternate solution is to use a device ioctl to allocate an anon fd
> > and do the mapping, but that seems like extra fuss for the same result. 
> 
> It's okay, it's a natural extension to dev_ops - but thanks for asking
> anyway. :)

Ok, cool.

Given that, do you want to merge directly - since this looks sound
enough, even though the rest of the series needs some polish?  Or
would you prefer it to come in via Paulus' tree?
diff mbox series

Patch

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c38cc5eb7e73..cbf81487b69f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1223,6 +1223,7 @@  struct kvm_device_ops {
 	int (*has_attr)(struct kvm_device *dev, struct kvm_device_attr *attr);
 	long (*ioctl)(struct kvm_device *dev, unsigned int ioctl,
 		      unsigned long arg);
+	int (*mmap)(struct kvm_device *dev, struct vm_area_struct *vma);
 };
 
 void kvm_device_get(struct kvm_device *dev);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 585845203db8..84717d8cb5e4 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2878,6 +2878,16 @@  static long kvm_vcpu_compat_ioctl(struct file *filp,
 }
 #endif
 
+static int kvm_device_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct kvm_device *dev = filp->private_data;
+
+	if (dev->ops->mmap)
+		return dev->ops->mmap(dev, vma);
+
+	return -ENODEV;
+}
+
 static int kvm_device_ioctl_attr(struct kvm_device *dev,
 				 int (*accessor)(struct kvm_device *dev,
 						 struct kvm_device_attr *attr),
@@ -2927,6 +2937,7 @@  static const struct file_operations kvm_device_fops = {
 	.unlocked_ioctl = kvm_device_ioctl,
 	.release = kvm_device_release,
 	KVM_COMPAT(kvm_device_ioctl),
+	.mmap = kvm_device_mmap,
 };
 
 struct kvm_device *kvm_device_from_filp(struct file *filp)