diff mbox

[v11,05/22] vfio iommu: Added pin and unpin callback functions to vfio_iommu_driver_ops

Message ID 1478293856-8191-6-git-send-email-kwankhede@nvidia.com (mailing list archive)
State New, archived
Headers show

Commit Message

Kirti Wankhede Nov. 4, 2016, 9:10 p.m. UTC
Added two new callback functions to struct vfio_iommu_driver_ops. Backend
IOMMU module that supports pining and unpinning pages for mdev devices
should provide these functions.
Added APIs for pining and unpining pages to VFIO module. These calls back
into backend iommu module to actually pin and unpin pages.
Renamed static functions in vfio_type1_iommu.c to resolve conflicts

Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
Signed-off-by: Neo Jia <cjia@nvidia.com>
Change-Id: Ia7417723aaae86bec2959ad9ae6c2915ddd340e0
---
 drivers/vfio/vfio.c             | 96 +++++++++++++++++++++++++++++++++++++++++
 drivers/vfio/vfio_iommu_type1.c | 20 ++++-----
 include/linux/vfio.h            | 14 +++++-
 3 files changed, 119 insertions(+), 11 deletions(-)

Comments

Alex Williamson Nov. 7, 2016, 7:36 p.m. UTC | #1
On Sat, 5 Nov 2016 02:40:39 +0530
Kirti Wankhede <kwankhede@nvidia.com> wrote:

> Added two new callback functions to struct vfio_iommu_driver_ops. Backend
> IOMMU module that supports pining and unpinning pages for mdev devices
> should provide these functions.
> Added APIs for pining and unpining pages to VFIO module. These calls back
> into backend iommu module to actually pin and unpin pages.
> Renamed static functions in vfio_type1_iommu.c to resolve conflicts
> 
> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
> Signed-off-by: Neo Jia <cjia@nvidia.com>
> Change-Id: Ia7417723aaae86bec2959ad9ae6c2915ddd340e0
> ---
>  drivers/vfio/vfio.c             | 96 +++++++++++++++++++++++++++++++++++++++++
>  drivers/vfio/vfio_iommu_type1.c | 20 ++++-----
>  include/linux/vfio.h            | 14 +++++-
>  3 files changed, 119 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
> index 2e83bdf007fe..76d260e98930 100644
> --- a/drivers/vfio/vfio.c
> +++ b/drivers/vfio/vfio.c
> @@ -1799,6 +1799,102 @@ void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
>  }
>  EXPORT_SYMBOL_GPL(vfio_info_cap_shift);
>  
> +
> +/*
> + * Pin a set of guest PFNs and return their associated host PFNs for local
> + * domain only.
> + * @dev [in] : device
> + * @user_pfn [in]: array of user/guest PFNs
> + * @npage [in]: count of array elements
> + * @prot [in] : protection flags
> + * @phys_pfn[out] : array of host PFNs
> + * Return error or number of pages pinned.
> + */
> +int vfio_pin_pages(struct device *dev, unsigned long *user_pfn,
> +		   int npage, int prot, unsigned long *phys_pfn)
> +{
> +	struct vfio_container *container;
> +	struct vfio_group *group;
> +	struct vfio_iommu_driver *driver;
> +	int ret;
> +
> +	if (!dev || !user_pfn || !phys_pfn)
> +		return -EINVAL;
> +
> +	group = vfio_group_get_from_dev(dev);
> +	if (IS_ERR(group))
> +		return PTR_ERR(group);
> +
> +	ret = vfio_group_add_container_user(group);
> +	if (ret)
> +		goto err_pin_pages;
> +
> +	container = group->container;
> +	down_read(&container->group_lock);
> +
> +	driver = container->iommu_driver;
> +	if (likely(driver && driver->ops->pin_pages))
> +		ret = driver->ops->pin_pages(container->iommu_data, user_pfn,
> +					     npage, prot, phys_pfn);
> +	else
> +		ret = -EINVAL;

-ENOTTY might be a more appropriate error return here and below since
we're not signaling invalid argument, we're signaling lack of support.

> +
> +	up_read(&container->group_lock);
> +	vfio_group_try_dissolve_container(group);
> +
> +err_pin_pages:
> +	vfio_group_put(group);
> +	return ret;
> +

Unnecessary extra line

> +}
> +EXPORT_SYMBOL(vfio_pin_pages);
> +
> +/*
> + * Unpin set of host PFNs for local domain only.
> + * @dev [in] : device
> + * @user_pfn [in]: array of user/guest PFNs to be unpinned
> + * @pfn [in] : array of host PFNs to be unpinned.
> + * @npage [in] :count of elements in array, that is number of pages.
> + * Return error or number of pages unpinned.
> + */
> +int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn,
> +		     unsigned long *pfn, int npage)
> +{
> +	struct vfio_container *container;
> +	struct vfio_group *group;
> +	struct vfio_iommu_driver *driver;
> +	int ret;
> +
> +	if (!dev || !pfn)
> +		return -EINVAL;
> +
> +	group = vfio_group_get_from_dev(dev);
> +	if (IS_ERR(group))
> +		return PTR_ERR(group);
> +
> +	ret = vfio_group_add_container_user(group);
> +	if (ret)
> +		goto err_unpin_pages;
> +
> +	container = group->container;
> +	down_read(&container->group_lock);
> +
> +	driver = container->iommu_driver;
> +	if (likely(driver && driver->ops->unpin_pages))
> +		ret = driver->ops->unpin_pages(container->iommu_data, user_pfn,
> +					       pfn, npage);
> +	else
> +		ret = -EINVAL;
> +
> +	up_read(&container->group_lock);
> +	vfio_group_try_dissolve_container(group);
> +
> +err_unpin_pages:
> +	vfio_group_put(group);
> +	return ret;
> +}
> +EXPORT_SYMBOL(vfio_unpin_pages);
> +
>  /**
>   * Module/class support
>   */
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index 2ba19424e4a1..7fb87f008e0a 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -259,8 +259,8 @@ static int vaddr_get_pfn(unsigned long vaddr, int prot, unsigned long *pfn)
>   * the iommu can only map chunks of consecutive pfns anyway, so get the
>   * first page and all consecutive pages with the same locking.
>   */
> -static long vfio_pin_pages(unsigned long vaddr, long npage,
> -			   int prot, unsigned long *pfn_base)
> +static long __vfio_pin_pages_remote(unsigned long vaddr, long npage,
> +				    int prot, unsigned long *pfn_base)

nit, what is the additional underscore prefix intended to imply?
Appending _remote is sufficient to avoid the symbol conflict.

>  {
>  	unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
>  	bool lock_cap = capable(CAP_IPC_LOCK);
> @@ -318,8 +318,8 @@ static long vfio_pin_pages(unsigned long vaddr, long npage,
>  	return i;
>  }
>  
> -static long vfio_unpin_pages(unsigned long pfn, long npage,
> -			     int prot, bool do_accounting)
> +static long __vfio_unpin_pages_remote(unsigned long pfn, long npage,
> +				      int prot, bool do_accounting)
>  {
>  	unsigned long unlocked = 0;
>  	long i;
> @@ -382,9 +382,9 @@ static void vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma)
>  		if (WARN_ON(!unmapped))
>  			break;
>  
> -		unlocked += vfio_unpin_pages(phys >> PAGE_SHIFT,
> -					     unmapped >> PAGE_SHIFT,
> -					     dma->prot, false);
> +		unlocked += __vfio_unpin_pages_remote(phys >> PAGE_SHIFT,
> +						      unmapped >> PAGE_SHIFT,
> +						      dma->prot, false);
>  		iova += unmapped;
>  
>  		cond_resched();
> @@ -613,8 +613,8 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
>  
>  	while (size) {
>  		/* Pin a contiguous chunk of memory */
> -		npage = vfio_pin_pages(vaddr + dma->size,
> -				       size >> PAGE_SHIFT, prot, &pfn);
> +		npage = __vfio_pin_pages_remote(vaddr + dma->size,
> +						size >> PAGE_SHIFT, prot, &pfn);
>  		if (npage <= 0) {
>  			WARN_ON(!npage);
>  			ret = (int)npage;
> @@ -624,7 +624,7 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
>  		/* Map it! */
>  		ret = vfio_iommu_map(iommu, iova + dma->size, pfn, npage, prot);
>  		if (ret) {
> -			vfio_unpin_pages(pfn, npage, prot, true);
> +			__vfio_unpin_pages_remote(pfn, npage, prot, true);
>  			break;
>  		}
>  
> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
> index 0ecae0b1cd34..ba1b64cb7d4b 100644
> --- a/include/linux/vfio.h
> +++ b/include/linux/vfio.h
> @@ -75,7 +75,13 @@ struct vfio_iommu_driver_ops {
>  					struct iommu_group *group);
>  	void		(*detach_group)(void *iommu_data,
>  					struct iommu_group *group);
> -
> +	int		(*pin_pages)(void *iommu_data, unsigned long *user_pfn,
> +				     int npage, int prot,
> +				     unsigned long *phys_pfn);
> +	int		(*unpin_pages)(void *iommu_data,

Are we changing from long to int here simply because of the absurdity
in passing in more than a 2^31 entry array, that would already consume
more than 16GB itself?

> +				       unsigned long *user_pfn,
> +				       unsigned long *pfn,

nit, use phys_pfn so as to match the pin function?

> +				       int npage);
>  };
>  
>  extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops);
> @@ -127,6 +133,12 @@ static inline long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
>  }
>  #endif /* CONFIG_EEH */
>  
> +extern int vfio_pin_pages(struct device *dev, unsigned long *user_pfn,
> +			  int npage, int prot, unsigned long *phys_pfn);
> +
> +extern int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn,
> +			    unsigned long *pfn, int npage);
> +
>  /*
>   * IRQfd - generic
>   */

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Kirti Wankhede Nov. 8, 2016, 1:55 p.m. UTC | #2
On 11/8/2016 1:06 AM, Alex Williamson wrote:
> On Sat, 5 Nov 2016 02:40:39 +0530
> Kirti Wankhede <kwankhede@nvidia.com> wrote:
> 
...
>> +int vfio_pin_pages(struct device *dev, unsigned long *user_pfn,
>> +		   int npage, int prot, unsigned long *phys_pfn)
>> +{
>> +	struct vfio_container *container;
>> +	struct vfio_group *group;
>> +	struct vfio_iommu_driver *driver;
>> +	int ret;
>> +
>> +	if (!dev || !user_pfn || !phys_pfn)
>> +		return -EINVAL;
>> +
>> +	group = vfio_group_get_from_dev(dev);
>> +	if (IS_ERR(group))
>> +		return PTR_ERR(group);
>> +
>> +	ret = vfio_group_add_container_user(group);
>> +	if (ret)
>> +		goto err_pin_pages;
>> +
>> +	container = group->container;
>> +	down_read(&container->group_lock);
>> +
>> +	driver = container->iommu_driver;
>> +	if (likely(driver && driver->ops->pin_pages))
>> +		ret = driver->ops->pin_pages(container->iommu_data, user_pfn,
>> +					     npage, prot, phys_pfn);
>> +	else
>> +		ret = -EINVAL;
> 
> -ENOTTY might be a more appropriate error return here and below since
> we're not signaling invalid argument, we're signaling lack of support.
> 

Used -EINVAL in sync with other driver->ops like read, write and mmap.
Changing it to -ENOTTY as you suggested above since these ops are optional.

...

>> -static long vfio_pin_pages(unsigned long vaddr, long npage,
>> -			   int prot, unsigned long *pfn_base)
>> +static long __vfio_pin_pages_remote(unsigned long vaddr, long npage,
>> +				    int prot, unsigned long *pfn_base)
> 
> nit, what is the additional underscore prefix intended to imply?
> Appending _remote is sufficient to avoid the symbol conflict.
>

This function name changed in review process from start, we started with
changing to __vfio_pin_pages and then added _remote to it later. We can
remove '__' from it. Updating.

...

>> -
>> +	int		(*pin_pages)(void *iommu_data, unsigned long *user_pfn,
>> +				     int npage, int prot,
>> +				     unsigned long *phys_pfn);
>> +	int		(*unpin_pages)(void *iommu_data,
> 
> Are we changing from long to int here simply because of the absurdity
> in passing in more than a 2^31 entry array, that would already consume
> more than 16GB itself?
> 

These are on demand pin/unpin request, will that request go beyond 16GB
limit? For Nvidia vGPU solution, pin request will not go beyond this limit.

>> +				       unsigned long *user_pfn,
>> +				       unsigned long *pfn,
> 
> nit, use phys_pfn so as to match the pin function?
> 

Ok.

>> +				       int npage);
>>  };
>>  
>>  extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops);
>> @@ -127,6 +133,12 @@ static inline long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
>>  }
>>  #endif /* CONFIG_EEH */
>>  
>> +extern int vfio_pin_pages(struct device *dev, unsigned long *user_pfn,
>> +			  int npage, int prot, unsigned long *phys_pfn);
>> +
>> +extern int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn,
>> +			    unsigned long *pfn, int npage);
>> +
>>  /*
>>   * IRQfd - generic
>>   */
> 

Thanks,
Kirti
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alex Williamson Nov. 8, 2016, 4:39 p.m. UTC | #3
On Tue, 8 Nov 2016 19:25:35 +0530
Kirti Wankhede <kwankhede@nvidia.com> wrote:

> On 11/8/2016 1:06 AM, Alex Williamson wrote:
> > On Sat, 5 Nov 2016 02:40:39 +0530
> > Kirti Wankhede <kwankhede@nvidia.com> wrote:
> >   
> ...
> >> +int vfio_pin_pages(struct device *dev, unsigned long *user_pfn,
> >> +		   int npage, int prot, unsigned long *phys_pfn)
> >> +{
> >> +	struct vfio_container *container;
> >> +	struct vfio_group *group;
> >> +	struct vfio_iommu_driver *driver;
> >> +	int ret;
> >> +
> >> +	if (!dev || !user_pfn || !phys_pfn)
> >> +		return -EINVAL;
> >> +
> >> +	group = vfio_group_get_from_dev(dev);
> >> +	if (IS_ERR(group))
> >> +		return PTR_ERR(group);
> >> +
> >> +	ret = vfio_group_add_container_user(group);
> >> +	if (ret)
> >> +		goto err_pin_pages;
> >> +
> >> +	container = group->container;
> >> +	down_read(&container->group_lock);
> >> +
> >> +	driver = container->iommu_driver;
> >> +	if (likely(driver && driver->ops->pin_pages))
> >> +		ret = driver->ops->pin_pages(container->iommu_data, user_pfn,
> >> +					     npage, prot, phys_pfn);
> >> +	else
> >> +		ret = -EINVAL;  
> > 
> > -ENOTTY might be a more appropriate error return here and below since
> > we're not signaling invalid argument, we're signaling lack of support.
> >   
> 
> Used -EINVAL in sync with other driver->ops like read, write and mmap.
> Changing it to -ENOTTY as you suggested above since these ops are optional.

TBH, I'm not sure which is better, but it's nice to try to
differentiate different error paths with different errno values.

> ...
> 
> >> -static long vfio_pin_pages(unsigned long vaddr, long npage,
> >> -			   int prot, unsigned long *pfn_base)
> >> +static long __vfio_pin_pages_remote(unsigned long vaddr, long npage,
> >> +				    int prot, unsigned long *pfn_base)  
> > 
> > nit, what is the additional underscore prefix intended to imply?
> > Appending _remote is sufficient to avoid the symbol conflict.
> >  
> 
> This function name changed in review process from start, we started with
> changing to __vfio_pin_pages and then added _remote to it later. We can
> remove '__' from it. Updating.
> 
> ...
> 
> >> -
> >> +	int		(*pin_pages)(void *iommu_data, unsigned long *user_pfn,
> >> +				     int npage, int prot,
> >> +				     unsigned long *phys_pfn);
> >> +	int		(*unpin_pages)(void *iommu_data,  
> > 
> > Are we changing from long to int here simply because of the absurdity
> > in passing in more than a 2^31 entry array, that would already consume
> > more than 16GB itself?
> >   
> 
> These are on demand pin/unpin request, will that request go beyond 16GB
> limit? For Nvidia vGPU solution, pin request will not go beyond this limit.

16G is simply the size of the user_pfn or phys_pfn arrays at a maximal
int32_t npage value, the interface actually allows mapping up to 8TB
per call, but at that point we have 16GB of input, 16GB of output, and
80GB of vfio_pfns created.  So I don't really have a problem changing
form long to int given lack of scalability in the API in general, but
it does make me second guess the API itself.  Thanks,

Alex
 
> >> +				       unsigned long *user_pfn,
> >> +				       unsigned long *pfn,  
> > 
> > nit, use phys_pfn so as to match the pin function?
> >   
> 
> Ok.
> 
> >> +				       int npage);
> >>  };
> >>  
> >>  extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops);
> >> @@ -127,6 +133,12 @@ static inline long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
> >>  }
> >>  #endif /* CONFIG_EEH */
> >>  
> >> +extern int vfio_pin_pages(struct device *dev, unsigned long *user_pfn,
> >> +			  int npage, int prot, unsigned long *phys_pfn);
> >> +
> >> +extern int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn,
> >> +			    unsigned long *pfn, int npage);
> >> +
> >>  /*
> >>   * IRQfd - generic
> >>   */  
> >   
> 
> Thanks,
> Kirti

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Kirti Wankhede Nov. 8, 2016, 6:47 p.m. UTC | #4
On 11/8/2016 10:09 PM, Alex Williamson wrote:
> On Tue, 8 Nov 2016 19:25:35 +0530
> Kirti Wankhede <kwankhede@nvidia.com> wrote:
> 
...

>>>> -
>>>> +	int		(*pin_pages)(void *iommu_data, unsigned long *user_pfn,
>>>> +				     int npage, int prot,
>>>> +				     unsigned long *phys_pfn);
>>>> +	int		(*unpin_pages)(void *iommu_data,  
>>>
>>> Are we changing from long to int here simply because of the absurdity
>>> in passing in more than a 2^31 entry array, that would already consume
>>> more than 16GB itself?
>>>   
>>
>> These are on demand pin/unpin request, will that request go beyond 16GB
>> limit? For Nvidia vGPU solution, pin request will not go beyond this limit.
> 
> 16G is simply the size of the user_pfn or phys_pfn arrays at a maximal
> int32_t npage value, the interface actually allows mapping up to 8TB
> per call, but at that point we have 16GB of input, 16GB of output, and
> 80GB of vfio_pfns created.  So I don't really have a problem changing
> form long to int given lack of scalability in the API in general, but
> it does make me second guess the API itself.  Thanks,
> 

Changing to 'long', in future we might enhance this API without changing
it signature.

Thanks,
Kirti
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alex Williamson Nov. 8, 2016, 7:14 p.m. UTC | #5
On Wed, 9 Nov 2016 00:17:53 +0530
Kirti Wankhede <kwankhede@nvidia.com> wrote:

> On 11/8/2016 10:09 PM, Alex Williamson wrote:
> > On Tue, 8 Nov 2016 19:25:35 +0530
> > Kirti Wankhede <kwankhede@nvidia.com> wrote:
> >   
> ...
> 
> >>>> -
> >>>> +	int		(*pin_pages)(void *iommu_data, unsigned long *user_pfn,
> >>>> +				     int npage, int prot,
> >>>> +				     unsigned long *phys_pfn);
> >>>> +	int		(*unpin_pages)(void *iommu_data,    
> >>>
> >>> Are we changing from long to int here simply because of the absurdity
> >>> in passing in more than a 2^31 entry array, that would already consume
> >>> more than 16GB itself?
> >>>     
> >>
> >> These are on demand pin/unpin request, will that request go beyond 16GB
> >> limit? For Nvidia vGPU solution, pin request will not go beyond this limit.  
> > 
> > 16G is simply the size of the user_pfn or phys_pfn arrays at a maximal
> > int32_t npage value, the interface actually allows mapping up to 8TB
> > per call, but at that point we have 16GB of input, 16GB of output, and
> > 80GB of vfio_pfns created.  So I don't really have a problem changing
> > form long to int given lack of scalability in the API in general, but
> > it does make me second guess the API itself.  Thanks,
> >   
> 
> Changing to 'long', in future we might enhance this API without changing
> it signature.

I think the pfn arrays are more of a problem long term than whether we
can only map 2^31 pfns in one call.  I particularly dislike that the
caller provides both the iova and pfn arrays for unpinning.  Being an
in-kernel driver, we should trust it, but it makes the interface
difficult to use and seems like it indicates that our tracking data
structures aren't architected the way they should be.  Upstream, this
API will need to be flexible and change over time, it's only the
downstream distros that may lock in a kABI.  Not breaking them should
be a consideration, but also needs to be weighted against long term
upstream goals.  Thanks,

Alex
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 2e83bdf007fe..76d260e98930 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -1799,6 +1799,102 @@  void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
 }
 EXPORT_SYMBOL_GPL(vfio_info_cap_shift);
 
+
+/*
+ * Pin a set of guest PFNs and return their associated host PFNs for local
+ * domain only.
+ * @dev [in] : device
+ * @user_pfn [in]: array of user/guest PFNs
+ * @npage [in]: count of array elements
+ * @prot [in] : protection flags
+ * @phys_pfn[out] : array of host PFNs
+ * Return error or number of pages pinned.
+ */
+int vfio_pin_pages(struct device *dev, unsigned long *user_pfn,
+		   int npage, int prot, unsigned long *phys_pfn)
+{
+	struct vfio_container *container;
+	struct vfio_group *group;
+	struct vfio_iommu_driver *driver;
+	int ret;
+
+	if (!dev || !user_pfn || !phys_pfn)
+		return -EINVAL;
+
+	group = vfio_group_get_from_dev(dev);
+	if (IS_ERR(group))
+		return PTR_ERR(group);
+
+	ret = vfio_group_add_container_user(group);
+	if (ret)
+		goto err_pin_pages;
+
+	container = group->container;
+	down_read(&container->group_lock);
+
+	driver = container->iommu_driver;
+	if (likely(driver && driver->ops->pin_pages))
+		ret = driver->ops->pin_pages(container->iommu_data, user_pfn,
+					     npage, prot, phys_pfn);
+	else
+		ret = -EINVAL;
+
+	up_read(&container->group_lock);
+	vfio_group_try_dissolve_container(group);
+
+err_pin_pages:
+	vfio_group_put(group);
+	return ret;
+
+}
+EXPORT_SYMBOL(vfio_pin_pages);
+
+/*
+ * Unpin set of host PFNs for local domain only.
+ * @dev [in] : device
+ * @user_pfn [in]: array of user/guest PFNs to be unpinned
+ * @pfn [in] : array of host PFNs to be unpinned.
+ * @npage [in] :count of elements in array, that is number of pages.
+ * Return error or number of pages unpinned.
+ */
+int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn,
+		     unsigned long *pfn, int npage)
+{
+	struct vfio_container *container;
+	struct vfio_group *group;
+	struct vfio_iommu_driver *driver;
+	int ret;
+
+	if (!dev || !pfn)
+		return -EINVAL;
+
+	group = vfio_group_get_from_dev(dev);
+	if (IS_ERR(group))
+		return PTR_ERR(group);
+
+	ret = vfio_group_add_container_user(group);
+	if (ret)
+		goto err_unpin_pages;
+
+	container = group->container;
+	down_read(&container->group_lock);
+
+	driver = container->iommu_driver;
+	if (likely(driver && driver->ops->unpin_pages))
+		ret = driver->ops->unpin_pages(container->iommu_data, user_pfn,
+					       pfn, npage);
+	else
+		ret = -EINVAL;
+
+	up_read(&container->group_lock);
+	vfio_group_try_dissolve_container(group);
+
+err_unpin_pages:
+	vfio_group_put(group);
+	return ret;
+}
+EXPORT_SYMBOL(vfio_unpin_pages);
+
 /**
  * Module/class support
  */
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 2ba19424e4a1..7fb87f008e0a 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -259,8 +259,8 @@  static int vaddr_get_pfn(unsigned long vaddr, int prot, unsigned long *pfn)
  * the iommu can only map chunks of consecutive pfns anyway, so get the
  * first page and all consecutive pages with the same locking.
  */
-static long vfio_pin_pages(unsigned long vaddr, long npage,
-			   int prot, unsigned long *pfn_base)
+static long __vfio_pin_pages_remote(unsigned long vaddr, long npage,
+				    int prot, unsigned long *pfn_base)
 {
 	unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 	bool lock_cap = capable(CAP_IPC_LOCK);
@@ -318,8 +318,8 @@  static long vfio_pin_pages(unsigned long vaddr, long npage,
 	return i;
 }
 
-static long vfio_unpin_pages(unsigned long pfn, long npage,
-			     int prot, bool do_accounting)
+static long __vfio_unpin_pages_remote(unsigned long pfn, long npage,
+				      int prot, bool do_accounting)
 {
 	unsigned long unlocked = 0;
 	long i;
@@ -382,9 +382,9 @@  static void vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma)
 		if (WARN_ON(!unmapped))
 			break;
 
-		unlocked += vfio_unpin_pages(phys >> PAGE_SHIFT,
-					     unmapped >> PAGE_SHIFT,
-					     dma->prot, false);
+		unlocked += __vfio_unpin_pages_remote(phys >> PAGE_SHIFT,
+						      unmapped >> PAGE_SHIFT,
+						      dma->prot, false);
 		iova += unmapped;
 
 		cond_resched();
@@ -613,8 +613,8 @@  static int vfio_dma_do_map(struct vfio_iommu *iommu,
 
 	while (size) {
 		/* Pin a contiguous chunk of memory */
-		npage = vfio_pin_pages(vaddr + dma->size,
-				       size >> PAGE_SHIFT, prot, &pfn);
+		npage = __vfio_pin_pages_remote(vaddr + dma->size,
+						size >> PAGE_SHIFT, prot, &pfn);
 		if (npage <= 0) {
 			WARN_ON(!npage);
 			ret = (int)npage;
@@ -624,7 +624,7 @@  static int vfio_dma_do_map(struct vfio_iommu *iommu,
 		/* Map it! */
 		ret = vfio_iommu_map(iommu, iova + dma->size, pfn, npage, prot);
 		if (ret) {
-			vfio_unpin_pages(pfn, npage, prot, true);
+			__vfio_unpin_pages_remote(pfn, npage, prot, true);
 			break;
 		}
 
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 0ecae0b1cd34..ba1b64cb7d4b 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -75,7 +75,13 @@  struct vfio_iommu_driver_ops {
 					struct iommu_group *group);
 	void		(*detach_group)(void *iommu_data,
 					struct iommu_group *group);
-
+	int		(*pin_pages)(void *iommu_data, unsigned long *user_pfn,
+				     int npage, int prot,
+				     unsigned long *phys_pfn);
+	int		(*unpin_pages)(void *iommu_data,
+				       unsigned long *user_pfn,
+				       unsigned long *pfn,
+				       int npage);
 };
 
 extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops);
@@ -127,6 +133,12 @@  static inline long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
 }
 #endif /* CONFIG_EEH */
 
+extern int vfio_pin_pages(struct device *dev, unsigned long *user_pfn,
+			  int npage, int prot, unsigned long *phys_pfn);
+
+extern int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn,
+			    unsigned long *pfn, int npage);
+
 /*
  * IRQfd - generic
  */