Message ID | 1584560474-19946-8-git-send-email-kwankhede@nvidia.com (mailing list archive)
---|---
State | New, archived
Series | KABIs to support migration for VFIO devices
On Thu, 19 Mar 2020 01:11:14 +0530 Kirti Wankhede <kwankhede@nvidia.com> wrote: > Added a check such that only singleton IOMMU groups can pin pages. > From the point when vendor driver pins any pages, consider IOMMU group > dirty page scope to be limited to pinned pages. > > To optimize to avoid walking list often, added flag > pinned_page_dirty_scope to indicate if all of the vfio_groups for each > vfio_domain in the domain_list dirty page scope is limited to pinned > pages. This flag is updated on first pinned pages request for that IOMMU > group and on attaching/detaching group. > > Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> > Reviewed-by: Neo Jia <cjia@nvidia.com> > --- > drivers/vfio/vfio.c | 13 +++++-- > drivers/vfio/vfio_iommu_type1.c | 77 +++++++++++++++++++++++++++++++++++++++-- > include/linux/vfio.h | 4 ++- > 3 files changed, 87 insertions(+), 7 deletions(-) > > diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c > index 210fcf426643..311b5e4e111e 100644 > --- a/drivers/vfio/vfio.c > +++ b/drivers/vfio/vfio.c > @@ -85,6 +85,7 @@ struct vfio_group { > atomic_t opened; > wait_queue_head_t container_q; > bool noiommu; > + unsigned int dev_counter; > struct kvm *kvm; > struct blocking_notifier_head notifier; > }; > @@ -555,6 +556,7 @@ struct vfio_device *vfio_group_create_device(struct vfio_group *group, > > mutex_lock(&group->device_lock); > list_add(&device->group_next, &group->device_list); > + group->dev_counter++; > mutex_unlock(&group->device_lock); > > return device; > @@ -567,6 +569,7 @@ static void vfio_device_release(struct kref *kref) > struct vfio_group *group = device->group; > > list_del(&device->group_next); > + group->dev_counter--; > mutex_unlock(&group->device_lock); > > dev_set_drvdata(device->dev, NULL); > @@ -1933,6 +1936,9 @@ int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage, > if (!group) > return -ENODEV; > > + if (group->dev_counter > 1) > + return -EINVAL; > + > ret = vfio_group_add_container_user(group); > if (ret) > goto err_pin_pages; > @@ -1940,7 +1946,8 @@ int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage, > container = group->container; > driver = container->iommu_driver; > if (likely(driver && driver->ops->pin_pages)) > - ret = driver->ops->pin_pages(container->iommu_data, user_pfn, > + ret = driver->ops->pin_pages(container->iommu_data, > + group->iommu_group, user_pfn, > npage, prot, phys_pfn); > else > ret = -ENOTTY; > @@ -2038,8 +2045,8 @@ int vfio_group_pin_pages(struct vfio_group *group, > driver = container->iommu_driver; > if (likely(driver && driver->ops->pin_pages)) > ret = driver->ops->pin_pages(container->iommu_data, > - user_iova_pfn, npage, > - prot, phys_pfn); > + group->iommu_group, user_iova_pfn, > + npage, prot, phys_pfn); > else > ret = -ENOTTY; > > diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c > index 912629320719..deec09f4b0f6 100644 > --- a/drivers/vfio/vfio_iommu_type1.c > +++ b/drivers/vfio/vfio_iommu_type1.c > @@ -72,6 +72,7 @@ struct vfio_iommu { > bool v2; > bool nesting; > bool dirty_page_tracking; > + bool pinned_page_dirty_scope; > }; > > struct vfio_domain { > @@ -99,6 +100,7 @@ struct vfio_group { > struct iommu_group *iommu_group; > struct list_head next; > bool mdev_group; /* An mdev group */ > + bool pinned_page_dirty_scope; > }; > > struct vfio_iova { > @@ -132,6 +134,10 @@ struct vfio_regions { > static int put_pfn(unsigned long pfn, int prot); > static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu); > > 
+static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu, > + struct iommu_group *iommu_group); > + > +static void update_pinned_page_dirty_scope(struct vfio_iommu *iommu); > /* > * This code handles mapping and unmapping of user data buffers > * into DMA'ble space using the IOMMU > @@ -556,11 +562,13 @@ static int vfio_unpin_page_external(struct vfio_dma *dma, dma_addr_t iova, > } > > static int vfio_iommu_type1_pin_pages(void *iommu_data, > + struct iommu_group *iommu_group, > unsigned long *user_pfn, > int npage, int prot, > unsigned long *phys_pfn) > { > struct vfio_iommu *iommu = iommu_data; > + struct vfio_group *group; > int i, j, ret; > unsigned long remote_vaddr; > struct vfio_dma *dma; > @@ -630,8 +638,14 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, > (vpfn->iova - dma->iova) >> pgshift, 1); > } > } > - > ret = i; > + > + group = vfio_iommu_find_iommu_group(iommu, iommu_group); > + if (!group->pinned_page_dirty_scope) { > + group->pinned_page_dirty_scope = true; > + update_pinned_page_dirty_scope(iommu); > + } > + > goto pin_done; > > pin_unwind: > @@ -913,8 +927,11 @@ static int vfio_iova_dirty_bitmap(struct vfio_iommu *iommu, dma_addr_t iova, > npages = dma->size >> pgshift; > bitmap_size = DIRTY_BITMAP_BYTES(npages); > > - /* mark all pages dirty if all pages are pinned and mapped. */ > - if (dma->iommu_mapped) > + /* > + * mark all pages dirty if any IOMMU capable device is not able > + * to report dirty pages and all pages are pinned and mapped. > + */ > + if (!iommu->pinned_page_dirty_scope && dma->iommu_mapped) > bitmap_set(dma->bitmap, 0, npages); > > if (copy_to_user((void __user *)bitmap, dma->bitmap, bitmap_size)) > @@ -1393,6 +1410,51 @@ static struct vfio_group *find_iommu_group(struct vfio_domain *domain, > return NULL; > } > > +static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu, > + struct iommu_group *iommu_group) > +{ > + struct vfio_domain *domain; > + struct vfio_group *group = NULL; > + > + list_for_each_entry(domain, &iommu->domain_list, next) { > + group = find_iommu_group(domain, iommu_group); > + if (group) > + return group; > + } > + > + if (iommu->external_domain) > + group = find_iommu_group(iommu->external_domain, iommu_group); > + > + return group; > +} > + > +static void update_pinned_page_dirty_scope(struct vfio_iommu *iommu) > +{ > + struct vfio_domain *domain; > + struct vfio_group *group; > + > + list_for_each_entry(domain, &iommu->domain_list, next) { > + list_for_each_entry(group, &domain->group_list, next) { > + if (!group->pinned_page_dirty_scope) { > + iommu->pinned_page_dirty_scope = false; > + return; > + } > + } > + } > + > + if (iommu->external_domain) { > + domain = iommu->external_domain; > + list_for_each_entry(group, &domain->group_list, next) { > + if (!group->pinned_page_dirty_scope) { > + iommu->pinned_page_dirty_scope = false; > + return; > + } > + } > + } > + > + iommu->pinned_page_dirty_scope = true; > +} > + > static bool vfio_iommu_has_sw_msi(struct list_head *group_resv_regions, > phys_addr_t *base) > { > @@ -1799,6 +1861,9 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, > > list_add(&group->next, > &iommu->external_domain->group_list); > + group->pinned_page_dirty_scope = true; > + if (!iommu->pinned_page_dirty_scope) > + update_pinned_page_dirty_scope(iommu); A comment above this would be good since this wasn't entirely obvious, maybe: /* * Non-iommu backed group cannot dirty memory directly, * it can only use interfaces that provide dirty 
tracking. * The iommu scope can only be promoted with the addition * of a dirty tracking group. */ > mutex_unlock(&iommu->lock); > > return 0; > @@ -1921,6 +1986,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, > done: > /* Delete the old one and insert new iova list */ > vfio_iommu_iova_insert_copy(iommu, &iova_copy); > + iommu->pinned_page_dirty_scope = false; Likewise here: /* * An iommu backed group can dirty memory directly and therefore * demotes the iommu scope until it declares itself dirty tracking * capable via the page pinning interface. */ > mutex_unlock(&iommu->lock); > vfio_iommu_resv_free(&group_resv_regions); > > @@ -2073,6 +2139,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, > struct vfio_iommu *iommu = iommu_data; > struct vfio_domain *domain; > struct vfio_group *group; > + bool update_dirty_scope = false; > LIST_HEAD(iova_copy); > > mutex_lock(&iommu->lock); > @@ -2080,6 +2147,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, > if (iommu->external_domain) { > group = find_iommu_group(iommu->external_domain, iommu_group); > if (group) { > + update_dirty_scope = !group->pinned_page_dirty_scope; > list_del(&group->next); > kfree(group); > > @@ -2109,6 +2177,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, > continue; > > vfio_iommu_detach_group(domain, group); > + update_dirty_scope = !group->pinned_page_dirty_scope; > list_del(&group->next); > kfree(group); > /* > @@ -2139,6 +2208,8 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, > vfio_iommu_iova_free(&iova_copy); > > detach_group_done: > + if (update_dirty_scope) > + update_pinned_page_dirty_scope(iommu); And one more /* * Removal of a group without dirty tracking may * allow the iommu scope to be promoted. */ > mutex_unlock(&iommu->lock); > } > > diff --git a/include/linux/vfio.h b/include/linux/vfio.h > index be2bd358b952..702e1d7b6e8b 100644 > --- a/include/linux/vfio.h > +++ b/include/linux/vfio.h > @@ -72,7 +72,9 @@ struct vfio_iommu_driver_ops { > struct iommu_group *group); > void (*detach_group)(void *iommu_data, > struct iommu_group *group); > - int (*pin_pages)(void *iommu_data, unsigned long *user_pfn, > + int (*pin_pages)(void *iommu_data, > + struct iommu_group *group, > + unsigned long *user_pfn, > int npage, int prot, > unsigned long *phys_pfn); > int (*unpin_pages)(void *iommu_data,
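For context, the consumer of this interface is a vendor (mdev) driver rather than userspace. Below is a minimal sketch of that consumer side, assuming a hypothetical vendor-driver helper; the function name and error handling are illustrative only and are not part of this series. With this patch, the call fails with -EINVAL unless the device is the only member of its IOMMU group.

```c
#include <linux/iommu.h>
#include <linux/mdev.h>
#include <linux/vfio.h>

/*
 * Hypothetical vendor-driver helper (not from this series): pin one guest
 * page through vfio_pin_pages().  After the first successful pin, the
 * group's dirty-page scope is limited to pinned pages; with this patch
 * the call returns -EINVAL if the IOMMU group holds more than one device.
 */
static int example_vendor_pin_gfn(struct mdev_device *mdev,
				  unsigned long gfn, unsigned long *host_pfn)
{
	unsigned long user_pfn = gfn;	/* guest IOVA page frame to pin */
	int ret;

	ret = vfio_pin_pages(mdev_dev(mdev), &user_pfn, 1,
			     IOMMU_READ | IOMMU_WRITE, host_pfn);
	if (ret != 1)
		return ret < 0 ? ret : -EFAULT;

	/* host_pfn now holds the pinned host page frame. */
	return 0;
}
```

Note that unpinning does not revert the group's dirty scope: per the patch, group->pinned_page_dirty_scope is set on the first pin and only recomputed when groups attach or detach.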
On Thu, Mar 19, 2020 at 03:41:14AM +0800, Kirti Wankhede wrote: > Added a check such that only singleton IOMMU groups can pin pages. > From the point when vendor driver pins any pages, consider IOMMU group > dirty page scope to be limited to pinned pages. > > To optimize to avoid walking list often, added flag > pinned_page_dirty_scope to indicate if all of the vfio_groups for each > vfio_domain in the domain_list dirty page scope is limited to pinned > pages. This flag is updated on first pinned pages request for that IOMMU > group and on attaching/detaching group. > > Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com> > Reviewed-by: Neo Jia <cjia@nvidia.com> > --- > drivers/vfio/vfio.c | 13 +++++-- > drivers/vfio/vfio_iommu_type1.c | 77 +++++++++++++++++++++++++++++++++++++++-- > include/linux/vfio.h | 4 ++- > 3 files changed, 87 insertions(+), 7 deletions(-) > > diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c > index 210fcf426643..311b5e4e111e 100644 > --- a/drivers/vfio/vfio.c > +++ b/drivers/vfio/vfio.c > @@ -85,6 +85,7 @@ struct vfio_group { > atomic_t opened; > wait_queue_head_t container_q; > bool noiommu; > + unsigned int dev_counter; > struct kvm *kvm; > struct blocking_notifier_head notifier; > }; > @@ -555,6 +556,7 @@ struct vfio_device *vfio_group_create_device(struct vfio_group *group, > > mutex_lock(&group->device_lock); > list_add(&device->group_next, &group->device_list); > + group->dev_counter++; > mutex_unlock(&group->device_lock); > > return device; > @@ -567,6 +569,7 @@ static void vfio_device_release(struct kref *kref) > struct vfio_group *group = device->group; > > list_del(&device->group_next); > + group->dev_counter--; > mutex_unlock(&group->device_lock); > > dev_set_drvdata(device->dev, NULL); > @@ -1933,6 +1936,9 @@ int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage, > if (!group) > return -ENODEV; > > + if (group->dev_counter > 1) > + return -EINVAL; > + > ret = vfio_group_add_container_user(group); > if (ret) > goto err_pin_pages; > @@ -1940,7 +1946,8 @@ int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage, > container = group->container; > driver = container->iommu_driver; > if (likely(driver && driver->ops->pin_pages)) > - ret = driver->ops->pin_pages(container->iommu_data, user_pfn, > + ret = driver->ops->pin_pages(container->iommu_data, > + group->iommu_group, user_pfn, > npage, prot, phys_pfn); > else > ret = -ENOTTY; > @@ -2038,8 +2045,8 @@ int vfio_group_pin_pages(struct vfio_group *group, > driver = container->iommu_driver; > if (likely(driver && driver->ops->pin_pages)) > ret = driver->ops->pin_pages(container->iommu_data, > - user_iova_pfn, npage, > - prot, phys_pfn); > + group->iommu_group, user_iova_pfn, > + npage, prot, phys_pfn); > else > ret = -ENOTTY; > > diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c > index 912629320719..deec09f4b0f6 100644 > --- a/drivers/vfio/vfio_iommu_type1.c > +++ b/drivers/vfio/vfio_iommu_type1.c > @@ -72,6 +72,7 @@ struct vfio_iommu { > bool v2; > bool nesting; > bool dirty_page_tracking; > + bool pinned_page_dirty_scope; > }; > > struct vfio_domain { > @@ -99,6 +100,7 @@ struct vfio_group { > struct iommu_group *iommu_group; > struct list_head next; > bool mdev_group; /* An mdev group */ > + bool pinned_page_dirty_scope; > }; > > struct vfio_iova { > @@ -132,6 +134,10 @@ struct vfio_regions { > static int put_pfn(unsigned long pfn, int prot); > static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu); > > +static struct 
vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu, > + struct iommu_group *iommu_group); > + > +static void update_pinned_page_dirty_scope(struct vfio_iommu *iommu); > /* > * This code handles mapping and unmapping of user data buffers > * into DMA'ble space using the IOMMU > @@ -556,11 +562,13 @@ static int vfio_unpin_page_external(struct vfio_dma *dma, dma_addr_t iova, > } > > static int vfio_iommu_type1_pin_pages(void *iommu_data, > + struct iommu_group *iommu_group, > unsigned long *user_pfn, > int npage, int prot, > unsigned long *phys_pfn) > { > struct vfio_iommu *iommu = iommu_data; > + struct vfio_group *group; > int i, j, ret; > unsigned long remote_vaddr; > struct vfio_dma *dma; > @@ -630,8 +638,14 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, > (vpfn->iova - dma->iova) >> pgshift, 1); > } > } Could you provide an interface lightweight than vfio_pin_pages for pass-through devices? e.g. vfio_mark_iova_dirty() Or at least allowing phys_pfn to be empty for pass-through devices. This is really inefficient: bitmap_set(dma->bitmap, (vpfn->iova - dma->iova) / pgsize, 1)); i.e. in order to mark an iova dirty, it has to go through iova ---> pfn --> iova while acquiring pfn is not necessary for pass-through devices. > - > ret = i; > + > + group = vfio_iommu_find_iommu_group(iommu, iommu_group); > + if (!group->pinned_page_dirty_scope) { > + group->pinned_page_dirty_scope = true; > + update_pinned_page_dirty_scope(iommu); > + } > + > goto pin_done; > > pin_unwind: > @@ -913,8 +927,11 @@ static int vfio_iova_dirty_bitmap(struct vfio_iommu *iommu, dma_addr_t iova, > npages = dma->size >> pgshift; > bitmap_size = DIRTY_BITMAP_BYTES(npages); > > - /* mark all pages dirty if all pages are pinned and mapped. */ > - if (dma->iommu_mapped) > + /* > + * mark all pages dirty if any IOMMU capable device is not able > + * to report dirty pages and all pages are pinned and mapped. 
> + */ > + if (!iommu->pinned_page_dirty_scope && dma->iommu_mapped) > bitmap_set(dma->bitmap, 0, npages); > > if (copy_to_user((void __user *)bitmap, dma->bitmap, bitmap_size)) > @@ -1393,6 +1410,51 @@ static struct vfio_group *find_iommu_group(struct vfio_domain *domain, > return NULL; > } > > +static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu, > + struct iommu_group *iommu_group) > +{ > + struct vfio_domain *domain; > + struct vfio_group *group = NULL; > + > + list_for_each_entry(domain, &iommu->domain_list, next) { > + group = find_iommu_group(domain, iommu_group); > + if (group) > + return group; > + } > + > + if (iommu->external_domain) > + group = find_iommu_group(iommu->external_domain, iommu_group); > + > + return group; > +} > + > +static void update_pinned_page_dirty_scope(struct vfio_iommu *iommu) > +{ > + struct vfio_domain *domain; > + struct vfio_group *group; > + > + list_for_each_entry(domain, &iommu->domain_list, next) { > + list_for_each_entry(group, &domain->group_list, next) { > + if (!group->pinned_page_dirty_scope) { > + iommu->pinned_page_dirty_scope = false; > + return; > + } > + } > + } > + > + if (iommu->external_domain) { > + domain = iommu->external_domain; > + list_for_each_entry(group, &domain->group_list, next) { > + if (!group->pinned_page_dirty_scope) { > + iommu->pinned_page_dirty_scope = false; > + return; > + } > + } > + } > + > + iommu->pinned_page_dirty_scope = true; > +} > + > static bool vfio_iommu_has_sw_msi(struct list_head *group_resv_regions, > phys_addr_t *base) > { > @@ -1799,6 +1861,9 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, > > list_add(&group->next, > &iommu->external_domain->group_list); > + group->pinned_page_dirty_scope = true; > + if (!iommu->pinned_page_dirty_scope) > + update_pinned_page_dirty_scope(iommu); > mutex_unlock(&iommu->lock); > > return 0; > @@ -1921,6 +1986,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, > done: > /* Delete the old one and insert new iova list */ > vfio_iommu_iova_insert_copy(iommu, &iova_copy); > + iommu->pinned_page_dirty_scope = false; > mutex_unlock(&iommu->lock); > vfio_iommu_resv_free(&group_resv_regions); > > @@ -2073,6 +2139,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, > struct vfio_iommu *iommu = iommu_data; > struct vfio_domain *domain; > struct vfio_group *group; > + bool update_dirty_scope = false; > LIST_HEAD(iova_copy); > > mutex_lock(&iommu->lock); > @@ -2080,6 +2147,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, > if (iommu->external_domain) { > group = find_iommu_group(iommu->external_domain, iommu_group); > if (group) { > + update_dirty_scope = !group->pinned_page_dirty_scope; > list_del(&group->next); > kfree(group); > > @@ -2109,6 +2177,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, > continue; > > vfio_iommu_detach_group(domain, group); > + update_dirty_scope = !group->pinned_page_dirty_scope; > list_del(&group->next); > kfree(group); > /* > @@ -2139,6 +2208,8 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, > vfio_iommu_iova_free(&iova_copy); > > detach_group_done: > + if (update_dirty_scope) > + update_pinned_page_dirty_scope(iommu); > mutex_unlock(&iommu->lock); > } > > diff --git a/include/linux/vfio.h b/include/linux/vfio.h > index be2bd358b952..702e1d7b6e8b 100644 > --- a/include/linux/vfio.h > +++ b/include/linux/vfio.h > @@ -72,7 +72,9 @@ struct vfio_iommu_driver_ops { > struct iommu_group *group); > void (*detach_group)(void *iommu_data, > 
struct iommu_group *group); > - int (*pin_pages)(void *iommu_data, unsigned long *user_pfn, > + int (*pin_pages)(void *iommu_data, > + struct iommu_group *group, > + unsigned long *user_pfn, > int npage, int prot, > unsigned long *phys_pfn); > int (*unpin_pages)(void *iommu_data, > -- > 2.7.0 >
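For reference, here is a rough sketch of the kind of helper Yan is asking for, written against the vfio_iommu_type1 internals visible in this patch (vfio_find_dma(), dma->bitmap, vfio_pgsize_bitmap()). The function name follows Yan's vfio_mark_iova_dirty() suggestion; the locking and error handling are assumptions, and this is not part of the series.

```c
/*
 * Hypothetical, not part of this series: mark an IOVA range dirty without
 * pinning, for devices that can report dirty IOVAs directly (e.g.
 * pass-through devices).  Sketched against the type1 internals shown in
 * the patch; name, locking and error handling are assumptions.
 */
static int vfio_mark_iova_dirty(struct vfio_iommu *iommu,
				dma_addr_t iova, size_t size)
{
	size_t pgsize = (size_t)1 << __ffs(vfio_pgsize_bitmap(iommu));
	struct vfio_dma *dma;
	dma_addr_t cur;
	int ret = 0;

	mutex_lock(&iommu->lock);

	if (!iommu->dirty_page_tracking)
		goto out_unlock;	/* no per-dma bitmaps allocated yet */

	for (cur = iova; cur < iova + size; cur += pgsize) {
		dma = vfio_find_dma(iommu, cur, pgsize);
		if (!dma) {
			ret = -EINVAL;
			break;
		}
		/* One bit per page, relative to the start of the mapping. */
		bitmap_set(dma->bitmap, (cur - dma->iova) / pgsize, 1);
	}

out_unlock:
	mutex_unlock(&iommu->lock);
	return ret;
}
```

This avoids the iova -> pfn -> iova round trip Yan points out; as the follow-up below notes, there is no in-tree consumer for such an interface yet.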
On Thu, 19 Mar 2020 02:24:33 -0400 Yan Zhao <yan.y.zhao@intel.com> wrote: > On Thu, Mar 19, 2020 at 03:41:14AM +0800, Kirti Wankhede wrote: > > diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c > > index 912629320719..deec09f4b0f6 100644 > > --- a/drivers/vfio/vfio_iommu_type1.c > > +++ b/drivers/vfio/vfio_iommu_type1.c > > @@ -72,6 +72,7 @@ struct vfio_iommu { > > bool v2; > > bool nesting; > > bool dirty_page_tracking; > > + bool pinned_page_dirty_scope; > > }; > > > > struct vfio_domain { > > @@ -99,6 +100,7 @@ struct vfio_group { > > struct iommu_group *iommu_group; > > struct list_head next; > > bool mdev_group; /* An mdev group */ > > + bool pinned_page_dirty_scope; > > }; > > > > struct vfio_iova { > > @@ -132,6 +134,10 @@ struct vfio_regions { > > static int put_pfn(unsigned long pfn, int prot); > > static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu); > > > > +static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu, > > + struct iommu_group *iommu_group); > > + > > +static void update_pinned_page_dirty_scope(struct vfio_iommu *iommu); > > /* > > * This code handles mapping and unmapping of user data buffers > > * into DMA'ble space using the IOMMU > > @@ -556,11 +562,13 @@ static int vfio_unpin_page_external(struct vfio_dma *dma, dma_addr_t iova, > > } > > > > static int vfio_iommu_type1_pin_pages(void *iommu_data, > > + struct iommu_group *iommu_group, > > unsigned long *user_pfn, > > int npage, int prot, > > unsigned long *phys_pfn) > > { > > struct vfio_iommu *iommu = iommu_data; > > + struct vfio_group *group; > > int i, j, ret; > > unsigned long remote_vaddr; > > struct vfio_dma *dma; > > @@ -630,8 +638,14 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, > > (vpfn->iova - dma->iova) >> pgshift, 1); > > } > > } > > Could you provide an interface lightweight than vfio_pin_pages for pass-through > devices? e.g. vfio_mark_iova_dirty() > > Or at least allowing phys_pfn to be empty for pass-through devices. > > This is really inefficient: > bitmap_set(dma->bitmap, (vpfn->iova - dma->iova) / pgsize, 1)); > i.e. > in order to mark an iova dirty, it has to go through iova ---> pfn --> iova > while acquiring pfn is not necessary for pass-through devices. I think this would be possible, but I don't think it should be gating to this series. We don't have such consumers yet. Thanks, Alex
On Sat, Mar 21, 2020 at 03:41:42AM +0800, Alex Williamson wrote: > On Thu, 19 Mar 2020 02:24:33 -0400 > Yan Zhao <yan.y.zhao@intel.com> wrote: > > On Thu, Mar 19, 2020 at 03:41:14AM +0800, Kirti Wankhede wrote: > > > diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c > > > index 912629320719..deec09f4b0f6 100644 > > > --- a/drivers/vfio/vfio_iommu_type1.c > > > +++ b/drivers/vfio/vfio_iommu_type1.c > > > @@ -72,6 +72,7 @@ struct vfio_iommu { > > > bool v2; > > > bool nesting; > > > bool dirty_page_tracking; > > > + bool pinned_page_dirty_scope; > > > }; > > > > > > struct vfio_domain { > > > @@ -99,6 +100,7 @@ struct vfio_group { > > > struct iommu_group *iommu_group; > > > struct list_head next; > > > bool mdev_group; /* An mdev group */ > > > + bool pinned_page_dirty_scope; > > > }; > > > > > > struct vfio_iova { > > > @@ -132,6 +134,10 @@ struct vfio_regions { > > > static int put_pfn(unsigned long pfn, int prot); > > > static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu); > > > > > > +static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu, > > > + struct iommu_group *iommu_group); > > > + > > > +static void update_pinned_page_dirty_scope(struct vfio_iommu *iommu); > > > /* > > > * This code handles mapping and unmapping of user data buffers > > > * into DMA'ble space using the IOMMU > > > @@ -556,11 +562,13 @@ static int vfio_unpin_page_external(struct vfio_dma *dma, dma_addr_t iova, > > > } > > > > > > static int vfio_iommu_type1_pin_pages(void *iommu_data, > > > + struct iommu_group *iommu_group, > > > unsigned long *user_pfn, > > > int npage, int prot, > > > unsigned long *phys_pfn) > > > { > > > struct vfio_iommu *iommu = iommu_data; > > > + struct vfio_group *group; > > > int i, j, ret; > > > unsigned long remote_vaddr; > > > struct vfio_dma *dma; > > > @@ -630,8 +638,14 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, > > > (vpfn->iova - dma->iova) >> pgshift, 1); > > > } > > > } > > > > Could you provide an interface lightweight than vfio_pin_pages for pass-through > > devices? e.g. vfio_mark_iova_dirty() > > > > Or at least allowing phys_pfn to be empty for pass-through devices. > > > > This is really inefficient: > > bitmap_set(dma->bitmap, (vpfn->iova - dma->iova) / pgsize, 1)); > > i.e. > > in order to mark an iova dirty, it has to go through iova ---> pfn --> iova > > while acquiring pfn is not necessary for pass-through devices. > > I think this would be possible, but I don't think it should be gating > to this series. We don't have such consumers yet. Thanks, > ok. Reasonable. Thanks Yan
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 210fcf426643..311b5e4e111e 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -85,6 +85,7 @@ struct vfio_group { atomic_t opened; wait_queue_head_t container_q; bool noiommu; + unsigned int dev_counter; struct kvm *kvm; struct blocking_notifier_head notifier; }; @@ -555,6 +556,7 @@ struct vfio_device *vfio_group_create_device(struct vfio_group *group, mutex_lock(&group->device_lock); list_add(&device->group_next, &group->device_list); + group->dev_counter++; mutex_unlock(&group->device_lock); return device; @@ -567,6 +569,7 @@ static void vfio_device_release(struct kref *kref) struct vfio_group *group = device->group; list_del(&device->group_next); + group->dev_counter--; mutex_unlock(&group->device_lock); dev_set_drvdata(device->dev, NULL); @@ -1933,6 +1936,9 @@ int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage, if (!group) return -ENODEV; + if (group->dev_counter > 1) + return -EINVAL; + ret = vfio_group_add_container_user(group); if (ret) goto err_pin_pages; @@ -1940,7 +1946,8 @@ int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage, container = group->container; driver = container->iommu_driver; if (likely(driver && driver->ops->pin_pages)) - ret = driver->ops->pin_pages(container->iommu_data, user_pfn, + ret = driver->ops->pin_pages(container->iommu_data, + group->iommu_group, user_pfn, npage, prot, phys_pfn); else ret = -ENOTTY; @@ -2038,8 +2045,8 @@ int vfio_group_pin_pages(struct vfio_group *group, driver = container->iommu_driver; if (likely(driver && driver->ops->pin_pages)) ret = driver->ops->pin_pages(container->iommu_data, - user_iova_pfn, npage, - prot, phys_pfn); + group->iommu_group, user_iova_pfn, + npage, prot, phys_pfn); else ret = -ENOTTY; diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 912629320719..deec09f4b0f6 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -72,6 +72,7 @@ struct vfio_iommu { bool v2; bool nesting; bool dirty_page_tracking; + bool pinned_page_dirty_scope; }; struct vfio_domain { @@ -99,6 +100,7 @@ struct vfio_group { struct iommu_group *iommu_group; struct list_head next; bool mdev_group; /* An mdev group */ + bool pinned_page_dirty_scope; }; struct vfio_iova { @@ -132,6 +134,10 @@ struct vfio_regions { static int put_pfn(unsigned long pfn, int prot); static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu); +static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu, + struct iommu_group *iommu_group); + +static void update_pinned_page_dirty_scope(struct vfio_iommu *iommu); /* * This code handles mapping and unmapping of user data buffers * into DMA'ble space using the IOMMU @@ -556,11 +562,13 @@ static int vfio_unpin_page_external(struct vfio_dma *dma, dma_addr_t iova, } static int vfio_iommu_type1_pin_pages(void *iommu_data, + struct iommu_group *iommu_group, unsigned long *user_pfn, int npage, int prot, unsigned long *phys_pfn) { struct vfio_iommu *iommu = iommu_data; + struct vfio_group *group; int i, j, ret; unsigned long remote_vaddr; struct vfio_dma *dma; @@ -630,8 +638,14 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, (vpfn->iova - dma->iova) >> pgshift, 1); } } - ret = i; + + group = vfio_iommu_find_iommu_group(iommu, iommu_group); + if (!group->pinned_page_dirty_scope) { + group->pinned_page_dirty_scope = true; + update_pinned_page_dirty_scope(iommu); + } + goto pin_done; pin_unwind: @@ -913,8 +927,11 @@ static 
int vfio_iova_dirty_bitmap(struct vfio_iommu *iommu, dma_addr_t iova, npages = dma->size >> pgshift; bitmap_size = DIRTY_BITMAP_BYTES(npages); - /* mark all pages dirty if all pages are pinned and mapped. */ - if (dma->iommu_mapped) + /* + * mark all pages dirty if any IOMMU capable device is not able + * to report dirty pages and all pages are pinned and mapped. + */ + if (!iommu->pinned_page_dirty_scope && dma->iommu_mapped) bitmap_set(dma->bitmap, 0, npages); if (copy_to_user((void __user *)bitmap, dma->bitmap, bitmap_size)) @@ -1393,6 +1410,51 @@ static struct vfio_group *find_iommu_group(struct vfio_domain *domain, return NULL; } +static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu, + struct iommu_group *iommu_group) +{ + struct vfio_domain *domain; + struct vfio_group *group = NULL; + + list_for_each_entry(domain, &iommu->domain_list, next) { + group = find_iommu_group(domain, iommu_group); + if (group) + return group; + } + + if (iommu->external_domain) + group = find_iommu_group(iommu->external_domain, iommu_group); + + return group; +} + +static void update_pinned_page_dirty_scope(struct vfio_iommu *iommu) +{ + struct vfio_domain *domain; + struct vfio_group *group; + + list_for_each_entry(domain, &iommu->domain_list, next) { + list_for_each_entry(group, &domain->group_list, next) { + if (!group->pinned_page_dirty_scope) { + iommu->pinned_page_dirty_scope = false; + return; + } + } + } + + if (iommu->external_domain) { + domain = iommu->external_domain; + list_for_each_entry(group, &domain->group_list, next) { + if (!group->pinned_page_dirty_scope) { + iommu->pinned_page_dirty_scope = false; + return; + } + } + } + + iommu->pinned_page_dirty_scope = true; +} + static bool vfio_iommu_has_sw_msi(struct list_head *group_resv_regions, phys_addr_t *base) { @@ -1799,6 +1861,9 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, list_add(&group->next, &iommu->external_domain->group_list); + group->pinned_page_dirty_scope = true; + if (!iommu->pinned_page_dirty_scope) + update_pinned_page_dirty_scope(iommu); mutex_unlock(&iommu->lock); return 0; @@ -1921,6 +1986,7 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, done: /* Delete the old one and insert new iova list */ vfio_iommu_iova_insert_copy(iommu, &iova_copy); + iommu->pinned_page_dirty_scope = false; mutex_unlock(&iommu->lock); vfio_iommu_resv_free(&group_resv_regions); @@ -2073,6 +2139,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, struct vfio_iommu *iommu = iommu_data; struct vfio_domain *domain; struct vfio_group *group; + bool update_dirty_scope = false; LIST_HEAD(iova_copy); mutex_lock(&iommu->lock); @@ -2080,6 +2147,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, if (iommu->external_domain) { group = find_iommu_group(iommu->external_domain, iommu_group); if (group) { + update_dirty_scope = !group->pinned_page_dirty_scope; list_del(&group->next); kfree(group); @@ -2109,6 +2177,7 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, continue; vfio_iommu_detach_group(domain, group); + update_dirty_scope = !group->pinned_page_dirty_scope; list_del(&group->next); kfree(group); /* @@ -2139,6 +2208,8 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, vfio_iommu_iova_free(&iova_copy); detach_group_done: + if (update_dirty_scope) + update_pinned_page_dirty_scope(iommu); mutex_unlock(&iommu->lock); } diff --git a/include/linux/vfio.h b/include/linux/vfio.h index be2bd358b952..702e1d7b6e8b 100644 --- a/include/linux/vfio.h +++ 
b/include/linux/vfio.h @@ -72,7 +72,9 @@ struct vfio_iommu_driver_ops { struct iommu_group *group); void (*detach_group)(void *iommu_data, struct iommu_group *group); - int (*pin_pages)(void *iommu_data, unsigned long *user_pfn, + int (*pin_pages)(void *iommu_data, + struct iommu_group *group, + unsigned long *user_pfn, int npage, int prot, unsigned long *phys_pfn); int (*unpin_pages)(void *iommu_data,