@@ -198,7 +198,6 @@ static void tce_iommu_release(void *iommu_data)
struct iommu_table *tbl = container->tbl;
WARN_ON(tbl && !tbl->it_group);
- tce_iommu_disable(container);
if (tbl) {
tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
@@ -206,63 +205,97 @@ static void tce_iommu_release(void *iommu_data)
if (tbl->it_group)
tce_iommu_detach_group(iommu_data, tbl->it_group);
}
+
+ tce_iommu_disable(container);
+
mutex_destroy(&container->lock);
kfree(container);
}

+static void tce_iommu_unuse_page(struct tce_container *container,
+ unsigned long oldtce)
+{
+ struct page *page;
+
+ if (!(oldtce & (TCE_PCI_READ | TCE_PCI_WRITE)))
+ return;
+
+ /*
+ * VFIO cannot map/unmap when a container is not enabled, so
+ * this check would not be needed; however KVM could map/unmap,
+ * and if that happened, we must not put the pages as KVM does not
+ * get them; it expects memory pre-registration to do this part.
+ */
+ if (!container->enabled)
+ return;
+
+ page = pfn_to_page(__pa(oldtce) >> PAGE_SHIFT);
+
+ if (oldtce & TCE_PCI_WRITE)
+ SetPageDirty(page);
+
+ put_page(page);
+}
+
static int tce_iommu_clear(struct tce_container *container,
struct iommu_table *tbl,
unsigned long entry, unsigned long pages)
{
unsigned long oldtce;
- struct page *page;
for ( ; pages; --pages, ++entry) {
oldtce = iommu_clear_tce(tbl, entry);
if (!oldtce)
continue;
- page = pfn_to_page(oldtce >> PAGE_SHIFT);
- WARN_ON(!page);
- if (page) {
- if (oldtce & TCE_PCI_WRITE)
- SetPageDirty(page);
- put_page(page);
- }
+ tce_iommu_unuse_page(container, (unsigned long) __va(oldtce));
}
return 0;
}

+static int tce_get_hva(struct tce_container *container,
+ unsigned page_shift, unsigned long tce, unsigned long *hva)
+{
+ struct page *page = NULL;
+ enum dma_data_direction direction = iommu_tce_direction(tce);
+
+ if (get_user_pages_fast(tce & PAGE_MASK, 1,
+ direction != DMA_TO_DEVICE, &page) != 1)
+ return -EFAULT;
+
+ *hva = (unsigned long) page_address(page);
+
+ return 0;
+}
+
static long tce_iommu_build(struct tce_container *container,
struct iommu_table *tbl,
unsigned long entry, unsigned long tce, unsigned long pages)
{
long i, ret = 0;
- struct page *page = NULL;
+ struct page *page;
unsigned long hva;
enum dma_data_direction direction = iommu_tce_direction(tce);
for (i = 0; i < pages; ++i) {
- ret = get_user_pages_fast(tce & PAGE_MASK, 1,
- direction != DMA_TO_DEVICE, &page);
- if (unlikely(ret != 1)) {
- ret = -EFAULT;
+ ret = tce_get_hva(container, tbl->it_page_shift, tce, &hva);
+ if (ret)
break;
- }
+ page = pfn_to_page(__pa(hva) >> PAGE_SHIFT);
if (!tce_page_is_contained(page, tbl->it_page_shift)) {
ret = -EPERM;
break;
}
- hva = (unsigned long) page_address(page) +
- (tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK);
+ /* Preserve offset within IOMMU page */
+ hva |= tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;
ret = iommu_tce_build(tbl, entry + i, hva, direction);
if (ret) {
- put_page(page);
+ tce_iommu_unuse_page(container, hva);
pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
__func__, entry << tbl->it_page_shift,
tce, ret);
This is a pretty mechanical patch to make the next patches simpler.

The new tce_iommu_unuse_page() helper does put_page() now, but it may
skip that once the memory registering patch is applied.

While here, this removes the unnecessary check of the value returned by
pfn_to_page() as it cannot possibly return NULL.

This moves tce_iommu_disable() later so that tce_iommu_clear() can tell
whether the container has been enabled: if it has not been, put_page()
must not be called on TCEs from the TCE table. This situation is not yet
possible, but it will be once the KVM acceleration patchset is applied.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
Changes:
v6:
* tce_get_hva() returns hva via a pointer
---
 drivers/vfio/vfio_iommu_spapr_tce.c | 69 +++++++++++++++++++++++++++----------
 1 file changed, 51 insertions(+), 18 deletions(-)
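For reference, below is roughly how tce_iommu_release() reads with this
patch applied. The local variable declarations at the top of the function
sit outside the diff context above and are assumed here, so treat this as
a sketch of the resulting flow rather than the exact file contents:

static void tce_iommu_release(void *iommu_data)
{
	struct tce_container *container = iommu_data;	/* assumed declaration */
	struct iommu_table *tbl = container->tbl;

	WARN_ON(tbl && !tbl->it_group);

	if (tbl) {
		/* clear while container->enabled still reflects the old state */
		tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);

		if (tbl->it_group)
			tce_iommu_detach_group(iommu_data, tbl->it_group);
	}

	/* disable only after the table has been cleared */
	tce_iommu_disable(container);

	mutex_destroy(&container->lock);
	kfree(container);
}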