diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
--- a/drivers/dax/bus.c
+++ b/drivers/dax/bus.c
@@ -384,7 +384,7 @@ void kill_dev_dax(struct dev_dax *dev_dax)
struct inode *inode = dax_inode(dax_dev);
kill_dax(dax_dev);
- unmap_mapping_range(inode->i_mapping, 0, 0, 1);
+ truncate_inode_pages(inode->i_mapping, 0);
/*
* Dynamic dax region have the pgmap allocated via dev_kzalloc()
diff --git a/drivers/dax/device.c b/drivers/dax/device.c
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -73,38 +73,15 @@ __weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
return -1;
}
-static void dax_set_mapping(struct vm_fault *vmf, pfn_t pfn,
- unsigned long fault_size)
-{
- unsigned long i, nr_pages = fault_size / PAGE_SIZE;
- struct file *filp = vmf->vma->vm_file;
- struct dev_dax *dev_dax = filp->private_data;
- pgoff_t pgoff;
-
- /* mapping is only set on the head */
- if (dev_dax->pgmap->vmemmap_shift)
- nr_pages = 1;
-
- pgoff = linear_page_index(vmf->vma,
- ALIGN(vmf->address, fault_size));
-
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pfn_to_page(pfn_t_to_pfn(pfn) + i);
-
- page = compound_head(page);
- if (page->mapping)
- continue;
-
- page->mapping = filp->f_mapping;
- page->index = pgoff + i;
- }
-}
-
static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax,
struct vm_fault *vmf)
{
+ struct address_space *mapping = vmf->vma->vm_file->f_mapping;
+ XA_STATE(xas, &mapping->i_pages, vmf->pgoff);
struct device *dev = &dev_dax->dev;
phys_addr_t phys;
+ vm_fault_t ret;
+ void *entry;
pfn_t pfn;
unsigned int fault_size = PAGE_SIZE;
@@ -128,7 +105,16 @@ static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax,
pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
- dax_set_mapping(vmf, pfn, fault_size);
+ entry = dax_grab_mapping_entry(&xas, mapping, 0);
+ if (xa_is_internal(entry))
+ return xa_to_internal(entry);
+
+ ret = dax_insert_entry(&xas, vmf, &entry, pfn, 0);
+
+ dax_unlock_entry(&xas, entry);
+
+ if (ret)
+ return ret;
return vmf_insert_mixed(vmf->vma, vmf->address, pfn);
}
@@ -136,10 +122,14 @@ static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax,
static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
struct vm_fault *vmf)
{
+ struct address_space *mapping = vmf->vma->vm_file->f_mapping;
unsigned long pmd_addr = vmf->address & PMD_MASK;
+ XA_STATE(xas, &mapping->i_pages, vmf->pgoff);
struct device *dev = &dev_dax->dev;
phys_addr_t phys;
+ vm_fault_t ret;
pgoff_t pgoff;
+ void *entry;
pfn_t pfn;
unsigned int fault_size = PMD_SIZE;
@@ -171,7 +161,16 @@ static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
- dax_set_mapping(vmf, pfn, fault_size);
+ entry = dax_grab_mapping_entry(&xas, mapping, PMD_ORDER);
+ if (xa_is_internal(entry))
+ return xa_to_internal(entry);
+
+ ret = dax_insert_entry(&xas, vmf, &entry, pfn, DAX_PMD);
+
+ dax_unlock_entry(&xas, entry);
+
+ if (ret)
+ return ret;
return vmf_insert_pfn_pmd(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE);
}
@@ -180,10 +179,14 @@ static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
struct vm_fault *vmf)
{
+ struct address_space *mapping = vmf->vma->vm_file->f_mapping;
unsigned long pud_addr = vmf->address & PUD_MASK;
+ XA_STATE(xas, &mapping->i_pages, vmf->pgoff);
struct device *dev = &dev_dax->dev;
phys_addr_t phys;
+ vm_fault_t ret;
pgoff_t pgoff;
+ void *entry;
pfn_t pfn;
unsigned int fault_size = PUD_SIZE;
@@ -216,7 +219,16 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
- dax_set_mapping(vmf, pfn, fault_size);
+ entry = dax_grab_mapping_entry(&xas, mapping, PUD_ORDER);
+ if (xa_is_internal(entry))
+ return xa_to_internal(entry);
+
+ ret = dax_insert_entry(&xas, vmf, &entry, pfn, DAX_PUD);
+
+ dax_unlock_entry(&xas, entry);
+
+ if (ret)
+ return ret;
return vmf_insert_pfn_pud(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE);
}
@@ -494,3 +506,4 @@ MODULE_LICENSE("GPL v2");
module_init(dax_init);
module_exit(dax_exit);
MODULE_ALIAS_DAX_DEVICE(0);
+MODULE_IMPORT_NS(DAX);
diff --git a/drivers/dax/mapping.c b/drivers/dax/mapping.c
--- a/drivers/dax/mapping.c
+++ b/drivers/dax/mapping.c
@@ -261,6 +261,7 @@ void dax_unlock_entry(struct xa_state *xas, void *entry)
WARN_ON(!dax_is_locked(old));
dax_wake_entry(xas, entry, WAKE_NEXT);
}
+EXPORT_SYMBOL_NS_GPL(dax_unlock_entry, DAX);
/*
* Return: The entry stored at this location before it was locked.
@@ -674,6 +675,7 @@ void *dax_grab_mapping_entry(struct xa_state *xas,
xas_unlock_irq(xas);
return xa_mk_internal(VM_FAULT_FALLBACK);
}
+EXPORT_SYMBOL_NS_GPL(dax_grab_mapping_entry, DAX);
/**
* dax_layout_pinned_page_range - find first pinned page in @mapping
@@ -875,6 +877,7 @@ vm_fault_t dax_insert_entry(struct xa_state *xas, struct vm_fault *vmf,
*pentry = entry;
return 0;
}
+EXPORT_SYMBOL_NS_GPL(dax_insert_entry, DAX);
int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev,
struct address_space *mapping, void *entry)
Track entries and take pgmap references at mapping insertion time.
Revoke mappings and drop the associated pgmap references at device
destruction or inode eviction time. With this in place, and the fsdax
equivalent already in place, the gup code no longer needs to consider
PTE_DEVMAP as an indicator to get a pgmap reference before taking a
page reference.

In other words, GUP takes additional references on mapped pages. Until
now, DAX in all its forms was failing to take references at mapping
time. With that fixed there is no longer a requirement for gup to
manage @pgmap references. That cleanup is saved for a follow-on patch.

Cc: Matthew Wilcox <willy@infradead.org>
Cc: Jan Kara <jack@suse.cz>
Cc: "Darrick J. Wong" <djwong@kernel.org>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: John Hubbard <jhubbard@nvidia.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dax/bus.c     |    2 +-
 drivers/dax/device.c  |   73 +++++++++++++++++++++++++++++--------------------
 drivers/dax/mapping.c |    3 ++
 3 files changed, 47 insertions(+), 31 deletions(-)
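For reference, the three converted fault handlers in drivers/dax/device.c
all follow the same entry-tracking pattern. The sketch below condenses the
PTE-sized case with the steps annotated; it only uses helpers that appear in
the hunks above (dax_pgoff_to_phys(), dax_grab_mapping_entry(),
dax_insert_entry(), dax_unlock_entry(), vmf_insert_mixed()), but the function
itself is illustrative and not part of the patch:

/* Illustrative sketch only -- not part of this patch. */
static vm_fault_t example_dev_dax_pte_fault(struct dev_dax *dev_dax,
					    struct vm_fault *vmf)
{
	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
	XA_STATE(xas, &mapping->i_pages, vmf->pgoff);
	phys_addr_t phys;
	vm_fault_t ret;
	void *entry;
	pfn_t pfn;

	/* Resolve the faulting offset to device physical memory. */
	phys = dax_pgoff_to_phys(dev_dax, vmf->pgoff, PAGE_SIZE);
	if (phys == -1)
		return VM_FAULT_SIGBUS;
	pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);

	/*
	 * Allocate and lock the mapping entry; internal entries encode a
	 * fault code such as VM_FAULT_FALLBACK.
	 */
	entry = dax_grab_mapping_entry(&xas, mapping, 0);
	if (xa_is_internal(entry))
		return xa_to_internal(entry);

	/*
	 * Record the pfn in the mapping and take the pgmap reference at
	 * insertion time, then drop the entry lock.
	 */
	ret = dax_insert_entry(&xas, vmf, &entry, pfn, 0);
	dax_unlock_entry(&xas, entry);
	if (ret)
		return ret;

	/* Install the pte only after the entry is tracked. */
	return vmf_insert_mixed(vmf->vma, vmf->address, pfn);
}

Because the mapping helpers are exported with EXPORT_SYMBOL_NS_GPL(..., DAX),
a module calling them must import that symbol namespace, which is why the
final device.c hunk adds MODULE_IMPORT_NS(DAX).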