@@ -192,6 +192,42 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
}
#endif /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
+static void set_page_mapping(struct vm_fault *vmf, pfn_t pfn,
+ unsigned long fault_size,
+ struct address_space *f_mapping)
+{
+ unsigned long i;
+ pgoff_t pgoff;
+
+ pgoff = linear_page_index(vmf->vma, ALIGN(vmf->address, fault_size));
+
+ for (i = 0; i < fault_size / PAGE_SIZE; i++) {
+ struct page *page;
+
+ page = pfn_to_page(pfn_t_to_pfn(pfn) + i);
+ if (page->mapping)
+ continue;
+ page->mapping = f_mapping;
+ page->index = pgoff + i;
+ }
+}
+
+static void set_compound_mapping(struct vm_fault *vmf, pfn_t pfn,
+ unsigned long fault_size,
+ struct address_space *f_mapping)
+{
+ struct page *head;
+
+ head = pfn_to_page(pfn_t_to_pfn(pfn));
+ head = compound_head(head);
+ if (head->mapping)
+ return;
+
+ head->mapping = f_mapping;
+ head->index = linear_page_index(vmf->vma,
+ ALIGN(vmf->address, fault_size));
+}
+
static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
enum page_entry_size pe_size)
{
@@ -225,8 +261,7 @@ static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
}
if (rc == VM_FAULT_NOPAGE) {
- unsigned long i;
- pgoff_t pgoff;
+ struct dev_pagemap *pgmap = dev_dax->pgmap;
/*
* In the device-dax case the only possibility for a
@@ -234,17 +269,10 @@ static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
* mapped. No need to consider the zero page, or racing
* conflicting mappings.
*/
- pgoff = linear_page_index(vmf->vma,
- ALIGN(vmf->address, fault_size));
- for (i = 0; i < fault_size / PAGE_SIZE; i++) {
- struct page *page;
-
- page = pfn_to_page(pfn_t_to_pfn(pfn) + i);
- if (page->mapping)
- continue;
- page->mapping = filp->f_mapping;
- page->index = pgoff + i;
- }
+ if (pgmap_geometry(pgmap) > PAGE_SIZE)
+ set_compound_mapping(vmf, pfn, fault_size, filp->f_mapping);
+ else
+ set_page_mapping(vmf, pfn, fault_size, filp->f_mapping);
}
dax_read_unlock(id);
@@ -426,6 +454,8 @@ int dev_dax_probe(struct dev_dax *dev_dax)
}
pgmap->type = MEMORY_DEVICE_GENERIC;
+ if (dev_dax->align > PAGE_SIZE)
+ pgmap->geometry = dev_dax->align;
dev_dax->pgmap = pgmap;
addr = devm_memremap_pages(dev, pgmap);
Use the newly added compound pagemap facility which maps the assigned dax ranges as compound pages at a page size of @align. Currently, this means, that region/namespace bootstrap would take considerably less, given that you would initialize considerably less pages. On setups with 128G NVDIMMs the initialization with DRAM stored struct pages improves from ~268-358 ms to ~78-100 ms with 2M pages, and to less than a 1msec with 1G pages. dax devices are created with a fixed @align (huge page size) which is enforced through as well at mmap() of the device. Faults, consequently happen too at the specified @align specified at the creation, and those don't change through out dax device lifetime. MCEs poisons a whole dax huge page, as well as splits occurring at the configured page size. Signed-off-by: Joao Martins <joao.m.martins@oracle.com> --- drivers/dax/device.c | 56 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 43 insertions(+), 13 deletions(-)