diff mbox series

[RFC,07/10] device-dax: Add support for PFN_SPECIAL flags

Message ID 20200110190313.17144-8-joao.m.martins@oracle.com (mailing list archive)
State New, archived
Headers show
Series [RFC,01/10] mm: Add pmd support for _PAGE_SPECIAL | expand

Commit Message

Joao Martins Jan. 10, 2020, 7:03 p.m. UTC
Right now we assume the region pfn flags are PFN_DEV|PFN_MAP, which
means the PFN is backed by a struct page, albeit one that is not
placed in the normal system RAM zones.

Add support for regions that only have PFN_DEV|PFN_SPECIAL, in which
case the underlying vma won't have a struct page. For device dax, this
means not assuming callers will pass a dev_pagemap, not returning
SIGBUS when the region lacks the PFN_MAP pfn flag, and finally not
setting the struct page index/mapping on fault.

Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
---
 drivers/dax/bus.c    |  3 ++-
 drivers/dax/device.c | 40 ++++++++++++++++++++++------------------
 2 files changed, 24 insertions(+), 19 deletions(-)
diff mbox series

Patch

diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
index 46e46047a1f7..96ca3ac85278 100644
--- a/drivers/dax/bus.c
+++ b/drivers/dax/bus.c
@@ -414,7 +414,8 @@  struct dev_dax *__devm_create_dev_dax(struct dax_region *dax_region, int id,
 	if (!dev_dax)
 		return ERR_PTR(-ENOMEM);
 
-	memcpy(&dev_dax->pgmap, pgmap, sizeof(*pgmap));
+	if (pgmap)
+		memcpy(&dev_dax->pgmap, pgmap, sizeof(*pgmap));
 
 	/*
 	 * No 'host' or dax_operations since there is no access to this
diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index 113a554de3ee..aa38f5ff180a 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -14,6 +14,12 @@ 
 #include "dax-private.h"
 #include "bus.h"
 
+static int dax_is_pfn_special(struct dev_dax *dev_dax)
+{
+	return (dev_dax->region->pfn_flags &
+		(PFN_DEV|PFN_SPECIAL)) == (PFN_DEV|PFN_SPECIAL);
+}
+
 static int dax_is_pfn_dev(struct dev_dax *dev_dax)
 {
 	return (dev_dax->region->pfn_flags & (PFN_DEV|PFN_MAP)) == PFN_DEV;
@@ -104,6 +110,7 @@  static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax,
 	struct dax_region *dax_region;
 	phys_addr_t phys;
 	unsigned int fault_size = PAGE_SIZE;
+	int rc;
 
 	if (check_vma(dev_dax, vmf->vma, __func__))
 		return VM_FAULT_SIGBUS;
@@ -126,7 +133,12 @@  static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax,
 
 	*pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
 
-	return vmf_insert_mixed(vmf->vma, vmf->address, *pfn);
+	if (dax_is_pfn_special(dev_dax))
+		rc = vmf_insert_pfn(vmf->vma, vmf->address, pfn_t_to_pfn(*pfn));
+	else
+		rc = vmf_insert_mixed(vmf->vma, vmf->address, *pfn);
+
+	return rc;
 }
 
 static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
@@ -149,12 +161,6 @@  static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
 		return VM_FAULT_SIGBUS;
 	}
 
-	/* dax pmd mappings require pfn_t_devmap() */
-	if (!dax_is_pfn_map(dev_dax)) {
-		dev_dbg(dev, "region lacks devmap flags\n");
-		return VM_FAULT_SIGBUS;
-	}
-
 	if (fault_size < dax_region->align)
 		return VM_FAULT_SIGBUS;
 	else if (fault_size > dax_region->align)
@@ -199,12 +205,6 @@  static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
 		return VM_FAULT_SIGBUS;
 	}
 
-	/* dax pud mappings require pfn_t_devmap() */
-	if (!dax_is_pfn_map(dev_dax)) {
-		dev_dbg(dev, "region lacks devmap flags\n");
-		return VM_FAULT_SIGBUS;
-	}
-
 	if (fault_size < dax_region->align)
 		return VM_FAULT_SIGBUS;
 	else if (fault_size > dax_region->align)
@@ -266,7 +266,7 @@  static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
 		rc = VM_FAULT_SIGBUS;
 	}
 
-	if (rc == VM_FAULT_NOPAGE) {
+	if (dax_is_pfn_map(dev_dax) && (rc == VM_FAULT_NOPAGE)) {
 		unsigned long i;
 		pgoff_t pgoff;
 
@@ -344,6 +344,8 @@  static int dax_mmap(struct file *filp, struct vm_area_struct *vma)
 
 	vma->vm_ops = &dax_vm_ops;
 	vma->vm_flags |= VM_HUGEPAGE;
+	if (dax_is_pfn_special(dev_dax))
+		vma->vm_flags |= VM_PFNMAP;
 	return 0;
 }
 
@@ -450,10 +452,12 @@  int dev_dax_probe(struct device *dev)
 		return -EBUSY;
 	}
 
-	dev_dax->pgmap.type = MEMORY_DEVICE_DEVDAX;
-	addr = devm_memremap_pages(dev, &dev_dax->pgmap);
-	if (IS_ERR(addr))
-		return PTR_ERR(addr);
+	if (dax_is_pfn_map(dev_dax)) {
+		dev_dax->pgmap.type = MEMORY_DEVICE_DEVDAX;
+		addr = devm_memremap_pages(dev, &dev_dax->pgmap);
+		if (IS_ERR(addr))
+			return PTR_ERR(addr);
+	}
 
 	inode = dax_inode(dax_dev);
 	cdev = inode->i_cdev;