[03/11] device-dax: enable page_mapping()
diff mbox

Message ID 152699998750.24093.5270058390086110946.stgit@dwillia2-desk3.amr.corp.intel.com
State New, archived
Headers show

Commit Message

Dan Williams May 22, 2018, 2:39 p.m. UTC
In support of enabling memory_failure() handling for device-dax
mappings, set the ->mapping association of pages backing device-dax
mappings. The rmap implementation requires page_mapping() to return the
address_space hosting the vmas that map the page.

The ->mapping pointer is never cleared. There is no possibility for the
page to become associated with another address_space while the device is
enabled. When the device is disabled the 'struct page' array for the
device is destroyed / later reinitialized to zero.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dax/device.c |   47 +++++++++++++++++++++++++++++++----------------
 1 file changed, 31 insertions(+), 16 deletions(-)

Comments

Jan Kara May 23, 2018, 9:03 a.m. UTC | #1
On Tue 22-05-18 07:39:47, Dan Williams wrote:
> In support of enabling memory_failure() handling for device-dax
> mappings, set the ->mapping association of pages backing device-dax
> mappings. The rmap implementation requires page_mapping() to return the
> address_space hosting the vmas that map the page.
> 
> The ->mapping pointer is never cleared. There is no possibility for the
> page to become associated with another address_space while the device is
> enabled. When the device is disabled the 'struct page' array for the
> device is destroyed / later reinitialized to zero.
> 
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
...
> @@ -402,17 +401,33 @@ static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
>  	id = dax_read_lock();
>  	switch (pe_size) {
>  	case PE_SIZE_PTE:
> -		rc = __dev_dax_pte_fault(dev_dax, vmf);
> +		fault_size = PAGE_SIZE;
> +		rc = __dev_dax_pte_fault(dev_dax, vmf, &pfn);
>  		break;
>  	case PE_SIZE_PMD:
> -		rc = __dev_dax_pmd_fault(dev_dax, vmf);
> +		fault_size = PMD_SIZE;
> +		rc = __dev_dax_pmd_fault(dev_dax, vmf, &pfn);
>  		break;
>  	case PE_SIZE_PUD:
> -		rc = __dev_dax_pud_fault(dev_dax, vmf);
> +		fault_size = PUD_SIZE;
> +		rc = __dev_dax_pud_fault(dev_dax, vmf, &pfn);
>  		break;
>  	default:
>  		rc = VM_FAULT_SIGBUS;
>  	}
> +
> +	if (rc == VM_FAULT_NOPAGE) {
> +		unsigned long i;
> +
> +		for (i = 0; i < fault_size / PAGE_SIZE; i++) {
> +			struct page *page;
> +
> +			page = pfn_to_page(pfn_t_to_pfn(pfn) + i);
> +			if (page->mapping)
> +				continue;
> +			page->mapping = filp->f_mapping;
> +		}
> +	}
>  	dax_read_unlock(id);

Careful here. Page fault can return VM_FAULT_NOPAGE even if we raced with
somebody modifying the PTE or if we installed a zero page. With shared DAX
mappings (and device dax does not allow anything else if I'm right) this
does not matter as given file offset is guaranteed to always map to the
same page but I think it deserves a comment.

								Honza
kernel test robot May 30, 2018, 7:54 p.m. UTC | #2
Hi Dan,

I love your patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v4.17-rc7 next-20180530]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Dan-Williams/mm-Teach-memory_failure-about-ZONE_DEVICE-pages/20180525-035652
config: arm64-allmodconfig (attached as .config)
compiler: aarch64-linux-gnu-gcc (Debian 7.2.0-11) 7.2.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=arm64 

All errors (new ones prefixed by >>):

   drivers//dax/device.c: In function 'dev_dax_huge_fault':
>> drivers//dax/device.c:413:8: error: too many arguments to function '__dev_dax_pud_fault'
      rc = __dev_dax_pud_fault(dev_dax, vmf, &pfn);
           ^~~~~~~~~~~~~~~~~~~
   drivers//dax/device.c:380:19: note: declared here
    static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
                      ^~~~~~~~~~~~~~~~~~~

vim +/__dev_dax_pud_fault +413 drivers//dax/device.c

   386	
   387	static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
   388			enum page_entry_size pe_size)
   389	{
   390		struct vm_area_struct *vma = vmf->vma;
   391		struct file *filp = vma->vm_file;
   392		unsigned long fault_size;
   393		int rc, id;
   394		pfn_t pfn;
   395		struct dev_dax *dev_dax = filp->private_data;
   396	
   397		dev_dbg(&dev_dax->dev, "%s: %s (%#lx - %#lx) size = %d\n", current->comm,
   398				(vmf->flags & FAULT_FLAG_WRITE) ? "write" : "read",
   399				vma->vm_start, vma->vm_end, pe_size);
   400	
   401		id = dax_read_lock();
   402		switch (pe_size) {
   403		case PE_SIZE_PTE:
   404			fault_size = PAGE_SIZE;
   405			rc = __dev_dax_pte_fault(dev_dax, vmf, &pfn);
   406			break;
   407		case PE_SIZE_PMD:
   408			fault_size = PMD_SIZE;
   409			rc = __dev_dax_pmd_fault(dev_dax, vmf, &pfn);
   410			break;
   411		case PE_SIZE_PUD:
   412			fault_size = PUD_SIZE;
 > 413			rc = __dev_dax_pud_fault(dev_dax, vmf, &pfn);
   414			break;
   415		default:
   416			rc = VM_FAULT_SIGBUS;
   417		}
   418	
   419		if (rc == VM_FAULT_NOPAGE) {
   420			unsigned long i;
   421	
   422			for (i = 0; i < fault_size / PAGE_SIZE; i++) {
   423				struct page *page;
   424	
   425				page = pfn_to_page(pfn_t_to_pfn(pfn) + i);
   426				if (page->mapping)
   427					continue;
   428				page->mapping = filp->f_mapping;
   429			}
   430		}
   431		dax_read_unlock(id);
   432	
   433		return rc;
   434	}
   435	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

Patch
diff mbox

diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index 686de08e120b..8e986478d48d 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -245,13 +245,12 @@  __weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
 }
 
 static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax,
-				struct vm_fault *vmf)
+				struct vm_fault *vmf, pfn_t *pfn)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct device *dev = &dev_dax->dev;
 	struct dax_region *dax_region;
 	phys_addr_t phys;
-	pfn_t pfn;
 	unsigned int fault_size = PAGE_SIZE;
 
 	if (check_vma(dev_dax, vma, __func__))
@@ -273,13 +272,13 @@  static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax,
 		return VM_FAULT_SIGBUS;
 	}
 
-	pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
+	*pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
 
-	return vmf_insert_mixed(vma, vmf->address, pfn);
+	return vmf_insert_mixed(vma, vmf->address, *pfn);
 }
 
 static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
-				struct vm_fault *vmf)
+				struct vm_fault *vmf, pfn_t *pfn)
 {
 	unsigned long pmd_addr = vmf->address & PMD_MASK;
 	struct vm_area_struct *vma = vmf->vma;
@@ -287,7 +286,6 @@  static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
 	struct dax_region *dax_region;
 	phys_addr_t phys;
 	pgoff_t pgoff;
-	pfn_t pfn;
 	unsigned int fault_size = PMD_SIZE;
 
 	if (check_vma(dev_dax, vma, __func__))
@@ -322,15 +320,15 @@  static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
 		return VM_FAULT_SIGBUS;
 	}
 
-	pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
+	*pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
 
-	return vmf_insert_pfn_pmd(vma, vmf->address, vmf->pmd, pfn,
+	return vmf_insert_pfn_pmd(vma, vmf->address, vmf->pmd, *pfn,
 			vmf->flags & FAULT_FLAG_WRITE);
 }
 
 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
 static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
-				struct vm_fault *vmf)
+				struct vm_fault *vmf, pfn_t *pfn)
 {
 	unsigned long pud_addr = vmf->address & PUD_MASK;
 	struct vm_area_struct *vma = vmf->vma;
@@ -338,7 +336,6 @@  static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
 	struct dax_region *dax_region;
 	phys_addr_t phys;
 	pgoff_t pgoff;
-	pfn_t pfn;
 	unsigned int fault_size = PUD_SIZE;
 
 
@@ -374,9 +371,9 @@  static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
 		return VM_FAULT_SIGBUS;
 	}
 
-	pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
+	*pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
 
-	return vmf_insert_pfn_pud(vma, vmf->address, vmf->pud, pfn,
+	return vmf_insert_pfn_pud(vma, vmf->address, vmf->pud, *pfn,
 			vmf->flags & FAULT_FLAG_WRITE);
 }
 #else
@@ -390,9 +387,11 @@  static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
 static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
 		enum page_entry_size pe_size)
 {
-	int rc, id;
 	struct vm_area_struct *vma = vmf->vma;
 	struct file *filp = vma->vm_file;
+	unsigned long fault_size;
+	int rc, id;
+	pfn_t pfn;
 	struct dev_dax *dev_dax = filp->private_data;
 
 	dev_dbg(&dev_dax->dev, "%s: %s (%#lx - %#lx) size = %d\n", current->comm,
@@ -402,17 +401,33 @@  static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
 	id = dax_read_lock();
 	switch (pe_size) {
 	case PE_SIZE_PTE:
-		rc = __dev_dax_pte_fault(dev_dax, vmf);
+		fault_size = PAGE_SIZE;
+		rc = __dev_dax_pte_fault(dev_dax, vmf, &pfn);
 		break;
 	case PE_SIZE_PMD:
-		rc = __dev_dax_pmd_fault(dev_dax, vmf);
+		fault_size = PMD_SIZE;
+		rc = __dev_dax_pmd_fault(dev_dax, vmf, &pfn);
 		break;
 	case PE_SIZE_PUD:
-		rc = __dev_dax_pud_fault(dev_dax, vmf);
+		fault_size = PUD_SIZE;
+		rc = __dev_dax_pud_fault(dev_dax, vmf, &pfn);
 		break;
 	default:
 		rc = VM_FAULT_SIGBUS;
 	}
+
+	if (rc == VM_FAULT_NOPAGE) {
+		unsigned long i;
+
+		for (i = 0; i < fault_size / PAGE_SIZE; i++) {
+			struct page *page;
+
+			page = pfn_to_page(pfn_t_to_pfn(pfn) + i);
+			if (page->mapping)
+				continue;
+			page->mapping = filp->f_mapping;
+		}
+	}
 	dax_read_unlock(id);
 
 	return rc;