diff mbox series

[v6,09/10] device-dax: set mapping prior to vmf_insert_pfn{,_pmd,pud}()

Message ID 20211124191005.20783-10-joao.m.martins@oracle.com (mailing list archive)
State New
Headers show
Series mm, device-dax: Introduce compound pages in devmap | expand

Commit Message

Joao Martins Nov. 24, 2021, 7:10 p.m. UTC
Normally, the @page mapping is set prior to inserting the page into a
page table entry. Make device-dax adhere to the same ordering, rather
than setting mapping after the PTE is inserted.

The address_space never changes and it is always associated with the
same inode and underlying pages. So, the page mapping is set once but
cleared when the struct pages are removed/freed (i.e. after
{devm_}memunmap_pages()).

Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
---
 drivers/dax/device.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

Comments

Joao Martins Nov. 25, 2021, 11:42 a.m. UTC | #1
On 11/24/21 19:10, Joao Martins wrote:
> Normally, the @page mapping is set prior to inserting the page into a
> page table entry. Make device-dax adhere to the same ordering, rather
> than setting mapping after the PTE is inserted.
> 
> The address_space never changes and it is always associated with the
> same inode and underlying pages. So, the page mapping is set once but
> cleared when the struct pages are removed/freed (i.e. after
> {devm_}memunmap_pages()).
> 
> Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> ---
>  drivers/dax/device.c | 8 ++++++--
>  1 file changed, 6 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/dax/device.c b/drivers/dax/device.c
> index 9c87927d4bc2..0ef9fecec005 100644
> --- a/drivers/dax/device.c
> +++ b/drivers/dax/device.c
> @@ -121,6 +121,8 @@ static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax,
>  
>  	*pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
>  
> +	dax_set_mapping(vmf, *pfn, fault_size);
> +
>  	return vmf_insert_mixed(vmf->vma, vmf->address, *pfn);
>  }
>  
> @@ -161,6 +163,8 @@ static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
>  
>  	*pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
>  
> +	dax_set_mapping(vmf, *pfn, fault_size);
> +
>  	return vmf_insert_pfn_pmd(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE);
>  }
>  
> @@ -203,6 +207,8 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
>  
>  	*pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
>  
> +	dax_set_mapping(vmf, *pfn, fault_size);
> +
>  	return vmf_insert_pfn_pud(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE);
>  }
>  #else
> @@ -245,8 +251,6 @@ static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
>  		rc = VM_FAULT_SIGBUS;
>  	}
>  
> -	if (rc == VM_FAULT_NOPAGE)
> -		dax_set_mapping(vmf, pfn, fault_size);
>  	dax_read_unlock(id);
>  
>  	return rc;
> 
This last chunk is going to spoof out a new warning because @fault_size in
dev_dax_huge_fault stops being used after this patch.
I've added below chunk for the next version (in addition to Christoph comments in
patch 4):

@@ -217,7 +223,6 @@ static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
                enum page_entry_size pe_size)
 {
        struct file *filp = vmf->vma->vm_file;
-       unsigned long fault_size;
        vm_fault_t rc = VM_FAULT_SIGBUS;
        int id;
        pfn_t pfn;
@@ -230,23 +235,18 @@ static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
        id = dax_read_lock();
        switch (pe_size) {
        case PE_SIZE_PTE:
-               fault_size = PAGE_SIZE;
                rc = __dev_dax_pte_fault(dev_dax, vmf, &pfn);
                break;
        case PE_SIZE_PMD:
-               fault_size = PMD_SIZE;
                rc = __dev_dax_pmd_fault(dev_dax, vmf, &pfn);
                break;
        case PE_SIZE_PUD:
-               fault_size = PUD_SIZE;
                rc = __dev_dax_pud_fault(dev_dax, vmf, &pfn);
                break;
        default:
                rc = VM_FAULT_SIGBUS;
        }
Joao Martins Nov. 26, 2021, 6:39 p.m. UTC | #2
On 11/25/21 11:42, Joao Martins wrote:
> On 11/24/21 19:10, Joao Martins wrote:
>> @@ -245,8 +251,6 @@ static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
>>  		rc = VM_FAULT_SIGBUS;
>>  	}
>>  
>> -	if (rc == VM_FAULT_NOPAGE)
>> -		dax_set_mapping(vmf, pfn, fault_size);
>>  	dax_read_unlock(id);
>>  
>>  	return rc;
>>
> This last chunk is going to spoof out a new warning because @fault_size in
> dev_dax_huge_fault stops being used after this patch.
> I've added below chunk for the next version (in addition to Christoph comments in
> patch 4):
> 

Re-attached as a replacement patch below scissors line.

As mentioned earlier, I'll be respinning v7 series with the comments I got on patch 4 and
this replacement below. But given the build warning yesterday&today, figured I
preemptively attach a replacement for it.

----->8-----

From 2f4cb25c0a6546a27ced4981f0963546f386caec Mon Sep 17 00:00:00 2001
From: Joao Martins <joao.m.martins@oracle.com>
Date: Tue, 23 Nov 2021 06:00:38 -0500
Subject: [PATCH] device-dax: set mapping prior to vmf_insert_pfn{,_pmd,pud}()

Normally, the @page mapping is set prior to inserting the page into a
page table entry. Make device-dax adhere to the same ordering, rather
than setting mapping after the PTE is inserted.

The address_space never changes and it is always associated with the
same inode and underlying pages. So, the page mapping is set once but
cleared when the struct pages are removed/freed (i.e. after
{devm_}memunmap_pages()).

Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
---
 drivers/dax/device.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index 9c87927d4bc2..19a6b86486ce 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -121,6 +121,8 @@ static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax,

 	*pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);

+	dax_set_mapping(vmf, *pfn, fault_size);
+
 	return vmf_insert_mixed(vmf->vma, vmf->address, *pfn);
 }

@@ -161,6 +163,8 @@ static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,

 	*pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);

+	dax_set_mapping(vmf, *pfn, fault_size);
+
 	return vmf_insert_pfn_pmd(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE);
 }

@@ -203,6 +207,8 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,

 	*pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);

+	dax_set_mapping(vmf, *pfn, fault_size);
+
 	return vmf_insert_pfn_pud(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE);
 }
 #else
@@ -217,7 +223,6 @@ static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
 		enum page_entry_size pe_size)
 {
 	struct file *filp = vmf->vma->vm_file;
-	unsigned long fault_size;
 	vm_fault_t rc = VM_FAULT_SIGBUS;
 	int id;
 	pfn_t pfn;
@@ -230,23 +235,18 @@ static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
 	id = dax_read_lock();
 	switch (pe_size) {
 	case PE_SIZE_PTE:
-		fault_size = PAGE_SIZE;
 		rc = __dev_dax_pte_fault(dev_dax, vmf, &pfn);
 		break;
 	case PE_SIZE_PMD:
-		fault_size = PMD_SIZE;
 		rc = __dev_dax_pmd_fault(dev_dax, vmf, &pfn);
 		break;
 	case PE_SIZE_PUD:
-		fault_size = PUD_SIZE;
 		rc = __dev_dax_pud_fault(dev_dax, vmf, &pfn);
 		break;
 	default:
 		rc = VM_FAULT_SIGBUS;
 	}

-	if (rc == VM_FAULT_NOPAGE)
-		dax_set_mapping(vmf, pfn, fault_size);
 	dax_read_unlock(id);

 	return rc;
Christoph Hellwig Nov. 29, 2021, 7:32 a.m. UTC | #3
On Fri, Nov 26, 2021 at 06:39:39PM +0000, Joao Martins wrote:
> @@ -230,23 +235,18 @@ static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
>  	id = dax_read_lock();
>  	switch (pe_size) {
>  	case PE_SIZE_PTE:
> -		fault_size = PAGE_SIZE;
>  		rc = __dev_dax_pte_fault(dev_dax, vmf, &pfn);
>  		break;
>  	case PE_SIZE_PMD:
> -		fault_size = PMD_SIZE;
>  		rc = __dev_dax_pmd_fault(dev_dax, vmf, &pfn);
>  		break;
>  	case PE_SIZE_PUD:
> -		fault_size = PUD_SIZE;
>  		rc = __dev_dax_pud_fault(dev_dax, vmf, &pfn);
>  		break;
>  	default:
>  		rc = VM_FAULT_SIGBUS;
>  	}
> 
>  	dax_read_unlock(id);

I wonder if if would make sense to move dax_read_lock / dax_read_unlock
іnto the individul helpers as well now.  That way you could directly
return from the switch.  Aso it seems like pfn is only an input
parameter now and doesn't need to be passed by reference.
Joao Martins Nov. 29, 2021, 3:49 p.m. UTC | #4
On 11/29/21 07:32, Christoph Hellwig wrote:
> On Fri, Nov 26, 2021 at 06:39:39PM +0000, Joao Martins wrote:
>> @@ -230,23 +235,18 @@ static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
>>  	id = dax_read_lock();
>>  	switch (pe_size) {
>>  	case PE_SIZE_PTE:
>> -		fault_size = PAGE_SIZE;
>>  		rc = __dev_dax_pte_fault(dev_dax, vmf, &pfn);
>>  		break;
>>  	case PE_SIZE_PMD:
>> -		fault_size = PMD_SIZE;
>>  		rc = __dev_dax_pmd_fault(dev_dax, vmf, &pfn);
>>  		break;
>>  	case PE_SIZE_PUD:
>> -		fault_size = PUD_SIZE;
>>  		rc = __dev_dax_pud_fault(dev_dax, vmf, &pfn);
>>  		break;
>>  	default:
>>  		rc = VM_FAULT_SIGBUS;
>>  	}
>>
>>  	dax_read_unlock(id);
> 
> I wonder if if would make sense to move dax_read_lock / dax_read_unlock
> іnto the individul helpers as well now.  That way you could directly
> return from the switch. 

Hmmm -- if by individual helpers moving to __dev_dax_{pte,pmd,pud}_fault()
it would be slightly less straighforward. Unless you might mean to move
to check_vma() (around the dax_alive() check) and that might actually
remove the opencoding of dax_read_lock in dax_mmap() even.

I would rather prefer that this cleanup around dax_read_{un,}lock is
a separate patch separate to this series, unless you feel strongly that
it needs to be part of this set.

> Aso it seems like pfn is only an input
> parameter now and doesn't need to be passed by reference.
> 
It's actually just an output parameter (that dax_set_mapping would then use).

The fault handlers in device-dax use vmf->address to calculate pfn that they
insert in the page table entry. After this patch we can actually just remove
@pfn argument.

	Joao
Christoph Hellwig Nov. 29, 2021, 4:48 p.m. UTC | #5
On Mon, Nov 29, 2021 at 03:49:46PM +0000, Joao Martins wrote:
> Hmmm -- if by individual helpers moving to __dev_dax_{pte,pmd,pud}_fault()
> it would be slightly less straighforward. Unless you might mean to move
> to check_vma() (around the dax_alive() check) and that might actually
> remove the opencoding of dax_read_lock in dax_mmap() even.
> 
> I would rather prefer that this cleanup around dax_read_{un,}lock is
> a separate patch separate to this series, unless you feel strongly that
> it needs to be part of this set.

Feel free to keep it as-is.
Joao Martins Nov. 29, 2021, 5:20 p.m. UTC | #6
On 11/29/21 15:49, Joao Martins wrote:
> On 11/29/21 07:32, Christoph Hellwig wrote:
>> On Fri, Nov 26, 2021 at 06:39:39PM +0000, Joao Martins wrote:
>> Aso it seems like pfn is only an input
>> parameter now and doesn't need to be passed by reference.
>>
> It's actually just an output parameter (that dax_set_mapping would then use).
> 
> The fault handlers in device-dax use vmf->address to calculate pfn that they
> insert in the page table entry. After this patch we can actually just remove
> @pfn argument.

I've added your suggestion as a cleanup patch between 9 and current 10 (11 in v7):

---->8----

From 999cec9efa757b82f435124518b042caeb51bde6 Mon Sep 17 00:00:00 2001
From: Joao Martins <joao.m.martins@oracle.com>
Date: Mon, 29 Nov 2021 11:12:00 -0500
Subject: [PATCH] device-dax: remove pfn from __dev_dax_{pte,pmd,pud}_fault()

After moving the page mapping to be set prior to pte insertion, the pfn
in dev_dax_huge_fault() no longer is necessary.  Remove it, as well as
the @pfn argument passed to the internal fault handler helpers.

Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
---
 drivers/dax/device.c | 34 ++++++++++++++++++----------------
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index 19a6b86486ce..914368164e05 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -95,10 +95,11 @@ static void dax_set_mapping(struct vm_fault *vmf, pfn_t pfn,
 }

 static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax,
-                               struct vm_fault *vmf, pfn_t *pfn)
+                               struct vm_fault *vmf)
 {
        struct device *dev = &dev_dax->dev;
        phys_addr_t phys;
+       pfn_t pfn;
        unsigned int fault_size = PAGE_SIZE;

        if (check_vma(dev_dax, vmf->vma, __func__))
@@ -119,20 +120,21 @@ static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax,
                return VM_FAULT_SIGBUS;
        }

-       *pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
+       pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);

-       dax_set_mapping(vmf, *pfn, fault_size);
+       dax_set_mapping(vmf, pfn, fault_size);

-       return vmf_insert_mixed(vmf->vma, vmf->address, *pfn);
+       return vmf_insert_mixed(vmf->vma, vmf->address, pfn);
 }

 static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
-                               struct vm_fault *vmf, pfn_t *pfn)
+                               struct vm_fault *vmf)
 {
        unsigned long pmd_addr = vmf->address & PMD_MASK;
        struct device *dev = &dev_dax->dev;
        phys_addr_t phys;
        pgoff_t pgoff;
+       pfn_t pfn;
        unsigned int fault_size = PMD_SIZE;

        if (check_vma(dev_dax, vmf->vma, __func__))
@@ -161,21 +163,22 @@ static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
                return VM_FAULT_SIGBUS;
        }

-       *pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
+       pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);

-       dax_set_mapping(vmf, *pfn, fault_size);
+       dax_set_mapping(vmf, pfn, fault_size);

-       return vmf_insert_pfn_pmd(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE);
+       return vmf_insert_pfn_pmd(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE);
 }
 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
 static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
-                               struct vm_fault *vmf, pfn_t *pfn)
+                               struct vm_fault *vmf)
 {
        unsigned long pud_addr = vmf->address & PUD_MASK;
        struct device *dev = &dev_dax->dev;
        phys_addr_t phys;
        pgoff_t pgoff;
+       pfn_t pfn;
        unsigned int fault_size = PUD_SIZE;


@@ -205,11 +208,11 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
                return VM_FAULT_SIGBUS;
        }

-       *pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
+       pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);

-       dax_set_mapping(vmf, *pfn, fault_size);
+       dax_set_mapping(vmf, pfn, fault_size);

-       return vmf_insert_pfn_pud(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE);
+       return vmf_insert_pfn_pud(vmf, pfn, vmf->flags & FAULT_FLAG_WRITE);
 }
 #else
 static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
@@ -225,7 +228,6 @@ static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
        struct file *filp = vmf->vma->vm_file;
        vm_fault_t rc = VM_FAULT_SIGBUS;
        int id;
-       pfn_t pfn;
        struct dev_dax *dev_dax = filp->private_data;

        dev_dbg(&dev_dax->dev, "%s: %s (%#lx - %#lx) size = %d\n", current->comm,
@@ -235,13 +237,13 @@ static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
        id = dax_read_lock();
        switch (pe_size) {
        case PE_SIZE_PTE:
-               rc = __dev_dax_pte_fault(dev_dax, vmf, &pfn);
+               rc = __dev_dax_pte_fault(dev_dax, vmf);
                break;
        case PE_SIZE_PMD:
-               rc = __dev_dax_pmd_fault(dev_dax, vmf, &pfn);
+               rc = __dev_dax_pmd_fault(dev_dax, vmf);
                break;
        case PE_SIZE_PUD:
-               rc = __dev_dax_pud_fault(dev_dax, vmf, &pfn);
+               rc = __dev_dax_pud_fault(dev_dax, vmf);
                break;
        default:
                rc = VM_FAULT_SIGBUS;
--
2.17.2
diff mbox series

Patch

diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index 9c87927d4bc2..0ef9fecec005 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -121,6 +121,8 @@  static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax,
 
 	*pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
 
+	dax_set_mapping(vmf, *pfn, fault_size);
+
 	return vmf_insert_mixed(vmf->vma, vmf->address, *pfn);
 }
 
@@ -161,6 +163,8 @@  static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
 
 	*pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
 
+	dax_set_mapping(vmf, *pfn, fault_size);
+
 	return vmf_insert_pfn_pmd(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE);
 }
 
@@ -203,6 +207,8 @@  static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
 
 	*pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
 
+	dax_set_mapping(vmf, *pfn, fault_size);
+
 	return vmf_insert_pfn_pud(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE);
 }
 #else
@@ -245,8 +251,6 @@  static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
 		rc = VM_FAULT_SIGBUS;
 	}
 
-	if (rc == VM_FAULT_NOPAGE)
-		dax_set_mapping(vmf, pfn, fault_size);
 	dax_read_unlock(id);
 
 	return rc;