[08/18] dax: memcpy page in case of IOMAP_DAX_COW for mmap faults
diff mbox series

Message ID 20190429172649.8288-9-rgoldwyn@suse.de
State New
Headers show
Series
  • btrfs dax support
Related show

Commit Message

Goldwyn Rodrigues April 29, 2019, 5:26 p.m. UTC
From: Goldwyn Rodrigues <rgoldwyn@suse.com>

Change dax_iomap_pfn to return the address as well in order to
use it for performing a memcpy in case the type is IOMAP_DAX_COW.
We don't handle PMD because btrfs does not support hugepages.

Question:
The sequence of bdev_dax_pgoff() and dax_direct_access() is
used multiple times to calculate address and pfn's. Would it make
sense to call it while calculating address as well to reduce code?

Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
---
 fs/dax.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

Comments

Darrick J. Wong May 21, 2019, 5:46 p.m. UTC | #1
On Mon, Apr 29, 2019 at 12:26:39PM -0500, Goldwyn Rodrigues wrote:
> From: Goldwyn Rodrigues <rgoldwyn@suse.com>
> 
> Change dax_iomap_pfn to return the address as well in order to
> use it for performing a memcpy in case the type is IOMAP_DAX_COW.
> We don't handle PMD because btrfs does not support hugepages.
> 
> Question:
> The sequence of bdev_dax_pgoff() and dax_direct_access() is
> used multiple times to calculate address and pfn's. Would it make
> sense to call it while calculating address as well to reduce code?
> 
> Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> ---
>  fs/dax.c | 19 +++++++++++++++----
>  1 file changed, 15 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/dax.c b/fs/dax.c
> index 610bfa861a28..718b1632a39d 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -984,7 +984,7 @@ static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos)
>  }
>  
>  static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size,
> -			 pfn_t *pfnp)
> +			 pfn_t *pfnp, void **addr)
>  {
>  	const sector_t sector = dax_iomap_sector(iomap, pos);
>  	pgoff_t pgoff;
> @@ -996,7 +996,7 @@ static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size,
>  		return rc;
>  	id = dax_read_lock();
>  	length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size),
> -				   NULL, pfnp);
> +				   addr, pfnp);
>  	if (length < 0) {
>  		rc = length;
>  		goto out;
> @@ -1286,6 +1286,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
>  	XA_STATE(xas, &mapping->i_pages, vmf->pgoff);
>  	struct inode *inode = mapping->host;
>  	unsigned long vaddr = vmf->address;
> +	void *addr;
>  	loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
>  	struct iomap iomap = { 0 };

Ugh, I had forgotten that fs/dax.c open-codes iomap_apply, probably
because the actor returns vm_fault_t, not bytes copied.  I guess that
makes it a tiny bit more complicated to pass in two (struct iomap *) to
the iomap_begin function...

>  	unsigned flags = IOMAP_FAULT;
> @@ -1375,16 +1376,26 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
>  	sync = dax_fault_is_synchronous(flags, vma, &iomap);
>  
>  	switch (iomap.type) {
> +	case IOMAP_DAX_COW:
>  	case IOMAP_MAPPED:
>  		if (iomap.flags & IOMAP_F_NEW) {
>  			count_vm_event(PGMAJFAULT);
>  			count_memcg_event_mm(vma->vm_mm, PGMAJFAULT);
>  			major = VM_FAULT_MAJOR;
>  		}
> -		error = dax_iomap_pfn(&iomap, pos, PAGE_SIZE, &pfn);
> +		error = dax_iomap_pfn(&iomap, pos, PAGE_SIZE, &pfn, &addr);
>  		if (error < 0)
>  			goto error_finish_iomap;
>  
> +		if (iomap.type == IOMAP_DAX_COW) {
> +			if (iomap.inline_data) {
> +				error = memcpy_mcsafe(addr, iomap.inline_data,
> +					      PAGE_SIZE);
> +				if (error < 0)
> +					goto error_finish_iomap;
> +			} else
> +				memset(addr, 0, PAGE_SIZE);

This memcpy_mcsafe/memset chunk shows up a lot in this series.  Maybe it
should be a static inline within dax.c?

--D

> +		}
>  		entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn,
>  						 0, write && !sync);
>  
> @@ -1597,7 +1608,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
>  
>  	switch (iomap.type) {
>  	case IOMAP_MAPPED:
> -		error = dax_iomap_pfn(&iomap, pos, PMD_SIZE, &pfn);
> +		error = dax_iomap_pfn(&iomap, pos, PMD_SIZE, &pfn, NULL);
>  		if (error < 0)
>  			goto finish_iomap;
>  
> -- 
> 2.16.4
>
Goldwyn Rodrigues May 22, 2019, 7:11 p.m. UTC | #2
On 10:46 21/05, Darrick J. Wong wrote:
> On Mon, Apr 29, 2019 at 12:26:39PM -0500, Goldwyn Rodrigues wrote:
> > From: Goldwyn Rodrigues <rgoldwyn@suse.com>
> > 
> > Change dax_iomap_pfn to return the address as well in order to
> > use it for performing a memcpy in case the type is IOMAP_DAX_COW.
> > We don't handle PMD because btrfs does not support hugepages.
> > 
> > Question:
> > The sequence of bdev_dax_pgoff() and dax_direct_access() is
> > used multiple times to calculate address and pfn's. Would it make
> > sense to call it while calculating address as well to reduce code?
> > 
> > Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> > ---
> >  fs/dax.c | 19 +++++++++++++++----
> >  1 file changed, 15 insertions(+), 4 deletions(-)
> > 
> > diff --git a/fs/dax.c b/fs/dax.c
> > index 610bfa861a28..718b1632a39d 100644
> > --- a/fs/dax.c
> > +++ b/fs/dax.c
> > @@ -984,7 +984,7 @@ static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos)
> >  }
> >  
> >  static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size,
> > -			 pfn_t *pfnp)
> > +			 pfn_t *pfnp, void **addr)
> >  {
> >  	const sector_t sector = dax_iomap_sector(iomap, pos);
> >  	pgoff_t pgoff;
> > @@ -996,7 +996,7 @@ static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size,
> >  		return rc;
> >  	id = dax_read_lock();
> >  	length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size),
> > -				   NULL, pfnp);
> > +				   addr, pfnp);
> >  	if (length < 0) {
> >  		rc = length;
> >  		goto out;
> > @@ -1286,6 +1286,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
> >  	XA_STATE(xas, &mapping->i_pages, vmf->pgoff);
> >  	struct inode *inode = mapping->host;
> >  	unsigned long vaddr = vmf->address;
> > +	void *addr;
> >  	loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
> >  	struct iomap iomap = { 0 };
> 
> Ugh, I had forgotten that fs/dax.c open-codes iomap_apply, probably
> because the actor returns vm_fault_t, not bytes copied.  I guess that
> makes it a tiny bit more complicated to pass in two (struct iomap *) to
> the iomap_begin function...

I am not sure I understand this. We do not use iomap_apply() in
the fault path: dax_iomap_pte_fault(). We just use iomap_begin()
and iomap_end(). So, why can we not implement your idea of using two
iomaps? What does open-coding iomap-apply mean?
Darrick J. Wong May 23, 2019, 4:02 a.m. UTC | #3
On Wed, May 22, 2019 at 02:11:39PM -0500, Goldwyn Rodrigues wrote:
> On 10:46 21/05, Darrick J. Wong wrote:
> > On Mon, Apr 29, 2019 at 12:26:39PM -0500, Goldwyn Rodrigues wrote:
> > > From: Goldwyn Rodrigues <rgoldwyn@suse.com>
> > > 
> > > Change dax_iomap_pfn to return the address as well in order to
> > > use it for performing a memcpy in case the type is IOMAP_DAX_COW.
> > > We don't handle PMD because btrfs does not support hugepages.
> > > 
> > > Question:
> > > The sequence of bdev_dax_pgoff() and dax_direct_access() is
> > > used multiple times to calculate address and pfn's. Would it make
> > > sense to call it while calculating address as well to reduce code?
> > > 
> > > Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> > > ---
> > >  fs/dax.c | 19 +++++++++++++++----
> > >  1 file changed, 15 insertions(+), 4 deletions(-)
> > > 
> > > diff --git a/fs/dax.c b/fs/dax.c
> > > index 610bfa861a28..718b1632a39d 100644
> > > --- a/fs/dax.c
> > > +++ b/fs/dax.c
> > > @@ -984,7 +984,7 @@ static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos)
> > >  }
> > >  
> > >  static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size,
> > > -			 pfn_t *pfnp)
> > > +			 pfn_t *pfnp, void **addr)
> > >  {
> > >  	const sector_t sector = dax_iomap_sector(iomap, pos);
> > >  	pgoff_t pgoff;
> > > @@ -996,7 +996,7 @@ static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size,
> > >  		return rc;
> > >  	id = dax_read_lock();
> > >  	length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size),
> > > -				   NULL, pfnp);
> > > +				   addr, pfnp);
> > >  	if (length < 0) {
> > >  		rc = length;
> > >  		goto out;
> > > @@ -1286,6 +1286,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
> > >  	XA_STATE(xas, &mapping->i_pages, vmf->pgoff);
> > >  	struct inode *inode = mapping->host;
> > >  	unsigned long vaddr = vmf->address;
> > > +	void *addr;
> > >  	loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
> > >  	struct iomap iomap = { 0 };
> > 
> > Ugh, I had forgotten that fs/dax.c open-codes iomap_apply, probably
> > because the actor returns vm_fault_t, not bytes copied.  I guess that
> > makes it a tiny bit more complicated to pass in two (struct iomap *) to
> > the iomap_begin function...
> 
> I am not sure I understand this. We do not use iomap_apply() in
> the fault path: dax_iomap_pte_fault(). We just use iomap_begin()
> and iomap_end(). So, why can we not implement your idea of using two
> iomaps?

Oh, sorry, I wasn't trying to say that calling ->iomap_begin made it
*impossible* to implement.  I was merely complaining about the increased
maintenance burden that results from open coding -- now there are three
places where we have to change a struct iomap declaration, not one
(iomap_apply) as I had originally thought.

> What does open-coding iomap-apply mean?

Any function that calls (1) ->iomap_begin; (2) performs an action on the
returned iomap; and (3) then calls calling ->iomap_end.  That's what
iomap_apply() does.

Really I'm just being maintainer-cranky.  Ignore me for now. :)

--D

> 
> -- 
> Goldwyn
Jan Kara May 23, 2019, 12:10 p.m. UTC | #4
On Tue 21-05-19 10:46:25, Darrick J. Wong wrote:
> On Mon, Apr 29, 2019 at 12:26:39PM -0500, Goldwyn Rodrigues wrote:
> > From: Goldwyn Rodrigues <rgoldwyn@suse.com>
> > 
> > Change dax_iomap_pfn to return the address as well in order to
> > use it for performing a memcpy in case the type is IOMAP_DAX_COW.
> > We don't handle PMD because btrfs does not support hugepages.
> > 
> > Question:
> > The sequence of bdev_dax_pgoff() and dax_direct_access() is
> > used multiple times to calculate address and pfn's. Would it make
> > sense to call it while calculating address as well to reduce code?
> > 
> > Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> > ---
> >  fs/dax.c | 19 +++++++++++++++----
> >  1 file changed, 15 insertions(+), 4 deletions(-)
> > 
> > diff --git a/fs/dax.c b/fs/dax.c
> > index 610bfa861a28..718b1632a39d 100644
> > --- a/fs/dax.c
> > +++ b/fs/dax.c
> > @@ -984,7 +984,7 @@ static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos)
> >  }
> >  
> >  static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size,
> > -			 pfn_t *pfnp)
> > +			 pfn_t *pfnp, void **addr)
> >  {
> >  	const sector_t sector = dax_iomap_sector(iomap, pos);
> >  	pgoff_t pgoff;
> > @@ -996,7 +996,7 @@ static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size,
> >  		return rc;
> >  	id = dax_read_lock();
> >  	length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size),
> > -				   NULL, pfnp);
> > +				   addr, pfnp);
> >  	if (length < 0) {
> >  		rc = length;
> >  		goto out;
> > @@ -1286,6 +1286,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
> >  	XA_STATE(xas, &mapping->i_pages, vmf->pgoff);
> >  	struct inode *inode = mapping->host;
> >  	unsigned long vaddr = vmf->address;
> > +	void *addr;
> >  	loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
> >  	struct iomap iomap = { 0 };
> 
> Ugh, I had forgotten that fs/dax.c open-codes iomap_apply, probably
> because the actor returns vm_fault_t, not bytes copied.  I guess that
> makes it a tiny bit more complicated to pass in two (struct iomap *) to
> the iomap_begin function...

Hum, right. We could actually reimplement dax_iomap_{pte|pmd}_fault() using
iomap_apply(). We would just need to propagate error code out of our
'actor' inside the structure pointed to by 'data'. But that's doable.

								Honza

Patch
diff mbox series

diff --git a/fs/dax.c b/fs/dax.c
index 610bfa861a28..718b1632a39d 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -984,7 +984,7 @@  static sector_t dax_iomap_sector(struct iomap *iomap, loff_t pos)
 }
 
 static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size,
-			 pfn_t *pfnp)
+			 pfn_t *pfnp, void **addr)
 {
 	const sector_t sector = dax_iomap_sector(iomap, pos);
 	pgoff_t pgoff;
@@ -996,7 +996,7 @@  static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size,
 		return rc;
 	id = dax_read_lock();
 	length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size),
-				   NULL, pfnp);
+				   addr, pfnp);
 	if (length < 0) {
 		rc = length;
 		goto out;
@@ -1286,6 +1286,7 @@  static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
 	XA_STATE(xas, &mapping->i_pages, vmf->pgoff);
 	struct inode *inode = mapping->host;
 	unsigned long vaddr = vmf->address;
+	void *addr;
 	loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
 	struct iomap iomap = { 0 };
 	unsigned flags = IOMAP_FAULT;
@@ -1375,16 +1376,26 @@  static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
 	sync = dax_fault_is_synchronous(flags, vma, &iomap);
 
 	switch (iomap.type) {
+	case IOMAP_DAX_COW:
 	case IOMAP_MAPPED:
 		if (iomap.flags & IOMAP_F_NEW) {
 			count_vm_event(PGMAJFAULT);
 			count_memcg_event_mm(vma->vm_mm, PGMAJFAULT);
 			major = VM_FAULT_MAJOR;
 		}
-		error = dax_iomap_pfn(&iomap, pos, PAGE_SIZE, &pfn);
+		error = dax_iomap_pfn(&iomap, pos, PAGE_SIZE, &pfn, &addr);
 		if (error < 0)
 			goto error_finish_iomap;
 
+		if (iomap.type == IOMAP_DAX_COW) {
+			if (iomap.inline_data) {
+				error = memcpy_mcsafe(addr, iomap.inline_data,
+					      PAGE_SIZE);
+				if (error < 0)
+					goto error_finish_iomap;
+			} else
+				memset(addr, 0, PAGE_SIZE);
+		}
 		entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn,
 						 0, write && !sync);
 
@@ -1597,7 +1608,7 @@  static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
 
 	switch (iomap.type) {
 	case IOMAP_MAPPED:
-		error = dax_iomap_pfn(&iomap, pos, PMD_SIZE, &pfn);
+		error = dax_iomap_pfn(&iomap, pos, PMD_SIZE, &pfn, NULL);
 		if (error < 0)
 			goto finish_iomap;