[v3,05/10] fsdax: Replace mmap entry in case of CoW

Message ID 20210319015237.993880-6-ruansy.fnst@fujitsu.com (mailing list archive)
State New, archived
Series fsdax, xfs: Add reflink&dedupe support for fsdax

Commit Message

Shiyang Ruan March 19, 2021, 1:52 a.m. UTC
We replace the existing entry with the newly allocated one in case of CoW.
Also, we mark the entry with PAGECACHE_TAG_TOWRITE so that writeback marks
this entry as write-protected.  This helps with snapshots: new write
pagefaults after a snapshot will trigger a CoW.

Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/dax.c | 37 ++++++++++++++++++++++++++-----------
 1 file changed, 26 insertions(+), 11 deletions(-)
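
For context, a minimal userspace sketch of the scenario this patch handles
(the paths, the 4096-byte length, and the fsdax mount at /mnt/pmem are
illustrative assumptions; a filesystem with reflink support, e.g. XFS, is
required):

/*
 * Clone (reflink) a file, mmap the clone, and dirty a page so that the
 * write fault takes the DAX CoW path this patch modifies.
 */
#include <fcntl.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <linux/fs.h>		/* FICLONE */

int main(void)
{
	int src = open("/mnt/pmem/orig", O_RDONLY);
	int dst = open("/mnt/pmem/snapshot", O_RDWR | O_CREAT, 0644);

	if (src < 0 || dst < 0)
		exit(1);
	if (ioctl(dst, FICLONE, src) < 0)	/* share extents with orig */
		exit(1);

	char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
		       dst, 0);
	if (p == MAP_FAILED)
		exit(1);

	/*
	 * Write fault on a shared extent: ->iomap_begin() reports
	 * IOMAP_F_SHARED, so dax_insert_entry() replaces the old entry
	 * with the newly allocated one (DAX_IF_COW).
	 */
	p[0] = 'x';
	msync(p, 4096, MS_SYNC);
	return 0;
}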

Comments

Ritesh Harjani (IBM) April 1, 2021, 6:39 a.m. UTC | #1
On 21/03/19 09:52AM, Shiyang Ruan wrote:
> We replace the existing entry with the newly allocated one in case of CoW.
> Also, we mark the entry with PAGECACHE_TAG_TOWRITE so that writeback marks
> this entry as write-protected.  This helps with snapshots: new write
> pagefaults after a snapshot will trigger a CoW.
>

Please correct me if I am wrong here. So the flow is like this:
1. In case of CoW on a reflinked file, if a write is attempted on an mmapped
   file, then in the DAX fault handler code, ->iomap_begin() of the given
   filesystem will populate iomap and srcmap: srcmap being where the read
   should be attempted from, and iomap where the new write should go to.
2. So the dax_insert_entry() code, as part of the fault handling, will take
   care of removing the old entry and inserting the new pfn entry into the
   xas, and mark it with PAGECACHE_TAG_TOWRITE so that dax writeback can
   mark the entry as write-protected.
Is my above understanding correct?
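
If so, the flow condenses to roughly the sketch below (a condensed reading
of the diff, not the literal call chain; locking and error handling omitted):

/*
 *   dax_iomap_fault()
 *     ops->iomap_begin()          // fs fills @iomap (new write dest) and
 *                                 // @srcmap (read source); sets
 *                                 // IOMAP_F_SHARED when CoW is needed
 *     dax_fault_actor()
 *       insert_flags |= DAX_IF_DIRTY;  // if write && !sync
 *       insert_flags |= DAX_IF_COW;    // if iomap->flags & IOMAP_F_SHARED
 *       dax_insert_entry()        // drops the old xarray entry, inserts
 *                                 // the new pfn, tags the entry
 *                                 // PAGECACHE_TAG_TOWRITE in the cow case
 *       dax_iomap_cow_copy()      // copies the old data from srcmap
 */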

> Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/dax.c | 37 ++++++++++++++++++++++++++-----------
>  1 file changed, 26 insertions(+), 11 deletions(-)
>
> diff --git a/fs/dax.c b/fs/dax.c
> index 181aad97136a..cfe513eb111e 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -722,6 +722,9 @@ static int copy_cow_page_dax(struct block_device *bdev, struct dax_device *dax_d
>  	return 0;
>  }
>
> +#define DAX_IF_DIRTY		(1 << 0)
> +#define DAX_IF_COW		(1 << 1)
> +
>
A small comment explaining that these are DAX insert flags used by dax_insert_entry() would help.
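
Something like the following would do (wording is only a suggestion):

/* Flags used by dax_insert_entry() to control how the entry is inserted */
#define DAX_IF_DIRTY		(1 << 0)	/* dirty the inode, tag entry PAGECACHE_TAG_DIRTY */
#define DAX_IF_COW		(1 << 1)	/* replace the entry, tag it PAGECACHE_TAG_TOWRITE */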

>
>  /*
>   * By this point grab_mapping_entry() has ensured that we have a locked entry
>   * of the appropriate size so we don't have to worry about downgrading PMDs to
> @@ -729,16 +732,19 @@ static int copy_cow_page_dax(struct block_device *bdev, struct dax_device *dax_d
>   * already in the tree, we will skip the insertion and just dirty the PMD as
>   * appropriate.
>   */
> -static void *dax_insert_entry(struct xa_state *xas,
> -		struct address_space *mapping, struct vm_fault *vmf,
> -		void *entry, pfn_t pfn, unsigned long flags, bool dirty)
> +static void *dax_insert_entry(struct xa_state *xas, struct vm_fault *vmf,
> +		void *entry, pfn_t pfn, unsigned long flags,
> +		unsigned int insert_flags)
>  {
> +	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
>  	void *new_entry = dax_make_entry(pfn, flags);
> +	bool dirty = insert_flags & DAX_IF_DIRTY;
> +	bool cow = insert_flags & DAX_IF_COW;
>
>  	if (dirty)
>  		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
>
> -	if (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE)) {
> +	if (cow || (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE))) {
>  		unsigned long index = xas->xa_index;
>  		/* we are replacing a zero page with block mapping */
>  		if (dax_is_pmd_entry(entry))
> @@ -750,7 +756,7 @@ static void *dax_insert_entry(struct xa_state *xas,
>
>  	xas_reset(xas);
>  	xas_lock_irq(xas);
> -	if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
> +	if (cow || dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
>  		void *old;
>
>  		dax_disassociate_entry(entry, mapping, false);
> @@ -774,6 +780,9 @@ static void *dax_insert_entry(struct xa_state *xas,
>  	if (dirty)
>  		xas_set_mark(xas, PAGECACHE_TAG_DIRTY);
>
> +	if (cow)
> +		xas_set_mark(xas, PAGECACHE_TAG_TOWRITE);
> +
>  	xas_unlock_irq(xas);
>  	return entry;
>  }
> @@ -1098,8 +1107,7 @@ static vm_fault_t dax_load_hole(struct xa_state *xas,
>  	pfn_t pfn = pfn_to_pfn_t(my_zero_pfn(vaddr));
>  	vm_fault_t ret;
>
> -	*entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn,
> -			DAX_ZERO_PAGE, false);
> +	*entry = dax_insert_entry(xas, vmf, *entry, pfn, DAX_ZERO_PAGE, 0);
>
>  	ret = vmf_insert_mixed(vmf->vma, vaddr, pfn);
>  	trace_dax_load_hole(inode, vmf, ret);
> @@ -1126,8 +1134,8 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
>  		goto fallback;
>
>  	pfn = page_to_pfn_t(zero_page);
> -	*entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn,
> -			DAX_PMD | DAX_ZERO_PAGE, false);
> +	*entry = dax_insert_entry(xas, vmf, *entry, pfn,
> +				  DAX_PMD | DAX_ZERO_PAGE, 0);
>
>  	if (arch_needs_pgtable_deposit()) {
>  		pgtable = pte_alloc_one(vma->vm_mm);
> @@ -1431,6 +1439,7 @@ static vm_fault_t dax_fault_actor(struct vm_fault *vmf, pfn_t *pfnp,
>  	loff_t pos = (loff_t)xas->xa_offset << PAGE_SHIFT;
>  	bool write = vmf->flags & FAULT_FLAG_WRITE;
>  	bool sync = dax_fault_is_synchronous(flags, vmf->vma, iomap);
> +	unsigned int insert_flags = 0;
>  	int err = 0;
>  	pfn_t pfn;
>  	void *kaddr;
> @@ -1453,8 +1462,14 @@ static vm_fault_t dax_fault_actor(struct vm_fault *vmf, pfn_t *pfnp,
>  	if (err)
>  		return dax_fault_return(err);
>
> -	entry = dax_insert_entry(xas, mapping, vmf, entry, pfn, 0,
> -				 write && !sync);
> +	if (write) {
> +		if (!sync)
> +			insert_flags |= DAX_IF_DIRTY;
> +		if (iomap->flags & IOMAP_F_SHARED)
> +			insert_flags |= DAX_IF_COW;
> +	}
> +
> +	entry = dax_insert_entry(xas, vmf, entry, pfn, 0, insert_flags);
>
>  	if (write && srcmap->addr != iomap->addr) {
>  		err = dax_iomap_cow_copy(pos, size, size, srcmap, kaddr, false);
>

Rest looks good to me. Please feel free to add
Reviewed-by: Ritesh Harjani <riteshh@gmail.com>

Sorry about changing my email in the middle of this code review.
I am planning to use the above gmail id as my primary account for all
upstream work from now on.

> --
> 2.30.1
>
>
>
Shiyang Ruan April 1, 2021, 7:03 a.m. UTC | #2
> -----Original Message-----
> From: Ritesh Harjani <ritesh.list@gmail.com>
> Sent: Thursday, April 1, 2021 2:40 PM
> Subject: Re: [PATCH v3 05/10] fsdax: Replace mmap entry in case of CoW
> 
> On 21/03/19 09:52AM, Shiyang Ruan wrote:
> > We replace the existing entry with the newly allocated one in case of CoW.
> > Also, we mark the entry with PAGECACHE_TAG_TOWRITE so that writeback marks
> > this entry as write-protected.  This helps with snapshots: new write
> > pagefaults after a snapshot will trigger a CoW.
> >
> 
> Please correct me if I am wrong here. So the flow is like this:
> 1. In case of CoW on a reflinked file, if a write is attempted on an mmapped
>    file, then in the DAX fault handler code, ->iomap_begin() of the given
>    filesystem will populate iomap and srcmap: srcmap being where the read
>    should be attempted from, and iomap where the new write should go to.
> 2. So the dax_insert_entry() code, as part of the fault handling, will take
>    care of removing the old entry and inserting the new pfn entry into the
>    xas, and mark it with PAGECACHE_TAG_TOWRITE so that dax writeback can
>    mark the entry as write-protected.
> Is my above understanding correct?

Yes, you are right.

> 
> > Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> > Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>
> > Reviewed-by: Christoph Hellwig <hch@lst.de>
> > ---
> >  fs/dax.c | 37 ++++++++++++++++++++++++++-----------
> >  1 file changed, 26 insertions(+), 11 deletions(-)
> >
> > diff --git a/fs/dax.c b/fs/dax.c
> > index 181aad97136a..cfe513eb111e 100644
> > --- a/fs/dax.c
> > +++ b/fs/dax.c
> > @@ -722,6 +722,9 @@ static int copy_cow_page_dax(struct block_device *bdev, struct dax_device *dax_d
> >  	return 0;
> >  }
> >
> > +#define DAX_IF_DIRTY		(1 << 0)
> > +#define DAX_IF_COW		(1 << 1)
> > +
> >
> A small comment explaining that these are DAX insert flags used by dax_insert_entry() would help.

OK.  I'll add it.
> 
> >
> >  /*
> >   * By this point grab_mapping_entry() has ensured that we have a locked
> entry
> >   * of the appropriate size so we don't have to worry about
> > downgrading PMDs to @@ -729,16 +732,19 @@ static int
> copy_cow_page_dax(struct block_device *bdev, struct dax_device *dax_d
> >   * already in the tree, we will skip the insertion and just dirty the PMD as
> >   * appropriate.
> >   */
> > -static void *dax_insert_entry(struct xa_state *xas,
> > -		struct address_space *mapping, struct vm_fault *vmf,
> > -		void *entry, pfn_t pfn, unsigned long flags, bool dirty)
> > +static void *dax_insert_entry(struct xa_state *xas, struct vm_fault *vmf,
> > +		void *entry, pfn_t pfn, unsigned long flags,
> > +		unsigned int insert_flags)
> >  {
> > +	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
> >  	void *new_entry = dax_make_entry(pfn, flags);
> > +	bool dirty = insert_flags & DAX_IF_DIRTY;
> > +	bool cow = insert_flags & DAX_IF_COW;
> >
> >  	if (dirty)
> >  		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
> >
> > -	if (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE)) {
> > +	if (cow || (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE))) {
> >  		unsigned long index = xas->xa_index;
> >  		/* we are replacing a zero page with block mapping */
> >  		if (dax_is_pmd_entry(entry))
> > @@ -750,7 +756,7 @@ static void *dax_insert_entry(struct xa_state *xas,
> >
> >  	xas_reset(xas);
> >  	xas_lock_irq(xas);
> > -	if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
> > +	if (cow || dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
> >  		void *old;
> >
> >  		dax_disassociate_entry(entry, mapping, false);
> > @@ -774,6 +780,9 @@ static void *dax_insert_entry(struct xa_state *xas,
> >  	if (dirty)
> >  		xas_set_mark(xas, PAGECACHE_TAG_DIRTY);
> >
> > +	if (cow)
> > +		xas_set_mark(xas, PAGECACHE_TAG_TOWRITE);
> > +
> >  	xas_unlock_irq(xas);
> >  	return entry;
> >  }
> > @@ -1098,8 +1107,7 @@ static vm_fault_t dax_load_hole(struct xa_state *xas,
> >  	pfn_t pfn = pfn_to_pfn_t(my_zero_pfn(vaddr));
> >  	vm_fault_t ret;
> >
> > -	*entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn,
> > -			DAX_ZERO_PAGE, false);
> > +	*entry = dax_insert_entry(xas, vmf, *entry, pfn, DAX_ZERO_PAGE, 0);
> >
> >  	ret = vmf_insert_mixed(vmf->vma, vaddr, pfn);
> >  	trace_dax_load_hole(inode, vmf, ret);
> > @@ -1126,8 +1134,8 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
> >  		goto fallback;
> >
> >  	pfn = page_to_pfn_t(zero_page);
> > -	*entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn,
> > -			DAX_PMD | DAX_ZERO_PAGE, false);
> > +	*entry = dax_insert_entry(xas, vmf, *entry, pfn,
> > +				  DAX_PMD | DAX_ZERO_PAGE, 0);
> >
> >  	if (arch_needs_pgtable_deposit()) {
> >  		pgtable = pte_alloc_one(vma->vm_mm);
> > @@ -1431,6 +1439,7 @@ static vm_fault_t dax_fault_actor(struct vm_fault *vmf, pfn_t *pfnp,
> >  	loff_t pos = (loff_t)xas->xa_offset << PAGE_SHIFT;
> >  	bool write = vmf->flags & FAULT_FLAG_WRITE;
> >  	bool sync = dax_fault_is_synchronous(flags, vmf->vma, iomap);
> > +	unsigned int insert_flags = 0;
> >  	int err = 0;
> >  	pfn_t pfn;
> >  	void *kaddr;
> > @@ -1453,8 +1462,14 @@ static vm_fault_t dax_fault_actor(struct vm_fault *vmf, pfn_t *pfnp,
> >  	if (err)
> >  		return dax_fault_return(err);
> >
> > -	entry = dax_insert_entry(xas, mapping, vmf, entry, pfn, 0,
> > -				 write && !sync);
> > +	if (write) {
> > +		if (!sync)
> > +			insert_flags |= DAX_IF_DIRTY;
> > +		if (iomap->flags & IOMAP_F_SHARED)
> > +			insert_flags |= DAX_IF_COW;
> > +	}
> > +
> > +	entry = dax_insert_entry(xas, vmf, entry, pfn, 0, insert_flags);
> >
> >  	if (write && srcmap->addr != iomap->addr) {
> >  		err = dax_iomap_cow_copy(pos, size, size, srcmap, kaddr, false);
> >
> 
> Rest looks good to me. Please feel free to add
> Reviewed-by: Ritesh Harjani <riteshh@gmail.com>
> 
> Sorry about changing my email in the middle of this code review.
> I am planning to use the above gmail id as my primary account for all
> upstream work from now on.
> 

--
Thanks,
Ruan Shiyang.

> > --
> > 2.30.1
> >
> >
> >

Patch

diff --git a/fs/dax.c b/fs/dax.c
index 181aad97136a..cfe513eb111e 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -722,6 +722,9 @@  static int copy_cow_page_dax(struct block_device *bdev, struct dax_device *dax_d
 	return 0;
 }
 
+#define DAX_IF_DIRTY		(1 << 0)
+#define DAX_IF_COW		(1 << 1)
+
 /*
  * By this point grab_mapping_entry() has ensured that we have a locked entry
  * of the appropriate size so we don't have to worry about downgrading PMDs to
@@ -729,16 +732,19 @@  static int copy_cow_page_dax(struct block_device *bdev, struct dax_device *dax_d
  * already in the tree, we will skip the insertion and just dirty the PMD as
  * appropriate.
  */
-static void *dax_insert_entry(struct xa_state *xas,
-		struct address_space *mapping, struct vm_fault *vmf,
-		void *entry, pfn_t pfn, unsigned long flags, bool dirty)
+static void *dax_insert_entry(struct xa_state *xas, struct vm_fault *vmf,
+		void *entry, pfn_t pfn, unsigned long flags,
+		unsigned int insert_flags)
 {
+	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
 	void *new_entry = dax_make_entry(pfn, flags);
+	bool dirty = insert_flags & DAX_IF_DIRTY;
+	bool cow = insert_flags & DAX_IF_COW;
 
 	if (dirty)
 		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
 
-	if (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE)) {
+	if (cow || (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE))) {
 		unsigned long index = xas->xa_index;
 		/* we are replacing a zero page with block mapping */
 		if (dax_is_pmd_entry(entry))
@@ -750,7 +756,7 @@  static void *dax_insert_entry(struct xa_state *xas,
 
 	xas_reset(xas);
 	xas_lock_irq(xas);
-	if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
+	if (cow || dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
 		void *old;
 
 		dax_disassociate_entry(entry, mapping, false);
@@ -774,6 +780,9 @@  static void *dax_insert_entry(struct xa_state *xas,
 	if (dirty)
 		xas_set_mark(xas, PAGECACHE_TAG_DIRTY);
 
+	if (cow)
+		xas_set_mark(xas, PAGECACHE_TAG_TOWRITE);
+
 	xas_unlock_irq(xas);
 	return entry;
 }
@@ -1098,8 +1107,7 @@  static vm_fault_t dax_load_hole(struct xa_state *xas,
 	pfn_t pfn = pfn_to_pfn_t(my_zero_pfn(vaddr));
 	vm_fault_t ret;
 
-	*entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn,
-			DAX_ZERO_PAGE, false);
+	*entry = dax_insert_entry(xas, vmf, *entry, pfn, DAX_ZERO_PAGE, 0);
 
 	ret = vmf_insert_mixed(vmf->vma, vaddr, pfn);
 	trace_dax_load_hole(inode, vmf, ret);
@@ -1126,8 +1134,8 @@  static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
 		goto fallback;
 
 	pfn = page_to_pfn_t(zero_page);
-	*entry = dax_insert_entry(xas, mapping, vmf, *entry, pfn,
-			DAX_PMD | DAX_ZERO_PAGE, false);
+	*entry = dax_insert_entry(xas, vmf, *entry, pfn,
+				  DAX_PMD | DAX_ZERO_PAGE, 0);
 
 	if (arch_needs_pgtable_deposit()) {
 		pgtable = pte_alloc_one(vma->vm_mm);
@@ -1431,6 +1439,7 @@  static vm_fault_t dax_fault_actor(struct vm_fault *vmf, pfn_t *pfnp,
 	loff_t pos = (loff_t)xas->xa_offset << PAGE_SHIFT;
 	bool write = vmf->flags & FAULT_FLAG_WRITE;
 	bool sync = dax_fault_is_synchronous(flags, vmf->vma, iomap);
+	unsigned int insert_flags = 0;
 	int err = 0;
 	pfn_t pfn;
 	void *kaddr;
@@ -1453,8 +1462,14 @@  static vm_fault_t dax_fault_actor(struct vm_fault *vmf, pfn_t *pfnp,
 	if (err)
 		return dax_fault_return(err);
 
-	entry = dax_insert_entry(xas, mapping, vmf, entry, pfn, 0,
-				 write && !sync);
+	if (write) {
+		if (!sync)
+			insert_flags |= DAX_IF_DIRTY;
+		if (iomap->flags & IOMAP_F_SHARED)
+			insert_flags |= DAX_IF_COW;
+	}
+
+	entry = dax_insert_entry(xas, vmf, entry, pfn, 0, insert_flags);
 
 	if (write && srcmap->addr != iomap->addr) {
 		err = dax_iomap_cow_copy(pos, size, size, srcmap, kaddr, false);
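
As a side note on the write && !sync condition above: the sync case
corresponds to MAP_SYNC mappings, where the fault path synchronizes the
needed metadata before the mapping is established, so the entry does not
have to be tracked via DAX_IF_DIRTY.  A minimal sketch of such a mapping
(the /mnt/pmem path is an illustrative assumption; the fallback flag
values match the uapi headers):

#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

#ifndef MAP_SHARED_VALIDATE
#define MAP_SHARED_VALIDATE	0x03
#endif
#ifndef MAP_SYNC
#define MAP_SYNC		0x080000
#endif

int main(void)
{
	/* assumes the file exists and already has at least one page */
	int fd = open("/mnt/pmem/file", O_RDWR);
	if (fd < 0)
		return 1;

	char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		       MAP_SHARED_VALIDATE | MAP_SYNC, fd, 0);
	if (p == MAP_FAILED)
		return 1;

	/* stores are durable once the CPU cache lines are flushed */
	p[0] = 'x';
	munmap(p, 4096);
	return 0;
}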