diff mbox series

[4/7] fsdax: Replace mmap entry in case of CoW

Message ID 20210207170924.2933035-5-ruansy.fnst@cn.fujitsu.com (mailing list archive)
State New
Headers show
Series fsdax,xfs: Add reflink&dedupe support for fsdax | expand

Commit Message

Ruan Shiyang Feb. 7, 2021, 5:09 p.m. UTC
We replace the existing entry with the newly allocated one
in case of CoW. Also, we mark the entry as PAGECACHE_TAG_TOWRITE
so that writeback marks this entry as write-protected. This
helps with snapshots, so that new write page faults after a snapshot
trigger a CoW.

Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
Signed-off-by: Shiyang Ruan <ruansy.fnst@cn.fujitsu.com>
---
 fs/dax.c | 31 +++++++++++++++++++++++--------
 1 file changed, 23 insertions(+), 8 deletions(-)

Comments

Christoph Hellwig Feb. 8, 2021, 3:16 p.m. UTC | #1
>  static void *dax_insert_entry(struct xa_state *xas,
>  		struct address_space *mapping, struct vm_fault *vmf,
> -		void *entry, pfn_t pfn, unsigned long flags, bool dirty)
> +		void *entry, pfn_t pfn, unsigned long flags, bool insert_flags)
>  {
>  	void *new_entry = dax_make_entry(pfn, flags);
> +	bool dirty = insert_flags & DAX_IF_DIRTY;
> +	bool cow = insert_flags & DAX_IF_COW;
>  
>  	if (dirty)
>  		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);

Can we just move the __mark_inode_dirty to the one caller that needs it
in a prep patch?
David Sterba Feb. 16, 2021, 1:11 p.m. UTC | #2
On Mon, Feb 08, 2021 at 01:09:21AM +0800, Shiyang Ruan wrote:
> We replace the existing entry to the newly allocated one
> in case of CoW. Also, we mark the entry as PAGECACHE_TAG_TOWRITE
> so writeback marks this entry as writeprotected. This
> helps us snapshots so new write pagefaults after snapshots
> trigger a CoW.
> 
> Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> Signed-off-by: Shiyang Ruan <ruansy.fnst@cn.fujitsu.com>
> ---
>  fs/dax.c | 31 +++++++++++++++++++++++--------
>  1 file changed, 23 insertions(+), 8 deletions(-)
> 
> diff --git a/fs/dax.c b/fs/dax.c
> index b2195cbdf2dc..29698a3d2e37 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -722,6 +722,9 @@ static int copy_cow_page_dax(struct block_device *bdev, struct dax_device *dax_d
>  	return 0;
>  }
>  
> +#define DAX_IF_DIRTY		(1ULL << 0)
> +#define DAX_IF_COW		(1ULL << 1)

The constants are ULL, but as far as I can see the other flags are only 'unsigned long'.

> +
>  /*
>   * By this point grab_mapping_entry() has ensured that we have a locked entry
>   * of the appropriate size so we don't have to worry about downgrading PMDs to
> @@ -731,14 +734,16 @@ static int copy_cow_page_dax(struct block_device *bdev, struct dax_device *dax_d
>   */
>  static void *dax_insert_entry(struct xa_state *xas,
>  		struct address_space *mapping, struct vm_fault *vmf,
> -		void *entry, pfn_t pfn, unsigned long flags, bool dirty)
> +		void *entry, pfn_t pfn, unsigned long flags, bool insert_flags)

insert_flags is bool

>  {
>  	void *new_entry = dax_make_entry(pfn, flags);
> +	bool dirty = insert_flags & DAX_IF_DIRTY;

"insert_flags & DAX_IF_DIRTY" is "bool & ULL", this can't be right

> +	bool cow = insert_flags & DAX_IF_COW;

Same

>  
>  	if (dirty)
>  		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
>  
> -	if (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE)) {
> +	if (cow || (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE))) {
>  		unsigned long index = xas->xa_index;
>  		/* we are replacing a zero page with block mapping */
>  		if (dax_is_pmd_entry(entry))
> @@ -750,7 +755,7 @@ static void *dax_insert_entry(struct xa_state *xas,
>  
>  	xas_reset(xas);
>  	xas_lock_irq(xas);
> -	if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
> +	if (cow || dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
>  		void *old;
>  
>  		dax_disassociate_entry(entry, mapping, false);
> @@ -774,6 +779,9 @@ static void *dax_insert_entry(struct xa_state *xas,
>  	if (dirty)
>  		xas_set_mark(xas, PAGECACHE_TAG_DIRTY);
>  
> +	if (cow)
> +		xas_set_mark(xas, PAGECACHE_TAG_TOWRITE);
> +
>  	xas_unlock_irq(xas);
>  	return entry;
>  }
> @@ -1319,6 +1327,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
>  	void *entry;
>  	pfn_t pfn;
>  	void *kaddr;
> +	unsigned long insert_flags = 0;
>  
>  	trace_dax_pte_fault(inode, vmf, ret);
>  	/*
> @@ -1444,8 +1453,10 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
>  
>  		goto finish_iomap;
>  	case IOMAP_UNWRITTEN:
> -		if (write && iomap.flags & IOMAP_F_SHARED)
> +		if (write && (iomap.flags & IOMAP_F_SHARED)) {
> +			insert_flags |= DAX_IF_COW;

Here's an example of assigning an 'unsigned long long' value to an
'unsigned long' variable. Though it'll work, it would be better to unify
all the types.

>  			goto cow;
> +		}
>  		fallthrough;
>  	case IOMAP_HOLE:
>  		if (!write) {
> @@ -1555,6 +1566,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
>  	int error;
>  	pfn_t pfn;
>  	void *kaddr;
> +	unsigned long insert_flags = 0;
>  
>  	/*
>  	 * Check whether offset isn't beyond end of file now. Caller is
> @@ -1670,14 +1682,17 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
>  		result = vmf_insert_pfn_pmd(vmf, pfn, write);
>  		break;
>  	case IOMAP_UNWRITTEN:
> -		if (write && iomap.flags & IOMAP_F_SHARED)
> +		if (write && (iomap.flags & IOMAP_F_SHARED)) {
> +			insert_flags |= DAX_IF_COW;
>  			goto cow;
> +		}
>  		fallthrough;
>  	case IOMAP_HOLE:
> -		if (WARN_ON_ONCE(write))
> +		if (!write) {
> +			result = dax_pmd_load_hole(&xas, vmf, &iomap, &entry);
>  			break;
> -		result = dax_pmd_load_hole(&xas, vmf, &iomap, &entry);
> -		break;
> +		}
> +		fallthrough;
>  	default:
>  		WARN_ON_ONCE(1);
>  		break;
> -- 
> 2.30.0
> 
>
Ruan Shiyang Feb. 17, 2021, 3:06 a.m. UTC | #3
On 2021/2/16 下午9:11, David Sterba wrote:
> On Mon, Feb 08, 2021 at 01:09:21AM +0800, Shiyang Ruan wrote:
>> We replace the existing entry to the newly allocated one
>> in case of CoW. Also, we mark the entry as PAGECACHE_TAG_TOWRITE
>> so writeback marks this entry as writeprotected. This
>> helps us snapshots so new write pagefaults after snapshots
>> trigger a CoW.
>>
>> Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
>> Signed-off-by: Shiyang Ruan <ruansy.fnst@cn.fujitsu.com>
>> ---
>>   fs/dax.c | 31 +++++++++++++++++++++++--------
>>   1 file changed, 23 insertions(+), 8 deletions(-)
>>
>> diff --git a/fs/dax.c b/fs/dax.c
>> index b2195cbdf2dc..29698a3d2e37 100644
>> --- a/fs/dax.c
>> +++ b/fs/dax.c
>> @@ -722,6 +722,9 @@ static int copy_cow_page_dax(struct block_device *bdev, struct dax_device *dax_d
>>   	return 0;
>>   }
>>   
>> +#define DAX_IF_DIRTY		(1ULL << 0)
>> +#define DAX_IF_COW		(1ULL << 1)
> 
> The constants are ULL, but I see other flags only 'unsigned long'
> 
>> +
>>   /*
>>    * By this point grab_mapping_entry() has ensured that we have a locked entry
>>    * of the appropriate size so we don't have to worry about downgrading PMDs to
>> @@ -731,14 +734,16 @@ static int copy_cow_page_dax(struct block_device *bdev, struct dax_device *dax_d
>>    */
>>   static void *dax_insert_entry(struct xa_state *xas,
>>   		struct address_space *mapping, struct vm_fault *vmf,
>> -		void *entry, pfn_t pfn, unsigned long flags, bool dirty)
>> +		void *entry, pfn_t pfn, unsigned long flags, bool insert_flags)
> 
> insert_flags is bool
> 
>>   {
>>   	void *new_entry = dax_make_entry(pfn, flags);
>> +	bool dirty = insert_flags & DAX_IF_DIRTY;
> 
> "insert_flags & DAX_IF_DIRTY" is "bool & ULL", this can't be right

This is a mistake caused by rebasing an old version of my patchset.  Thanks
for pointing it out.  I'll fix this in the next version.
> 
>> +	bool cow = insert_flags & DAX_IF_COW;
> 
> Same
> 
>>   
>>   	if (dirty)
>>   		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
>>   
>> -	if (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE)) {
>> +	if (cow || (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE))) {
>>   		unsigned long index = xas->xa_index;
>>   		/* we are replacing a zero page with block mapping */
>>   		if (dax_is_pmd_entry(entry))
>> @@ -750,7 +755,7 @@ static void *dax_insert_entry(struct xa_state *xas,
>>   
>>   	xas_reset(xas);
>>   	xas_lock_irq(xas);
>> -	if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
>> +	if (cow || dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
>>   		void *old;
>>   
>>   		dax_disassociate_entry(entry, mapping, false);
>> @@ -774,6 +779,9 @@ static void *dax_insert_entry(struct xa_state *xas,
>>   	if (dirty)
>>   		xas_set_mark(xas, PAGECACHE_TAG_DIRTY);
>>   
>> +	if (cow)
>> +		xas_set_mark(xas, PAGECACHE_TAG_TOWRITE);
>> +
>>   	xas_unlock_irq(xas);
>>   	return entry;
>>   }
>> @@ -1319,6 +1327,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
>>   	void *entry;
>>   	pfn_t pfn;
>>   	void *kaddr;
>> +	unsigned long insert_flags = 0;
>>   
>>   	trace_dax_pte_fault(inode, vmf, ret);
>>   	/*
>> @@ -1444,8 +1453,10 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
>>   
>>   		goto finish_iomap;
>>   	case IOMAP_UNWRITTEN:
>> -		if (write && iomap.flags & IOMAP_F_SHARED)
>> +		if (write && (iomap.flags & IOMAP_F_SHARED)) {
>> +			insert_flags |= DAX_IF_COW;
> 
> Here's an example of 'unsigned long = unsigned long long', though it'll
> work, it would be better to unify all the types.

Yes, I'll fix it.


--
Thanks,
Ruan Shiyang.
> 
>>   			goto cow;
>> +		}
>>   		fallthrough;
>>   	case IOMAP_HOLE:
>>   		if (!write) {
>> @@ -1555,6 +1566,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
>>   	int error;
>>   	pfn_t pfn;
>>   	void *kaddr;
>> +	unsigned long insert_flags = 0;
>>   
>>   	/*
>>   	 * Check whether offset isn't beyond end of file now. Caller is
>> @@ -1670,14 +1682,17 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
>>   		result = vmf_insert_pfn_pmd(vmf, pfn, write);
>>   		break;
>>   	case IOMAP_UNWRITTEN:
>> -		if (write && iomap.flags & IOMAP_F_SHARED)
>> +		if (write && (iomap.flags & IOMAP_F_SHARED)) {
>> +			insert_flags |= DAX_IF_COW;
>>   			goto cow;
>> +		}
>>   		fallthrough;
>>   	case IOMAP_HOLE:
>> -		if (WARN_ON_ONCE(write))
>> +		if (!write) {
>> +			result = dax_pmd_load_hole(&xas, vmf, &iomap, &entry);
>>   			break;
>> -		result = dax_pmd_load_hole(&xas, vmf, &iomap, &entry);
>> -		break;
>> +		}
>> +		fallthrough;
>>   	default:
>>   		WARN_ON_ONCE(1);
>>   		break;
>> -- 
>> 2.30.0
>>
>>
> 
>
diff mbox series

Patch

diff --git a/fs/dax.c b/fs/dax.c
index b2195cbdf2dc..29698a3d2e37 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -722,6 +722,9 @@  static int copy_cow_page_dax(struct block_device *bdev, struct dax_device *dax_d
 	return 0;
 }
 
+#define DAX_IF_DIRTY		(1ULL << 0)
+#define DAX_IF_COW		(1ULL << 1)
+
 /*
  * By this point grab_mapping_entry() has ensured that we have a locked entry
  * of the appropriate size so we don't have to worry about downgrading PMDs to
@@ -731,14 +734,16 @@  static int copy_cow_page_dax(struct block_device *bdev, struct dax_device *dax_d
  */
 static void *dax_insert_entry(struct xa_state *xas,
 		struct address_space *mapping, struct vm_fault *vmf,
-		void *entry, pfn_t pfn, unsigned long flags, bool dirty)
+		void *entry, pfn_t pfn, unsigned long flags, bool insert_flags)
 {
 	void *new_entry = dax_make_entry(pfn, flags);
+	bool dirty = insert_flags & DAX_IF_DIRTY;
+	bool cow = insert_flags & DAX_IF_COW;
 
 	if (dirty)
 		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
 
-	if (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE)) {
+	if (cow || (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE))) {
 		unsigned long index = xas->xa_index;
 		/* we are replacing a zero page with block mapping */
 		if (dax_is_pmd_entry(entry))
@@ -750,7 +755,7 @@  static void *dax_insert_entry(struct xa_state *xas,
 
 	xas_reset(xas);
 	xas_lock_irq(xas);
-	if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
+	if (cow || dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
 		void *old;
 
 		dax_disassociate_entry(entry, mapping, false);
@@ -774,6 +779,9 @@  static void *dax_insert_entry(struct xa_state *xas,
 	if (dirty)
 		xas_set_mark(xas, PAGECACHE_TAG_DIRTY);
 
+	if (cow)
+		xas_set_mark(xas, PAGECACHE_TAG_TOWRITE);
+
 	xas_unlock_irq(xas);
 	return entry;
 }
@@ -1319,6 +1327,7 @@  static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
 	void *entry;
 	pfn_t pfn;
 	void *kaddr;
+	unsigned long insert_flags = 0;
 
 	trace_dax_pte_fault(inode, vmf, ret);
 	/*
@@ -1444,8 +1453,10 @@  static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
 
 		goto finish_iomap;
 	case IOMAP_UNWRITTEN:
-		if (write && iomap.flags & IOMAP_F_SHARED)
+		if (write && (iomap.flags & IOMAP_F_SHARED)) {
+			insert_flags |= DAX_IF_COW;
 			goto cow;
+		}
 		fallthrough;
 	case IOMAP_HOLE:
 		if (!write) {
@@ -1555,6 +1566,7 @@  static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
 	int error;
 	pfn_t pfn;
 	void *kaddr;
+	unsigned long insert_flags = 0;
 
 	/*
 	 * Check whether offset isn't beyond end of file now. Caller is
@@ -1670,14 +1682,17 @@  static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
 		result = vmf_insert_pfn_pmd(vmf, pfn, write);
 		break;
 	case IOMAP_UNWRITTEN:
-		if (write && iomap.flags & IOMAP_F_SHARED)
+		if (write && (iomap.flags & IOMAP_F_SHARED)) {
+			insert_flags |= DAX_IF_COW;
 			goto cow;
+		}
 		fallthrough;
 	case IOMAP_HOLE:
-		if (WARN_ON_ONCE(write))
+		if (!write) {
+			result = dax_pmd_load_hole(&xas, vmf, &iomap, &entry);
 			break;
-		result = dax_pmd_load_hole(&xas, vmf, &iomap, &entry);
-		break;
+		}
+		fallthrough;
 	default:
 		WARN_ON_ONCE(1);
 		break;