diff mbox

[V5,11/13] Btrfs: Clean pte corresponding to page straddling i_size

Message ID 1443608912-31667-12-git-send-email-chandan@linux.vnet.ibm.com (mailing list archive)
State Superseded
Headers show

Commit Message

Chandan Rajendra Sept. 30, 2015, 10:28 a.m. UTC
When extending a file by either "truncate up" or by writing beyond i_size, the
page which had i_size needs to be marked "read only" so that future writes to
the page via mmap interface causes btrfs_page_mkwrite() to be invoked. If not,
a write performed after extending the file via the mmap interface will find
the page to be writaeable and continue writing to the page without invoking
btrfs_page_mkwrite() i.e. we end up writing to a file without reserving disk
space.

Signed-off-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
---
 fs/btrfs/file.c  | 12 ++++++++++--
 fs/btrfs/inode.c |  2 +-
 2 files changed, 11 insertions(+), 3 deletions(-)

Comments

Josef Bacik Oct. 1, 2015, 2:57 p.m. UTC | #1
On 09/30/2015 06:28 AM, Chandan Rajendra wrote:
> When extending a file by either "truncate up" or by writing beyond i_size, the
> page which had i_size needs to be marked "read only" so that future writes to
> the page via mmap interface causes btrfs_page_mkwrite() to be invoked. If not,
> a write performed after extending the file via the mmap interface will find
> the page to be writaeable and continue writing to the page without invoking
> btrfs_page_mkwrite() i.e. we end up writing to a file without reserving disk
> space.
>
> Signed-off-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
> ---
>   fs/btrfs/file.c  | 12 ++++++++++--
>   fs/btrfs/inode.c |  2 +-
>   2 files changed, 11 insertions(+), 3 deletions(-)
>
> diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
> index 360d56d..5715e29 100644
> --- a/fs/btrfs/file.c
> +++ b/fs/btrfs/file.c
> @@ -1757,6 +1757,8 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
>   	ssize_t err;
>   	loff_t pos;
>   	size_t count;
> +	loff_t oldsize;
> +	int clean_page = 0;
>
>   	mutex_lock(&inode->i_mutex);
>   	err = generic_write_checks(iocb, from);
> @@ -1795,14 +1797,17 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
>   	pos = iocb->ki_pos;
>   	count = iov_iter_count(from);
>   	start_pos = round_down(pos, root->sectorsize);
> -	if (start_pos > i_size_read(inode)) {
> +	oldsize = i_size_read(inode);
> +	if (start_pos > oldsize) {
>   		/* Expand hole size to cover write data, preventing empty gap */
>   		end_pos = round_up(pos + count, root->sectorsize);
> -		err = btrfs_cont_expand(inode, i_size_read(inode), end_pos);
> +		err = btrfs_cont_expand(inode, oldsize, end_pos);
>   		if (err) {
>   			mutex_unlock(&inode->i_mutex);
>   			goto out;
>   		}
> +		if (start_pos > round_up(oldsize, root->sectorsize))
> +			clean_page = 1;
>   	}
>
>   	if (sync)
> @@ -1814,6 +1819,9 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
>   		num_written = __btrfs_buffered_write(file, from, pos);
>   		if (num_written > 0)
>   			iocb->ki_pos = pos + num_written;
> +		if (clean_page)
> +			pagecache_isize_extended(inode, oldsize,
> +						i_size_read(inode));
>   	}
>
>   	mutex_unlock(&inode->i_mutex);
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index c937357..f31da87 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -4853,7 +4853,6 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
>   	}
>
>   	if (newsize > oldsize) {
> -		truncate_pagecache(inode, newsize);

So I don't understand why we are dropping this bit here, could you 
explain?  Otherwise the patch looks fine to me.  Thanks,

Josef
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Chandan Rajendra Oct. 2, 2015, 4:34 p.m. UTC | #2
On Thursday 01 Oct 2015 10:57:52 Josef Bacik wrote:
> On 09/30/2015 06:28 AM, Chandan Rajendra wrote:
> > When extending a file by either "truncate up" or by writing beyond i_size,
> > the page which had i_size needs to be marked "read only" so that future
> > writes to the page via mmap interface causes btrfs_page_mkwrite() to be
> > invoked. If not, a write performed after extending the file via the mmap
> > interface will find the page to be writaeable and continue writing to the
> > page without invoking btrfs_page_mkwrite() i.e. we end up writing to a
> > file without reserving disk space.
> > 
> > Signed-off-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
> > ---
> > 
> >   fs/btrfs/file.c  | 12 ++++++++++--
> >   fs/btrfs/inode.c |  2 +-
> >   2 files changed, 11 insertions(+), 3 deletions(-)
> > 
> > diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
> > index 360d56d..5715e29 100644
> > --- a/fs/btrfs/file.c
> > +++ b/fs/btrfs/file.c
> > @@ -1757,6 +1757,8 @@ static ssize_t btrfs_file_write_iter(struct kiocb
> > *iocb,> 
> >   	ssize_t err;
> >   	loff_t pos;
> >   	size_t count;
> > 
> > +	loff_t oldsize;
> > +	int clean_page = 0;
> > 
> >   	mutex_lock(&inode->i_mutex);
> >   	err = generic_write_checks(iocb, from);
> > 
> > @@ -1795,14 +1797,17 @@ static ssize_t btrfs_file_write_iter(struct kiocb
> > *iocb,> 
> >   	pos = iocb->ki_pos;
> >   	count = iov_iter_count(from);
> >   	start_pos = round_down(pos, root->sectorsize);
> > 
> > -	if (start_pos > i_size_read(inode)) {
> > +	oldsize = i_size_read(inode);
> > +	if (start_pos > oldsize) {
> > 
> >   		/* Expand hole size to cover write data, preventing empty gap 
*/
> >   		end_pos = round_up(pos + count, root->sectorsize);
> > 
> > -		err = btrfs_cont_expand(inode, i_size_read(inode), end_pos);
> > +		err = btrfs_cont_expand(inode, oldsize, end_pos);
> > 
> >   		if (err) {
> >   		
> >   			mutex_unlock(&inode->i_mutex);
> >   			goto out;
> >   		
> >   		}
> > 
> > +		if (start_pos > round_up(oldsize, root->sectorsize))
> > +			clean_page = 1;
> > 
> >   	}
> >   	
> >   	if (sync)
> > 
> > @@ -1814,6 +1819,9 @@ static ssize_t btrfs_file_write_iter(struct kiocb
> > *iocb,> 
> >   		num_written = __btrfs_buffered_write(file, from, pos);
> >   		if (num_written > 0)
> >   		
> >   			iocb->ki_pos = pos + num_written;
> > 
> > +		if (clean_page)
> > +			pagecache_isize_extended(inode, oldsize,
> > +						i_size_read(inode));
> > 
> >   	}
> >   	
> >   	mutex_unlock(&inode->i_mutex);
> > 
> > diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> > index c937357..f31da87 100644
> > --- a/fs/btrfs/inode.c
> > +++ b/fs/btrfs/inode.c
> > @@ -4853,7 +4853,6 @@ static int btrfs_setsize(struct inode *inode, struct
> > iattr *attr)> 
> >   	}
> >   	
> >   	if (newsize > oldsize) {
> > 
> > -		truncate_pagecache(inode, newsize);
> 
> So I don't understand why we are dropping this bit here, could you
> explain?  Otherwise the patch looks fine to me.  Thanks,
>
Josef, As per our previous discussion on IRC we found that the
"truncate_pagecache(inode, newsize)" statement to be a relic of the
past. During the test runs, I haven't seen any sort of failure caused by the
removal of this statement.
diff mbox

Patch

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 360d56d..5715e29 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1757,6 +1757,8 @@  static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
 	ssize_t err;
 	loff_t pos;
 	size_t count;
+	loff_t oldsize;
+	int clean_page = 0;
 
 	mutex_lock(&inode->i_mutex);
 	err = generic_write_checks(iocb, from);
@@ -1795,14 +1797,17 @@  static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
 	pos = iocb->ki_pos;
 	count = iov_iter_count(from);
 	start_pos = round_down(pos, root->sectorsize);
-	if (start_pos > i_size_read(inode)) {
+	oldsize = i_size_read(inode);
+	if (start_pos > oldsize) {
 		/* Expand hole size to cover write data, preventing empty gap */
 		end_pos = round_up(pos + count, root->sectorsize);
-		err = btrfs_cont_expand(inode, i_size_read(inode), end_pos);
+		err = btrfs_cont_expand(inode, oldsize, end_pos);
 		if (err) {
 			mutex_unlock(&inode->i_mutex);
 			goto out;
 		}
+		if (start_pos > round_up(oldsize, root->sectorsize))
+			clean_page = 1;
 	}
 
 	if (sync)
@@ -1814,6 +1819,9 @@  static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
 		num_written = __btrfs_buffered_write(file, from, pos);
 		if (num_written > 0)
 			iocb->ki_pos = pos + num_written;
+		if (clean_page)
+			pagecache_isize_extended(inode, oldsize,
+						i_size_read(inode));
 	}
 
 	mutex_unlock(&inode->i_mutex);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c937357..f31da87 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4853,7 +4853,6 @@  static int btrfs_setsize(struct inode *inode, struct iattr *attr)
 	}
 
 	if (newsize > oldsize) {
-		truncate_pagecache(inode, newsize);
 		/*
 		 * Don't do an expanding truncate while snapshoting is ongoing.
 		 * This is to ensure the snapshot captures a fully consistent
@@ -4876,6 +4875,7 @@  static int btrfs_setsize(struct inode *inode, struct iattr *attr)
 
 		i_size_write(inode, newsize);
 		btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
+		pagecache_isize_extended(inode, oldsize, newsize);
 		ret = btrfs_update_inode(trans, root, inode);
 		btrfs_end_write_no_snapshoting(root);
 		btrfs_end_transaction(trans, root);