diff mbox series

[11/15] btrfs: Use inode_lock_shared() for direct writes within EOF

Message ID 20200921144353.31319-12-rgoldwyn@suse.de (mailing list archive)
State New, archived
Headers show
Series BTRFS DIO inode locking/D_SYNC fix | expand

Commit Message

Goldwyn Rodrigues Sept. 21, 2020, 2:43 p.m. UTC
From: Goldwyn Rodrigues <rgoldwyn@suse.com>

Direct writes within EOF can safely be performed under the shared inode
lock, because EOF is not changed and there is no race with truncate().
This improves parallelization with other direct writes and reads.

Direct reads are already performed under shared inode lock.

This patch is a precursor to removing btrfs_inode->dio_sem.

Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
---
 fs/btrfs/file.c | 33 +++++++++++++++++++++------------
 1 file changed, 21 insertions(+), 12 deletions(-)

Comments

Josef Bacik Sept. 22, 2020, 2:52 p.m. UTC | #1
On 9/21/20 10:43 AM, Goldwyn Rodrigues wrote:
> From: Goldwyn Rodrigues <rgoldwyn@suse.com>
> 
> Direct writes within EOF are safe to be performed with inode shared lock
> to improve parallelization with other direct writes or reads because EOF
> is not changed and there is no race with truncate().
> 
> Direct reads are already performed under shared inode lock.
> 
> This patch is precursor to removing btrfs_inode->dio_sem.
> 
> Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> ---
>   fs/btrfs/file.c | 33 +++++++++++++++++++++------------
>   1 file changed, 21 insertions(+), 12 deletions(-)
> 
> diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
> index d9c3be19d7b3..50092d24eee2 100644
> --- a/fs/btrfs/file.c
> +++ b/fs/btrfs/file.c
> @@ -1977,7 +1977,6 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
>   	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
>   	loff_t pos;
>   	ssize_t written = 0;
> -	bool relock = false;
>   	ssize_t written_buffered;
>   	loff_t endbyte;
>   	int err;
> @@ -1986,6 +1985,15 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
>   	if (iocb->ki_flags & IOCB_NOWAIT)
>   		ilock_flags |= BTRFS_ILOCK_TRY;
>   
> +	/*
> +	 * If the write DIO within EOF,  use a shared lock
> +	 */
> +	if (iocb->ki_pos + iov_iter_count(from) <= i_size_read(inode))
> +		ilock_flags |= BTRFS_ILOCK_SHARED;
> +	else if (iocb->ki_flags & IOCB_NOWAIT)
> +		return -EAGAIN;
> +
> +relock:

Huh?  Why are you making it so EOF extending NOWAIT writes now fail?  We are 
still using ILOCK_TRY here, so we may still not block, am I missing something? 
Thanks,

Josef
Goldwyn Rodrigues Sept. 22, 2020, 5:33 p.m. UTC | #2
On 10:52 22/09, Josef Bacik wrote:
> On 9/21/20 10:43 AM, Goldwyn Rodrigues wrote:
> > From: Goldwyn Rodrigues <rgoldwyn@suse.com>
> > 
> > Direct writes within EOF are safe to be performed with inode shared lock
> > to improve parallelization with other direct writes or reads because EOF
> > is not changed and there is no race with truncate().
> > 
> > Direct reads are already performed under shared inode lock.
> > 
> > This patch is precursor to removing btrfs_inode->dio_sem.
> > 
> > Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> > ---
> >   fs/btrfs/file.c | 33 +++++++++++++++++++++------------
> >   1 file changed, 21 insertions(+), 12 deletions(-)
> > 
> > diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
> > index d9c3be19d7b3..50092d24eee2 100644
> > --- a/fs/btrfs/file.c
> > +++ b/fs/btrfs/file.c
> > @@ -1977,7 +1977,6 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
> >   	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
> >   	loff_t pos;
> >   	ssize_t written = 0;
> > -	bool relock = false;
> >   	ssize_t written_buffered;
> >   	loff_t endbyte;
> >   	int err;
> > @@ -1986,6 +1985,15 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
> >   	if (iocb->ki_flags & IOCB_NOWAIT)
> >   		ilock_flags |= BTRFS_ILOCK_TRY;
> > +	/*
> > +	 * If the write DIO within EOF,  use a shared lock
> > +	 */
> > +	if (iocb->ki_pos + iov_iter_count(from) <= i_size_read(inode))
> > +		ilock_flags |= BTRFS_ILOCK_SHARED;
> > +	else if (iocb->ki_flags & IOCB_NOWAIT)
> > +		return -EAGAIN;
> > +
> > +relock:
> 
> Huh?  Why are you making it so EOF extending NOWAIT writes now fail?  We are
> still using ILOCK_TRY here, so we may still not block, am I missing
> something? Thanks,
> 

Yes, this is incorrect. I had thought this would block on disk space
allocations, but I did not consider the prealloc case.

I am removing this check to match the previous behavior.
diff mbox series

Patch

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index d9c3be19d7b3..50092d24eee2 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1977,7 +1977,6 @@  static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
 	loff_t pos;
 	ssize_t written = 0;
-	bool relock = false;
 	ssize_t written_buffered;
 	loff_t endbyte;
 	int err;
@@ -1986,6 +1985,15 @@  static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
 	if (iocb->ki_flags & IOCB_NOWAIT)
 		ilock_flags |= BTRFS_ILOCK_TRY;
 
+	/*
+	 * If the DIO write is within EOF, use a shared lock
+	 */
+	if (iocb->ki_pos + iov_iter_count(from) <= i_size_read(inode))
+		ilock_flags |= BTRFS_ILOCK_SHARED;
+	else if (iocb->ki_flags & IOCB_NOWAIT)
+		return -EAGAIN;
+
+relock:
 	err = btrfs_inode_lock(inode, ilock_flags);
 	if (err < 0)
 		return err;
@@ -1997,21 +2005,23 @@  static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
 	}
 
 	pos = iocb->ki_pos;
+	/*
+	 * Re-check since the file size may have changed just before
+	 * taking the lock, or pos may have changed because of
+	 * O_APPEND in generic_write_checks()
+	 */
+	if ((ilock_flags & BTRFS_ILOCK_SHARED) &&
+	    pos + iov_iter_count(from) > i_size_read(inode)) {
+		btrfs_inode_unlock(inode, ilock_flags);
+		ilock_flags &= ~BTRFS_ILOCK_SHARED;
+		goto relock;
+	}
 
 	if (check_direct_IO(fs_info, from, pos)) {
 		btrfs_inode_unlock(inode, ilock_flags);
 		goto buffered;
 	}
 
-	/*
-	 * If the write DIO is beyond the EOF, we need update
-	 * the isize, but it is protected by i_mutex. So we can
-	 * not unlock the i_mutex at this case.
-	 */
-	if (pos + iov_iter_count(from) <= inode->i_size) {
-		btrfs_inode_unlock(inode, 0);
-		relock = true;
-	}
 	down_read(&BTRFS_I(inode)->dio_sem);
 
 	/*
@@ -2029,8 +2039,7 @@  static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
 		written = 0;
 
 	up_read(&BTRFS_I(inode)->dio_sem);
-	if (relock)
-		btrfs_inode_lock(inode, 0);
+	btrfs_inode_unlock(inode, ilock_flags);
 
 	if (written < 0 || !iov_iter_count(from)) {
 		err = written;