diff mbox

[6/7] nonblocking aio: xfs

Message ID 20170214024603.9563-7-rgoldwyn@suse.de (mailing list archive)
State Not Applicable, archived
Headers show

Commit Message

Goldwyn Rodrigues Feb. 14, 2017, 2:46 a.m. UTC
From: Goldwyn Rodrigues <rgoldwyn@suse.com>

If IOCB_NONBLOCKING is set:
	+ Check if writing beyond end of file, if yes return EAGAIN
	- check if writing to a hole which does not have allocated
	  file blocks.
	- Check if i_rwsem is immediately lockable in xfs_rw_ilock()

Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
---
 fs/xfs/xfs_file.c  | 36 ++++++++++++++++++++++++++++++++----
 fs/xfs/xfs_inode.h |  2 ++
 2 files changed, 34 insertions(+), 4 deletions(-)

Comments

Darrick J. Wong Feb. 14, 2017, 6:44 a.m. UTC | #1
On Mon, Feb 13, 2017 at 08:46:02PM -0600, Goldwyn Rodrigues wrote:
> From: Goldwyn Rodrigues <rgoldwyn@suse.com>

Send the whole patchset to linux-xfs, please.

> If IOCB_NONBLOCKING is set:
> 	+ Check if writing beyond end of file, if yes return EAGAIN
> 	- check if writing to a hole which does not have allocated
> 	  file blocks.
> 	- Check if i_rwsem is immediately lockable in xfs_rw_ilock()
> 
> Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> ---
>  fs/xfs/xfs_file.c  | 36 ++++++++++++++++++++++++++++++++----
>  fs/xfs/xfs_inode.h |  2 ++
>  2 files changed, 34 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 9a5d64b..42f055f 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -51,14 +51,20 @@ static const struct vm_operations_struct xfs_file_vm_ops;
>   * Locking primitives for read and write IO paths to ensure we consistently use
>   * and order the inode->i_mutex, ip->i_lock and ip->i_iolock.
>   */
> -static inline void
> +static inline int
>  xfs_rw_ilock(
>  	struct xfs_inode	*ip,
>  	int			type)
>  {
> -	if (type & XFS_IOLOCK_EXCL)
> -		inode_lock(VFS_I(ip));
> +	if (type & XFS_IOLOCK_EXCL) {
> +		if ((type & XFS_IOLOCK_NONBLOCKING) &&
> +				!inode_trylock(VFS_I(ip)))
> +			return -EAGAIN;
> +		else
> +			inode_lock(VFS_I(ip));
> +	}
>  	xfs_ilock(ip, type);
> +	return 0;

This function went away in 4.10-rc1.

>  }
>  
>  static inline void
> @@ -418,6 +424,24 @@ xfs_file_aio_write_checks(
>  	if (error <= 0)
>  		return error;
>  
> +	if (iocb->ki_flags & IOCB_NONBLOCKING) {
> +		struct xfs_bmbt_irec	imap;
> +		xfs_fileoff_t           offset_fsb, end_fsb;
> +		int			nimaps = 1, ret = 0;
> +		end_fsb = XFS_B_TO_FSB(ip->i_mount, iocb->ki_pos + count);
> +		if (XFS_B_TO_FSB(ip->i_mount, i_size_read(inode)) < end_fsb)
> +			return -EAGAIN;
> +		/* Check if it is an unallocated hole */
> +		offset_fsb = XFS_B_TO_FSBT(ip->i_mount, iocb->ki_pos);
> +
> +		ret = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
> +				&nimaps, 0);

You need to hold the ilock to call _bmapi_read, and I don't think
it's held here.  Did you CONFIG_XFS_DEBUG=y?

> +		if (ret)
> +			return ret;
> +		if (!nimaps || imap.br_startblock == HOLESTARTBLOCK)
> +			return -EAGAIN;
> +	}
> +
>  	error = xfs_break_layouts(inode, iolock, true);
>  	if (error)
>  		return error;
> @@ -555,11 +579,15 @@ xfs_file_dio_aio_write(
>  	    ((iocb->ki_pos + count) & mp->m_blockmask)) {
>  		unaligned_io = 1;
>  		iolock = XFS_IOLOCK_EXCL;
> +		if (iocb->ki_flags & IOCB_NONBLOCKING)
> +			iolock |= XFS_IOLOCK_NONBLOCKING;
>  	} else {
>  		iolock = XFS_IOLOCK_SHARED;
>  	}
>  
> -	xfs_rw_ilock(ip, iolock);
> +	ret = xfs_rw_ilock(ip, iolock);
> +	if (ret)
> +		return ret;
>  
>  	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
>  	if (ret)
> diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
> index 71e8a81..1a2d5eb 100644
> --- a/fs/xfs/xfs_inode.h
> +++ b/fs/xfs/xfs_inode.h
> @@ -283,6 +283,7 @@ static inline void xfs_ifunlock(struct xfs_inode *ip)
>  #define	XFS_ILOCK_SHARED	(1<<3)
>  #define	XFS_MMAPLOCK_EXCL	(1<<4)
>  #define	XFS_MMAPLOCK_SHARED	(1<<5)
> +#define	XFS_IOLOCK_NONBLOCKING	(1<<6)

What is the expected behavior if this is passed directly to xfs_ilock?

--D

>  
>  #define XFS_LOCK_MASK		(XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \
>  				| XFS_ILOCK_EXCL | XFS_ILOCK_SHARED \
> @@ -291,6 +292,7 @@ static inline void xfs_ifunlock(struct xfs_inode *ip)
>  #define XFS_LOCK_FLAGS \
>  	{ XFS_IOLOCK_EXCL,	"IOLOCK_EXCL" }, \
>  	{ XFS_IOLOCK_SHARED,	"IOLOCK_SHARED" }, \
> +	{ XFS_IOLOCK_NONBLOCKING,	"IOLOCK_NONBLOCKING" }, \
>  	{ XFS_ILOCK_EXCL,	"ILOCK_EXCL" }, \
>  	{ XFS_ILOCK_SHARED,	"ILOCK_SHARED" }, \
>  	{ XFS_MMAPLOCK_EXCL,	"MMAPLOCK_EXCL" }, \
> -- 
> 2.10.2
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christoph Hellwig Feb. 14, 2017, 7:43 a.m. UTC | #2
On Mon, Feb 13, 2017 at 08:46:02PM -0600, Goldwyn Rodrigues wrote:
> From: Goldwyn Rodrigues <rgoldwyn@suse.com>

Please Cc the while series to all lists, otherwiwe it's impossible
to review the thing.

> 
> If IOCB_NONBLOCKING is set:
> 	+ Check if writing beyond end of file, if yes return EAGAIN
> 	- check if writing to a hole which does not have allocated
> 	  file blocks.
> 	- Check if i_rwsem is immediately lockable in xfs_rw_ilock()

Why the + vs - above? 

> -static inline void
> +static inline int
>  xfs_rw_ilock(

This function has been removed a while ago.

>  
> +	if (iocb->ki_flags & IOCB_NONBLOCKING) {
> +		struct xfs_bmbt_irec	imap;
> +		xfs_fileoff_t           offset_fsb, end_fsb;
> +		int			nimaps = 1, ret = 0;
> +		end_fsb = XFS_B_TO_FSB(ip->i_mount, iocb->ki_pos + count);
> +		if (XFS_B_TO_FSB(ip->i_mount, i_size_read(inode)) < end_fsb)
> +			return -EAGAIN;

Bogus check, XFS supports async write beyond EOF if the space is
preallocated.

> +		/* Check if it is an unallocated hole */
> +		offset_fsb = XFS_B_TO_FSBT(ip->i_mount, iocb->ki_pos);
> +
> +		ret = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
> +				&nimaps, 0);
> +		if (ret)
> +			return ret;
> +		if (!nimaps || imap.br_startblock == HOLESTARTBLOCK)
> +			return -EAGAIN;

This would need the ilock.  But extent list lookups are expensive,
so please don't add another one here.  Just return when we hit the
first unallocated extent in xfs_bmapi_write - either a short write or
-EAGAIN if nothing has been written.

>  	error = xfs_break_layouts(inode, iolock, true);
>  	if (error)
>  		return error;

Additionally this can drop the iolock, so you might get a new
hole after it.
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Goldwyn Rodrigues Feb. 15, 2017, 3:30 p.m. UTC | #3
On 02/14/2017 01:43 AM, Christoph Hellwig wrote:
> On Mon, Feb 13, 2017 at 08:46:02PM -0600, Goldwyn Rodrigues wrote:
>> From: Goldwyn Rodrigues <rgoldwyn@suse.com>
> 
> Please Cc the while series to all lists, otherwiwe it's impossible
> to review the thing.
> 
>>
>> If IOCB_NONBLOCKING is set:
>> 	+ Check if writing beyond end of file, if yes return EAGAIN
>> 	- check if writing to a hole which does not have allocated
>> 	  file blocks.
>> 	- Check if i_rwsem is immediately lockable in xfs_rw_ilock()
> 
> Why the + vs - above? 

A mistake. It does not mean anything besides bullets.

> 
>> -static inline void
>> +static inline int
>>  xfs_rw_ilock(
> 
> This function has been removed a while ago.

And thanks for putting in xfs_ilock_nowait(). This can't be done in a
cleaner way. I was very skeptical of adding yet another flag.

> 
>>  
>> +	if (iocb->ki_flags & IOCB_NONBLOCKING) {
>> +		struct xfs_bmbt_irec	imap;
>> +		xfs_fileoff_t           offset_fsb, end_fsb;
>> +		int			nimaps = 1, ret = 0;
>> +		end_fsb = XFS_B_TO_FSB(ip->i_mount, iocb->ki_pos + count);
>> +		if (XFS_B_TO_FSB(ip->i_mount, i_size_read(inode)) < end_fsb)
>> +			return -EAGAIN;
> 
> Bogus check, XFS supports async write beyond EOF if the space is
> preallocated.

I assume this will be covered by xfs_bmapi_write().

> 
>> +		/* Check if it is an unallocated hole */
>> +		offset_fsb = XFS_B_TO_FSBT(ip->i_mount, iocb->ki_pos);
>> +
>> +		ret = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
>> +				&nimaps, 0);
>> +		if (ret)
>> +			return ret;
>> +		if (!nimaps || imap.br_startblock == HOLESTARTBLOCK)
>> +			return -EAGAIN;
> 
> This would need the ilock.  But extent list lookups are expensive,
> so please don't add another one here.  Just return when we hit the
> first unallocated extent in xfs_bmapi_write - either a short write or
> -EAGAIN if nothing has been written.

So in xfs_bmapi_write (and referring to), I would need a new flag, say
XFS_BMAPI_NOALLOC which would bail to error0, setting error=-EAGAIN if
need_alloc or was_delay is set. This flag XFS_BMAPI_NOALLOC is set in
xfs_iomap_write_direct(). I did not understand short writes. Where can I
get a short write?

If I understand correctly, we do add the flag.

> 
>>  	error = xfs_break_layouts(inode, iolock, true);
>>  	if (error)
>>  		return error;
> 
> Additionally this can drop the iolock, so you might get a new
> hole after it.
>
Goldwyn Rodrigues Feb. 15, 2017, 4:11 p.m. UTC | #4
On 02/15/2017 09:30 AM, Goldwyn Rodrigues wrote:
> 
> 
> On 02/14/2017 01:43 AM, Christoph Hellwig wrote:
>> On Mon, Feb 13, 2017 at 08:46:02PM -0600, Goldwyn Rodrigues wrote:
>>> From: Goldwyn Rodrigues <rgoldwyn@suse.com>
>>
>> Please Cc the while series to all lists, otherwiwe it's impossible
>> to review the thing.
>>
>>>
>>> If IOCB_NONBLOCKING is set:
>>> 	+ Check if writing beyond end of file, if yes return EAGAIN
>>> 	- check if writing to a hole which does not have allocated
>>> 	  file blocks.
>>> 	- Check if i_rwsem is immediately lockable in xfs_rw_ilock()
>>
>> Why the + vs - above? 
> 
> A mistake. It does not mean anything besides bullets.
> 
>>
>>> -static inline void
>>> +static inline int
>>>  xfs_rw_ilock(
>>
>> This function has been removed a while ago.
> 
> And thanks for putting in xfs_ilock_nowait(). This can't be done in a
> cleaner way. I was very skeptical of adding yet another flag.
> 
>>
>>>  
>>> +	if (iocb->ki_flags & IOCB_NONBLOCKING) {
>>> +		struct xfs_bmbt_irec	imap;
>>> +		xfs_fileoff_t           offset_fsb, end_fsb;
>>> +		int			nimaps = 1, ret = 0;
>>> +		end_fsb = XFS_B_TO_FSB(ip->i_mount, iocb->ki_pos + count);
>>> +		if (XFS_B_TO_FSB(ip->i_mount, i_size_read(inode)) < end_fsb)
>>> +			return -EAGAIN;
>>
>> Bogus check, XFS supports async write beyond EOF if the space is
>> preallocated.
> 
> I assume this will be covered by xfs_bmapi_write().
> 
>>
>>> +		/* Check if it is an unallocated hole */
>>> +		offset_fsb = XFS_B_TO_FSBT(ip->i_mount, iocb->ki_pos);
>>> +
>>> +		ret = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
>>> +				&nimaps, 0);
>>> +		if (ret)
>>> +			return ret;
>>> +		if (!nimaps || imap.br_startblock == HOLESTARTBLOCK)
>>> +			return -EAGAIN;
>>
>> This would need the ilock.  But extent list lookups are expensive,
>> so please don't add another one here.  Just return when we hit the
>> first unallocated extent in xfs_bmapi_write - either a short write or
>> -EAGAIN if nothing has been written.
> 
> So in xfs_bmapi_write (and referring to), I would need a new flag, say
> XFS_BMAPI_NOALLOC which would bail to error0, setting error=-EAGAIN if
> need_alloc or was_delay is set. This flag XFS_BMAPI_NOALLOC is set in
> xfs_iomap_write_direct(). I did not understand short writes. Where can I
> get a short write?
> 
> If I understand correctly, we do add the flag.

Replying to myself to correct myself.

On reading a bit more, I figured that we perform
xfs_file_iomap_begin->xfs_iomap_write_direct. At this point we have
already performed xfs_bmapi_read(). So, a check in xfs_file_iomap_begin
should be good enough. So, the flag required would be with iomap flags,
say IOMAP_NONBLOCKING. IOW, we don't need to go all the way to
xfs_bmap_write() and return when imap.br_startblock == HOLESTARTBLOCK.

> 
>>
>>>  	error = xfs_break_layouts(inode, iolock, true);
>>>  	if (error)
>>>  		return error;
>>
>> Additionally this can drop the iolock, so you might get a new
>> hole after it.
>>
>
Christoph Hellwig Feb. 16, 2017, 8:21 p.m. UTC | #5
On Wed, Feb 15, 2017 at 10:11:38AM -0600, Goldwyn Rodrigues wrote:
> > I did not understand short writes. Where can I
> > get a short write?

If you have a write request of N bytes, and you've already wrіtten
M of them you return M from the *write system call instead of -EAGAIN.
This is standard practice on e.g. sockets.

> > 
> > If I understand correctly, we do add the flag.
> 
> Replying to myself to correct myself.
> 
> On reading a bit more, I figured that we perform
> xfs_file_iomap_begin->xfs_iomap_write_direct. At this point we have
> already performed xfs_bmapi_read(). So, a check in xfs_file_iomap_begin
> should be good enough. So, the flag required would be with iomap flags,
> say IOMAP_NONBLOCKING. IOW, we don't need to go all the way to
> xfs_bmap_write() and return when imap.br_startblock == HOLESTARTBLOCK.

Yes, except that reflinked files with shared extents will also need
some additional special casing - for those xfs_bmapi_read can return
an allocated extent, but we might still have to perform a block
allocation for a write.
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Goldwyn Rodrigues Feb. 16, 2017, 8:44 p.m. UTC | #6
On 02/16/2017 02:21 PM, Christoph Hellwig wrote:
> On Wed, Feb 15, 2017 at 10:11:38AM -0600, Goldwyn Rodrigues wrote:
>>> I did not understand short writes. Where can I
>>> get a short write?
> 
> If you have a write request of N bytes, and you've already wrіtten
> M of them you return M from the *write system call instead of -EAGAIN.
> This is standard practice on e.g. sockets.

Oh, I assume that would be taken care of in the existing code, at least
with the modified patch. I will double check that anyways.

> 
>>>
>>> If I understand correctly, we do add the flag.
>>
>> Replying to myself to correct myself.
>>
>> On reading a bit more, I figured that we perform
>> xfs_file_iomap_begin->xfs_iomap_write_direct. At this point we have
>> already performed xfs_bmapi_read(). So, a check in xfs_file_iomap_begin
>> should be good enough. So, the flag required would be with iomap flags,
>> say IOMAP_NONBLOCKING. IOW, we don't need to go all the way to
>> xfs_bmap_write() and return when imap.br_startblock == HOLESTARTBLOCK.
> 
> Yes, except that reflinked files with shared extents will also need
> some additional special casing - for those xfs_bmapi_read can return
> an allocated extent, but we might still have to perform a block
> allocation for a write.
> 

Yes, I forgot that. I will put in a check for reflinks as well.
diff mbox

Patch

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 9a5d64b..42f055f 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -51,14 +51,20 @@  static const struct vm_operations_struct xfs_file_vm_ops;
  * Locking primitives for read and write IO paths to ensure we consistently use
  * and order the inode->i_mutex, ip->i_lock and ip->i_iolock.
  */
-static inline void
+static inline int
 xfs_rw_ilock(
 	struct xfs_inode	*ip,
 	int			type)
 {
-	if (type & XFS_IOLOCK_EXCL)
-		inode_lock(VFS_I(ip));
+	if (type & XFS_IOLOCK_EXCL) {
+		if ((type & XFS_IOLOCK_NONBLOCKING) &&
+				!inode_trylock(VFS_I(ip)))
+			return -EAGAIN;
+		else
+			inode_lock(VFS_I(ip));
+	}
 	xfs_ilock(ip, type);
+	return 0;
 }
 
 static inline void
@@ -418,6 +424,24 @@  xfs_file_aio_write_checks(
 	if (error <= 0)
 		return error;
 
+	if (iocb->ki_flags & IOCB_NONBLOCKING) {
+		struct xfs_bmbt_irec	imap;
+		xfs_fileoff_t           offset_fsb, end_fsb;
+		int			nimaps = 1, ret = 0;
+		end_fsb = XFS_B_TO_FSB(ip->i_mount, iocb->ki_pos + count);
+		if (XFS_B_TO_FSB(ip->i_mount, i_size_read(inode)) < end_fsb)
+			return -EAGAIN;
+		/* Check if it is an unallocated hole */
+		offset_fsb = XFS_B_TO_FSBT(ip->i_mount, iocb->ki_pos);
+
+		ret = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
+				&nimaps, 0);
+		if (ret)
+			return ret;
+		if (!nimaps || imap.br_startblock == HOLESTARTBLOCK)
+			return -EAGAIN;
+	}
+
 	error = xfs_break_layouts(inode, iolock, true);
 	if (error)
 		return error;
@@ -555,11 +579,15 @@  xfs_file_dio_aio_write(
 	    ((iocb->ki_pos + count) & mp->m_blockmask)) {
 		unaligned_io = 1;
 		iolock = XFS_IOLOCK_EXCL;
+		if (iocb->ki_flags & IOCB_NONBLOCKING)
+			iolock |= XFS_IOLOCK_NONBLOCKING;
 	} else {
 		iolock = XFS_IOLOCK_SHARED;
 	}
 
-	xfs_rw_ilock(ip, iolock);
+	ret = xfs_rw_ilock(ip, iolock);
+	if (ret)
+		return ret;
 
 	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
 	if (ret)
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 71e8a81..1a2d5eb 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -283,6 +283,7 @@  static inline void xfs_ifunlock(struct xfs_inode *ip)
 #define	XFS_ILOCK_SHARED	(1<<3)
 #define	XFS_MMAPLOCK_EXCL	(1<<4)
 #define	XFS_MMAPLOCK_SHARED	(1<<5)
+#define	XFS_IOLOCK_NONBLOCKING	(1<<6)
 
 #define XFS_LOCK_MASK		(XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \
 				| XFS_ILOCK_EXCL | XFS_ILOCK_SHARED \
@@ -291,6 +292,7 @@  static inline void xfs_ifunlock(struct xfs_inode *ip)
 #define XFS_LOCK_FLAGS \
 	{ XFS_IOLOCK_EXCL,	"IOLOCK_EXCL" }, \
 	{ XFS_IOLOCK_SHARED,	"IOLOCK_SHARED" }, \
+	{ XFS_IOLOCK_NONBLOCKING,	"IOLOCK_NONBLOCKING" }, \
 	{ XFS_ILOCK_EXCL,	"ILOCK_EXCL" }, \
 	{ XFS_ILOCK_SHARED,	"ILOCK_SHARED" }, \
 	{ XFS_MMAPLOCK_EXCL,	"MMAPLOCK_EXCL" }, \