Message ID | 20170214024603.9563-7-rgoldwyn@suse.de (mailing list archive) |
---|---|
State | Not Applicable, archived |
Headers | show |
On Mon, Feb 13, 2017 at 08:46:02PM -0600, Goldwyn Rodrigues wrote: > From: Goldwyn Rodrigues <rgoldwyn@suse.com> Send the whole patchset to linux-xfs, please. > If IOCB_NONBLOCKING is set: > + Check if writing beyond end of file, if yes return EAGAIN > - check if writing to a hole which does not have allocated > file blocks. > - Check if i_rwsem is immediately lockable in xfs_rw_ilock() > > Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com> > --- > fs/xfs/xfs_file.c | 36 ++++++++++++++++++++++++++++++++---- > fs/xfs/xfs_inode.h | 2 ++ > 2 files changed, 34 insertions(+), 4 deletions(-) > > diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c > index 9a5d64b..42f055f 100644 > --- a/fs/xfs/xfs_file.c > +++ b/fs/xfs/xfs_file.c > @@ -51,14 +51,20 @@ static const struct vm_operations_struct xfs_file_vm_ops; > * Locking primitives for read and write IO paths to ensure we consistently use > * and order the inode->i_mutex, ip->i_lock and ip->i_iolock. > */ > -static inline void > +static inline int > xfs_rw_ilock( > struct xfs_inode *ip, > int type) > { > - if (type & XFS_IOLOCK_EXCL) > - inode_lock(VFS_I(ip)); > + if (type & XFS_IOLOCK_EXCL) { > + if ((type & XFS_IOLOCK_NONBLOCKING) && > + !inode_trylock(VFS_I(ip))) > + return -EAGAIN; > + else > + inode_lock(VFS_I(ip)); > + } > xfs_ilock(ip, type); > + return 0; This function went away in 4.10-rc1. > } > > static inline void > @@ -418,6 +424,24 @@ xfs_file_aio_write_checks( > if (error <= 0) > return error; > > + if (iocb->ki_flags & IOCB_NONBLOCKING) { > + struct xfs_bmbt_irec imap; > + xfs_fileoff_t offset_fsb, end_fsb; > + int nimaps = 1, ret = 0; > + end_fsb = XFS_B_TO_FSB(ip->i_mount, iocb->ki_pos + count); > + if (XFS_B_TO_FSB(ip->i_mount, i_size_read(inode)) < end_fsb) > + return -EAGAIN; > + /* Check if it is an unallocated hole */ > + offset_fsb = XFS_B_TO_FSBT(ip->i_mount, iocb->ki_pos); > + > + ret = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, > + &nimaps, 0); You need to hold the ilock to call _bmapi_read, and I don't think it's held here. Did you CONFIG_XFS_DEBUG=y? > + if (ret) > + return ret; > + if (!nimaps || imap.br_startblock == HOLESTARTBLOCK) > + return -EAGAIN; > + } > + > error = xfs_break_layouts(inode, iolock, true); > if (error) > return error; > @@ -555,11 +579,15 @@ xfs_file_dio_aio_write( > ((iocb->ki_pos + count) & mp->m_blockmask)) { > unaligned_io = 1; > iolock = XFS_IOLOCK_EXCL; > + if (iocb->ki_flags & IOCB_NONBLOCKING) > + iolock |= XFS_IOLOCK_NONBLOCKING; > } else { > iolock = XFS_IOLOCK_SHARED; > } > > - xfs_rw_ilock(ip, iolock); > + ret = xfs_rw_ilock(ip, iolock); > + if (ret) > + return ret; > > ret = xfs_file_aio_write_checks(iocb, from, &iolock); > if (ret) > diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h > index 71e8a81..1a2d5eb 100644 > --- a/fs/xfs/xfs_inode.h > +++ b/fs/xfs/xfs_inode.h > @@ -283,6 +283,7 @@ static inline void xfs_ifunlock(struct xfs_inode *ip) > #define XFS_ILOCK_SHARED (1<<3) > #define XFS_MMAPLOCK_EXCL (1<<4) > #define XFS_MMAPLOCK_SHARED (1<<5) > +#define XFS_IOLOCK_NONBLOCKING (1<<6) What is the expected behavior if this is passed directly to xfs_ilock? --D > > #define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \ > | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED \ > @@ -291,6 +292,7 @@ static inline void xfs_ifunlock(struct xfs_inode *ip) > #define XFS_LOCK_FLAGS \ > { XFS_IOLOCK_EXCL, "IOLOCK_EXCL" }, \ > { XFS_IOLOCK_SHARED, "IOLOCK_SHARED" }, \ > + { XFS_IOLOCK_NONBLOCKING, "IOLOCK_NONBLOCKING" }, \ > { XFS_ILOCK_EXCL, "ILOCK_EXCL" }, \ > { XFS_ILOCK_SHARED, "ILOCK_SHARED" }, \ > { XFS_MMAPLOCK_EXCL, "MMAPLOCK_EXCL" }, \ > -- > 2.10.2 > -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Feb 13, 2017 at 08:46:02PM -0600, Goldwyn Rodrigues wrote: > From: Goldwyn Rodrigues <rgoldwyn@suse.com> Please Cc the while series to all lists, otherwiwe it's impossible to review the thing. > > If IOCB_NONBLOCKING is set: > + Check if writing beyond end of file, if yes return EAGAIN > - check if writing to a hole which does not have allocated > file blocks. > - Check if i_rwsem is immediately lockable in xfs_rw_ilock() Why the + vs - above? > -static inline void > +static inline int > xfs_rw_ilock( This function has been removed a while ago. > > + if (iocb->ki_flags & IOCB_NONBLOCKING) { > + struct xfs_bmbt_irec imap; > + xfs_fileoff_t offset_fsb, end_fsb; > + int nimaps = 1, ret = 0; > + end_fsb = XFS_B_TO_FSB(ip->i_mount, iocb->ki_pos + count); > + if (XFS_B_TO_FSB(ip->i_mount, i_size_read(inode)) < end_fsb) > + return -EAGAIN; Bogus check, XFS supports async write beyond EOF if the space is preallocated. > + /* Check if it is an unallocated hole */ > + offset_fsb = XFS_B_TO_FSBT(ip->i_mount, iocb->ki_pos); > + > + ret = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, > + &nimaps, 0); > + if (ret) > + return ret; > + if (!nimaps || imap.br_startblock == HOLESTARTBLOCK) > + return -EAGAIN; This would need the ilock. But extent list lookups are expensive, so please don't add another one here. Just return when we hit the first unallocated extent in xfs_bmapi_write - either a short write or -EAGAIN if nothing has been written. > error = xfs_break_layouts(inode, iolock, true); > if (error) > return error; Additionally this can drop the iolock, so you might get a new hole after it. -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 02/14/2017 01:43 AM, Christoph Hellwig wrote: > On Mon, Feb 13, 2017 at 08:46:02PM -0600, Goldwyn Rodrigues wrote: >> From: Goldwyn Rodrigues <rgoldwyn@suse.com> > > Please Cc the while series to all lists, otherwiwe it's impossible > to review the thing. > >> >> If IOCB_NONBLOCKING is set: >> + Check if writing beyond end of file, if yes return EAGAIN >> - check if writing to a hole which does not have allocated >> file blocks. >> - Check if i_rwsem is immediately lockable in xfs_rw_ilock() > > Why the + vs - above? A mistake. It does not mean anything besides bullets. > >> -static inline void >> +static inline int >> xfs_rw_ilock( > > This function has been removed a while ago. And thanks for putting in xfs_ilock_nowait(). This can't be done in a cleaner way. I was very skeptical of adding yet another flag. > >> >> + if (iocb->ki_flags & IOCB_NONBLOCKING) { >> + struct xfs_bmbt_irec imap; >> + xfs_fileoff_t offset_fsb, end_fsb; >> + int nimaps = 1, ret = 0; >> + end_fsb = XFS_B_TO_FSB(ip->i_mount, iocb->ki_pos + count); >> + if (XFS_B_TO_FSB(ip->i_mount, i_size_read(inode)) < end_fsb) >> + return -EAGAIN; > > Bogus check, XFS supports async write beyond EOF if the space is > preallocated. I assume this will be covered by xfs_bmapi_write(). > >> + /* Check if it is an unallocated hole */ >> + offset_fsb = XFS_B_TO_FSBT(ip->i_mount, iocb->ki_pos); >> + >> + ret = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, >> + &nimaps, 0); >> + if (ret) >> + return ret; >> + if (!nimaps || imap.br_startblock == HOLESTARTBLOCK) >> + return -EAGAIN; > > This would need the ilock. But extent list lookups are expensive, > so please don't add another one here. Just return when we hit the > first unallocated extent in xfs_bmapi_write - either a short write or > -EAGAIN if nothing has been written. So in xfs_bmapi_write (and referring to), I would need a new flag, say XFS_BMAPI_NOALLOC which would bail to error0, setting error=-EAGAIN if need_alloc or was_delay is set. This flag XFS_BMAPI_NOALLOC is set in xfs_iomap_write_direct(). I did not understand short writes. Where can I get a short write? If I understand correctly, we do add the flag. > >> error = xfs_break_layouts(inode, iolock, true); >> if (error) >> return error; > > Additionally this can drop the iolock, so you might get a new > hole after it. >
On 02/15/2017 09:30 AM, Goldwyn Rodrigues wrote: > > > On 02/14/2017 01:43 AM, Christoph Hellwig wrote: >> On Mon, Feb 13, 2017 at 08:46:02PM -0600, Goldwyn Rodrigues wrote: >>> From: Goldwyn Rodrigues <rgoldwyn@suse.com> >> >> Please Cc the while series to all lists, otherwiwe it's impossible >> to review the thing. >> >>> >>> If IOCB_NONBLOCKING is set: >>> + Check if writing beyond end of file, if yes return EAGAIN >>> - check if writing to a hole which does not have allocated >>> file blocks. >>> - Check if i_rwsem is immediately lockable in xfs_rw_ilock() >> >> Why the + vs - above? > > A mistake. It does not mean anything besides bullets. > >> >>> -static inline void >>> +static inline int >>> xfs_rw_ilock( >> >> This function has been removed a while ago. > > And thanks for putting in xfs_ilock_nowait(). This can't be done in a > cleaner way. I was very skeptical of adding yet another flag. > >> >>> >>> + if (iocb->ki_flags & IOCB_NONBLOCKING) { >>> + struct xfs_bmbt_irec imap; >>> + xfs_fileoff_t offset_fsb, end_fsb; >>> + int nimaps = 1, ret = 0; >>> + end_fsb = XFS_B_TO_FSB(ip->i_mount, iocb->ki_pos + count); >>> + if (XFS_B_TO_FSB(ip->i_mount, i_size_read(inode)) < end_fsb) >>> + return -EAGAIN; >> >> Bogus check, XFS supports async write beyond EOF if the space is >> preallocated. > > I assume this will be covered by xfs_bmapi_write(). > >> >>> + /* Check if it is an unallocated hole */ >>> + offset_fsb = XFS_B_TO_FSBT(ip->i_mount, iocb->ki_pos); >>> + >>> + ret = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, >>> + &nimaps, 0); >>> + if (ret) >>> + return ret; >>> + if (!nimaps || imap.br_startblock == HOLESTARTBLOCK) >>> + return -EAGAIN; >> >> This would need the ilock. But extent list lookups are expensive, >> so please don't add another one here. Just return when we hit the >> first unallocated extent in xfs_bmapi_write - either a short write or >> -EAGAIN if nothing has been written. > > So in xfs_bmapi_write (and referring to), I would need a new flag, say > XFS_BMAPI_NOALLOC which would bail to error0, setting error=-EAGAIN if > need_alloc or was_delay is set. This flag XFS_BMAPI_NOALLOC is set in > xfs_iomap_write_direct(). I did not understand short writes. Where can I > get a short write? > > If I understand correctly, we do add the flag. Replying to myself to correct myself. On reading a bit more, I figured that we perform xfs_file_iomap_begin->xfs_iomap_write_direct. At this point we have already performed xfs_bmapi_read(). So, a check in xfs_file_iomap_begin should be good enough. So, the flag required would be with iomap flags, say IOMAP_NONBLOCKING. IOW, we don't need to go all the way to xfs_bmap_write() and return when imap.br_startblock == HOLESTARTBLOCK. > >> >>> error = xfs_break_layouts(inode, iolock, true); >>> if (error) >>> return error; >> >> Additionally this can drop the iolock, so you might get a new >> hole after it. >> >
On Wed, Feb 15, 2017 at 10:11:38AM -0600, Goldwyn Rodrigues wrote: > > I did not understand short writes. Where can I > > get a short write? If you have a write request of N bytes, and you've already wrіtten M of them you return M from the *write system call instead of -EAGAIN. This is standard practice on e.g. sockets. > > > > If I understand correctly, we do add the flag. > > Replying to myself to correct myself. > > On reading a bit more, I figured that we perform > xfs_file_iomap_begin->xfs_iomap_write_direct. At this point we have > already performed xfs_bmapi_read(). So, a check in xfs_file_iomap_begin > should be good enough. So, the flag required would be with iomap flags, > say IOMAP_NONBLOCKING. IOW, we don't need to go all the way to > xfs_bmap_write() and return when imap.br_startblock == HOLESTARTBLOCK. Yes, except that reflinked files with shared extents will also need some additional special casing - for those xfs_bmapi_read can return an allocated extent, but we might still have to perform a block allocation for a write. -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 02/16/2017 02:21 PM, Christoph Hellwig wrote: > On Wed, Feb 15, 2017 at 10:11:38AM -0600, Goldwyn Rodrigues wrote: >>> I did not understand short writes. Where can I >>> get a short write? > > If you have a write request of N bytes, and you've already wrіtten > M of them you return M from the *write system call instead of -EAGAIN. > This is standard practice on e.g. sockets. Oh, I assume that would be taken care of in the existing code, at least with the modified patch. I will double check that anyways. > >>> >>> If I understand correctly, we do add the flag. >> >> Replying to myself to correct myself. >> >> On reading a bit more, I figured that we perform >> xfs_file_iomap_begin->xfs_iomap_write_direct. At this point we have >> already performed xfs_bmapi_read(). So, a check in xfs_file_iomap_begin >> should be good enough. So, the flag required would be with iomap flags, >> say IOMAP_NONBLOCKING. IOW, we don't need to go all the way to >> xfs_bmap_write() and return when imap.br_startblock == HOLESTARTBLOCK. > > Yes, except that reflinked files with shared extents will also need > some additional special casing - for those xfs_bmapi_read can return > an allocated extent, but we might still have to perform a block > allocation for a write. > Yes, I forgot that. I will put in a check for reflinks as well.
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 9a5d64b..42f055f 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -51,14 +51,20 @@ static const struct vm_operations_struct xfs_file_vm_ops; * Locking primitives for read and write IO paths to ensure we consistently use * and order the inode->i_mutex, ip->i_lock and ip->i_iolock. */ -static inline void +static inline int xfs_rw_ilock( struct xfs_inode *ip, int type) { - if (type & XFS_IOLOCK_EXCL) - inode_lock(VFS_I(ip)); + if (type & XFS_IOLOCK_EXCL) { + if ((type & XFS_IOLOCK_NONBLOCKING) && + !inode_trylock(VFS_I(ip))) + return -EAGAIN; + else + inode_lock(VFS_I(ip)); + } xfs_ilock(ip, type); + return 0; } static inline void @@ -418,6 +424,24 @@ xfs_file_aio_write_checks( if (error <= 0) return error; + if (iocb->ki_flags & IOCB_NONBLOCKING) { + struct xfs_bmbt_irec imap; + xfs_fileoff_t offset_fsb, end_fsb; + int nimaps = 1, ret = 0; + end_fsb = XFS_B_TO_FSB(ip->i_mount, iocb->ki_pos + count); + if (XFS_B_TO_FSB(ip->i_mount, i_size_read(inode)) < end_fsb) + return -EAGAIN; + /* Check if it is an unallocated hole */ + offset_fsb = XFS_B_TO_FSBT(ip->i_mount, iocb->ki_pos); + + ret = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, + &nimaps, 0); + if (ret) + return ret; + if (!nimaps || imap.br_startblock == HOLESTARTBLOCK) + return -EAGAIN; + } + error = xfs_break_layouts(inode, iolock, true); if (error) return error; @@ -555,11 +579,15 @@ xfs_file_dio_aio_write( ((iocb->ki_pos + count) & mp->m_blockmask)) { unaligned_io = 1; iolock = XFS_IOLOCK_EXCL; + if (iocb->ki_flags & IOCB_NONBLOCKING) + iolock |= XFS_IOLOCK_NONBLOCKING; } else { iolock = XFS_IOLOCK_SHARED; } - xfs_rw_ilock(ip, iolock); + ret = xfs_rw_ilock(ip, iolock); + if (ret) + return ret; ret = xfs_file_aio_write_checks(iocb, from, &iolock); if (ret) diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 71e8a81..1a2d5eb 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -283,6 +283,7 @@ static inline void xfs_ifunlock(struct xfs_inode *ip) #define XFS_ILOCK_SHARED (1<<3) #define XFS_MMAPLOCK_EXCL (1<<4) #define XFS_MMAPLOCK_SHARED (1<<5) +#define XFS_IOLOCK_NONBLOCKING (1<<6) #define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \ | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED \ @@ -291,6 +292,7 @@ static inline void xfs_ifunlock(struct xfs_inode *ip) #define XFS_LOCK_FLAGS \ { XFS_IOLOCK_EXCL, "IOLOCK_EXCL" }, \ { XFS_IOLOCK_SHARED, "IOLOCK_SHARED" }, \ + { XFS_IOLOCK_NONBLOCKING, "IOLOCK_NONBLOCKING" }, \ { XFS_ILOCK_EXCL, "ILOCK_EXCL" }, \ { XFS_ILOCK_SHARED, "ILOCK_SHARED" }, \ { XFS_MMAPLOCK_EXCL, "MMAPLOCK_EXCL" }, \