Message ID | 1428420944-20965-1-git-send-email-hch@lst.de (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Tue, Apr 07, 2015 at 05:35:44PM +0200, Christoph Hellwig wrote: > We want to drop all I/O path locks when recalling layouts, and that includes > i_mutex for the write path. Without this we get stuck processe when recalls > take too long. Also if the writer is an nfsd thread than we'd rather just error out than wait. --b. > > Signed-off-by: Christoph Hellwig <hch@lst.de> > --- > fs/xfs/xfs_file.c | 4 ++-- > fs/xfs/xfs_ioctl.c | 2 +- > fs/xfs/xfs_iops.c | 2 +- > fs/xfs/xfs_pnfs.c | 7 ++++++- > fs/xfs/xfs_pnfs.h | 2 +- > 5 files changed, 11 insertions(+), 6 deletions(-) > > diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c > index a2e1cb8..963dfb2 100644 > --- a/fs/xfs/xfs_file.c > +++ b/fs/xfs/xfs_file.c > @@ -559,7 +559,7 @@ restart: > if (error) > return error; > > - error = xfs_break_layouts(inode, iolock); > + error = xfs_break_layouts(inode, iolock, true); > if (error) > return error; > > @@ -843,7 +843,7 @@ xfs_file_fallocate( > return -EOPNOTSUPP; > > xfs_ilock(ip, iolock); > - error = xfs_break_layouts(inode, &iolock); > + error = xfs_break_layouts(inode, &iolock, false); > if (error) > goto out_unlock; > > diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c > index ac4feae..3a21cc7 100644 > --- a/fs/xfs/xfs_ioctl.c > +++ b/fs/xfs/xfs_ioctl.c > @@ -639,7 +639,7 @@ xfs_ioc_space( > return error; > > xfs_ilock(ip, iolock); > - error = xfs_break_layouts(inode, &iolock); > + error = xfs_break_layouts(inode, &iolock, false); > if (error) > goto out_unlock; > > diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c > index e53a903..79520c7 100644 > --- a/fs/xfs/xfs_iops.c > +++ b/fs/xfs/xfs_iops.c > @@ -975,7 +975,7 @@ xfs_vn_setattr( > uint iolock = XFS_IOLOCK_EXCL; > > xfs_ilock(ip, iolock); > - error = xfs_break_layouts(dentry->d_inode, &iolock); > + error = xfs_break_layouts(dentry->d_inode, &iolock, true); > if (!error) > error = xfs_setattr_size(ip, iattr); > xfs_iunlock(ip, iolock); > diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c > index 365dd57..981a657 100644 > --- a/fs/xfs/xfs_pnfs.c > +++ b/fs/xfs/xfs_pnfs.c > @@ -31,7 +31,8 @@ > int > xfs_break_layouts( > struct inode *inode, > - uint *iolock) > + uint *iolock, > + bool with_imutex) > { > struct xfs_inode *ip = XFS_I(inode); > int error; > @@ -40,8 +41,12 @@ xfs_break_layouts( > > while ((error = break_layout(inode, false) == -EWOULDBLOCK)) { > xfs_iunlock(ip, *iolock); > + if (with_imutex && (*iolock & XFS_IOLOCK_EXCL)) > + mutex_unlock(&inode->i_mutex); > error = break_layout(inode, true); > *iolock = XFS_IOLOCK_EXCL; > + if (with_imutex) > + mutex_lock(&inode->i_mutex); > xfs_ilock(ip, *iolock); > } > > diff --git a/fs/xfs/xfs_pnfs.h b/fs/xfs/xfs_pnfs.h > index b7fbfce..f749475 100644 > --- a/fs/xfs/xfs_pnfs.h > +++ b/fs/xfs/xfs_pnfs.h > @@ -8,7 +8,7 @@ int xfs_fs_map_blocks(struct inode *inode, loff_t offset, u64 length, > int xfs_fs_commit_blocks(struct inode *inode, struct iomap *maps, int nr_maps, > struct iattr *iattr); > > -int xfs_break_layouts(struct inode *inode, uint *iolock); > +int xfs_break_layouts(struct inode *inode, uint *iolock, bool with_imutex); > #else > static inline int xfs_break_layouts(struct inode *inode, uint *iolock) > { > -- > 1.9.1 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-nfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, Apr 07, 2015 at 05:35:44PM +0200, Christoph Hellwig wrote: > We want to drop all I/O path locks when recalling layouts, and that includes > i_mutex for the write path. Without this we get stuck processe when recalls > take too long. > > Signed-off-by: Christoph Hellwig <hch@lst.de> ..... > xfs_iunlock(ip, iolock); > diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c > index 365dd57..981a657 100644 > --- a/fs/xfs/xfs_pnfs.c > +++ b/fs/xfs/xfs_pnfs.c > @@ -31,7 +31,8 @@ > int > xfs_break_layouts( > struct inode *inode, > - uint *iolock) > + uint *iolock, > + bool with_imutex) > { > struct xfs_inode *ip = XFS_I(inode); > int error; > @@ -40,8 +41,12 @@ xfs_break_layouts( > > while ((error = break_layout(inode, false) == -EWOULDBLOCK)) { > xfs_iunlock(ip, *iolock); > + if (with_imutex && (*iolock & XFS_IOLOCK_EXCL)) > + mutex_unlock(&inode->i_mutex); > error = break_layout(inode, true); > *iolock = XFS_IOLOCK_EXCL; > + if (with_imutex) > + mutex_lock(&inode->i_mutex); > xfs_ilock(ip, *iolock); > } That's kinda nasty, and it has no documentation explaining when or why we'd need to drop the i_mutex. How are we supposed to know if we need to drop the i_mutex or not? What happens if the upper VFS layers change or we have a multiple call paths that have different i_mutex contexts (i.e. one holds, another doesn't)? Which makes me wonder - is this layout breaking stuff at the right layer? Cheers, Dave.
On Tue, Apr 07, 2015 at 05:07:47PM -0400, J. Bruce Fields wrote: > On Tue, Apr 07, 2015 at 05:35:44PM +0200, Christoph Hellwig wrote: > > We want to drop all I/O path locks when recalling layouts, and that includes > > i_mutex for the write path. Without this we get stuck processe when recalls > > take too long. > > Also if the writer is an nfsd thread than we'd rather just error out > than wait. We have no way to know we are called by nfsd here unfortunately. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Apr 08, 2015 at 08:19:27AM +1000, Dave Chinner wrote: > That's kinda nasty, and it has no documentation explaining when or > why we'd need to drop the i_mutex. How are we supposed to know if we > need to drop the i_mutex or not? We need to drop it if we hold it, pretty easy. > What happens if the upper VFS > layers change or we have a multiple call paths that have different > i_mutex contexts (i.e. one holds, another doesn't)? We avoid this in the VFS, as everytime we had it filesystems were getting it wrong. However you have a point in that we should probably have asserts that the right locks are held. > Which makes me wonder - is this layout breaking stuff at the right > layer? We can't do it in the VFS as it needs to be atomic vs the lock that protects write in ->write and ->fallocate, which is only taken in the filesystem. For ->setattr in theory we could do it in the VFS, but if the other callers can't do it in the VFS that will just lead to code duplication. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, Apr 08, 2015 at 06:21:04PM +0200, Christoph Hellwig wrote: > On Tue, Apr 07, 2015 at 05:07:47PM -0400, J. Bruce Fields wrote: > > On Tue, Apr 07, 2015 at 05:35:44PM +0200, Christoph Hellwig wrote: > > > We want to drop all I/O path locks when recalling layouts, and that includes > > > i_mutex for the write path. Without this we get stuck processe when recalls > > > take too long. > > > > Also if the writer is an nfsd thread than we'd rather just error out > > than wait. (To be clear: ACK to this patch as far as I'm concerned, I've got another concern but we need this fix regardless.) > We have no way to know we are called by nfsd here unfortunately. I was imagining the possible deadlock here as mostly theoretical, but now that I think of it it doesn't sound unlikely at all: - file is under heavy write load - conflicting operation breaks layout - nfsd threads all block in writes to that file - no nfsd threads available to service layout return - recall times out, client fenced. Ugh. --b. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index a2e1cb8..963dfb2 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -559,7 +559,7 @@ restart: if (error) return error; - error = xfs_break_layouts(inode, iolock); + error = xfs_break_layouts(inode, iolock, true); if (error) return error; @@ -843,7 +843,7 @@ xfs_file_fallocate( return -EOPNOTSUPP; xfs_ilock(ip, iolock); - error = xfs_break_layouts(inode, &iolock); + error = xfs_break_layouts(inode, &iolock, false); if (error) goto out_unlock; diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index ac4feae..3a21cc7 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -639,7 +639,7 @@ xfs_ioc_space( return error; xfs_ilock(ip, iolock); - error = xfs_break_layouts(inode, &iolock); + error = xfs_break_layouts(inode, &iolock, false); if (error) goto out_unlock; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index e53a903..79520c7 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -975,7 +975,7 @@ xfs_vn_setattr( uint iolock = XFS_IOLOCK_EXCL; xfs_ilock(ip, iolock); - error = xfs_break_layouts(dentry->d_inode, &iolock); + error = xfs_break_layouts(dentry->d_inode, &iolock, true); if (!error) error = xfs_setattr_size(ip, iattr); xfs_iunlock(ip, iolock); diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index 365dd57..981a657 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -31,7 +31,8 @@ int xfs_break_layouts( struct inode *inode, - uint *iolock) + uint *iolock, + bool with_imutex) { struct xfs_inode *ip = XFS_I(inode); int error; @@ -40,8 +41,12 @@ xfs_break_layouts( while ((error = break_layout(inode, false) == -EWOULDBLOCK)) { xfs_iunlock(ip, *iolock); + if (with_imutex && (*iolock & XFS_IOLOCK_EXCL)) + mutex_unlock(&inode->i_mutex); error = break_layout(inode, true); *iolock = XFS_IOLOCK_EXCL; + if (with_imutex) + mutex_lock(&inode->i_mutex); xfs_ilock(ip, *iolock); } diff --git a/fs/xfs/xfs_pnfs.h b/fs/xfs/xfs_pnfs.h index b7fbfce..f749475 100644 --- a/fs/xfs/xfs_pnfs.h +++ b/fs/xfs/xfs_pnfs.h @@ -8,7 +8,7 @@ int xfs_fs_map_blocks(struct inode *inode, loff_t offset, u64 length, int xfs_fs_commit_blocks(struct inode *inode, struct iomap *maps, int nr_maps, struct iattr *iattr); -int xfs_break_layouts(struct inode *inode, uint *iolock); +int xfs_break_layouts(struct inode *inode, uint *iolock, bool with_imutex); #else static inline int xfs_break_layouts(struct inode *inode, uint *iolock) {
We want to drop all I/O path locks when recalling layouts, and that includes i_mutex for the write path. Without this we get stuck processe when recalls take too long. Signed-off-by: Christoph Hellwig <hch@lst.de> --- fs/xfs/xfs_file.c | 4 ++-- fs/xfs/xfs_ioctl.c | 2 +- fs/xfs/xfs_iops.c | 2 +- fs/xfs/xfs_pnfs.c | 7 ++++++- fs/xfs/xfs_pnfs.h | 2 +- 5 files changed, 11 insertions(+), 6 deletions(-)