diff mbox series

[03/15] iomap: Allow filesystem to call iomap_dio_complete without i_rwsem

Message ID 20200921144353.31319-4-rgoldwyn@suse.de (mailing list archive)
State New, archived
Headers show
Series BTRFS DIO inode locking/D_SYNC fix | expand

Commit Message

Goldwyn Rodrigues Sept. 21, 2020, 2:43 p.m. UTC
From: Christoph Hellwig <hch@lst.de>

This is to avoid the deadlock caused in btrfs because of O_DIRECT |
O_DSYNC.

Filesystems such as btrfs require i_rwsem while performing sync on a
file. iomap_dio_rw() is called under i_rwsem. This leads to a
deadlock because of:

iomap_dio_complete()
  generic_write_sync()
    btrfs_sync_file()

Separate out iomap_dio_complete() from iomap_dio_rw(), so filesystems
can call iomap_dio_complete() after unlocking i_rwsem.

Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
---
 fs/iomap/direct-io.c  | 34 +++++++++++++++++++++++++---------
 include/linux/iomap.h |  5 +++++
 2 files changed, 30 insertions(+), 9 deletions(-)

Comments

Johannes Thumshirn Sept. 21, 2020, 3:09 p.m. UTC | #1
On 21/09/2020 16:44, Goldwyn Rodrigues wrote:
> From: Christoph Hellwig <hch@lst.de>
> 
> This is to avoid the deadlock caused in btrfs because of O_DIRECT |
> O_DSYNC.
> 
> Filesystems such as btrfs require i_rwsem while performing sync on a
> file. iomap_dio_rw() is called under i_rw_sem. This leads to a
> deadlock because of:
> 
> iomap_dio_complete()
>   generic_write_sync()
>     btrfs_sync_file()
> 
> Separate out iomap_dio_complete() from iomap_dio_rw(), so filesystems
> can call iomap_dio_complete() after unlocking i_rwsem.
> 
> Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
>

This is missing Christoph's S-o-b
Christoph Hellwig Sept. 22, 2020, 1:19 p.m. UTC | #2
On Mon, Sep 21, 2020 at 03:09:29PM +0000, Johannes Thumshirn wrote:
> On 21/09/2020 16:44, Goldwyn Rodrigues wrote:
> > From: Christoph Hellwig <hch@lst.de>
> > 
> > This is to avoid the deadlock caused in btrfs because of O_DIRECT |
> > O_DSYNC.
> > 
> > Filesystems such as btrfs require i_rwsem while performing sync on a
> > file. iomap_dio_rw() is called under i_rw_sem. This leads to a
> > deadlock because of:
> > 
> > iomap_dio_complete()
> >   generic_write_sync()
> >     btrfs_sync_file()
> > 
> > Separate out iomap_dio_complete() from iomap_dio_rw(), so filesystems
> > can call iomap_dio_complete() after unlocking i_rwsem.
> > 
> > Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> >
> 
> This is missing Christoph's S-o-b

Goldwyn picked this up for an RFC.  But this looks like my patch with
a sane commit log, so:

Signed-off-by: Christoph Hellwig <hch@lst.de>
Josef Bacik Sept. 22, 2020, 2:17 p.m. UTC | #3
On 9/21/20 10:43 AM, Goldwyn Rodrigues wrote:
> From: Christoph Hellwig <hch@lst.de>
> 
> This is to avoid the deadlock caused in btrfs because of O_DIRECT |
> O_DSYNC.
> 
> Filesystems such as btrfs require i_rwsem while performing sync on a
> file. iomap_dio_rw() is called under i_rw_sem. This leads to a
> deadlock because of:
> 
> iomap_dio_complete()
>    generic_write_sync()
>      btrfs_sync_file()
> 
> Separate out iomap_dio_complete() from iomap_dio_rw(), so filesystems
> can call iomap_dio_complete() after unlocking i_rwsem.
> 
> Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
> ---
>   fs/iomap/direct-io.c  | 34 +++++++++++++++++++++++++---------
>   include/linux/iomap.h |  5 +++++
>   2 files changed, 30 insertions(+), 9 deletions(-)
> 
> diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
> index c1aafb2ab990..d970c6bbbe11 100644
> --- a/fs/iomap/direct-io.c
> +++ b/fs/iomap/direct-io.c
> @@ -76,7 +76,7 @@ static void iomap_dio_submit_bio(struct iomap_dio *dio, struct iomap *iomap,
>   		dio->submit.cookie = submit_bio(bio);
>   }
>   
> -static ssize_t iomap_dio_complete(struct iomap_dio *dio)
> +ssize_t iomap_dio_complete(struct iomap_dio *dio)
>   {
>   	const struct iomap_dio_ops *dops = dio->dops;
>   	struct kiocb *iocb = dio->iocb;
> @@ -130,6 +130,7 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
>   
>   	return ret;
>   }
> +EXPORT_SYMBOL_GPL(iomap_dio_complete);
>   
>   static void iomap_dio_complete_work(struct work_struct *work)
>   {
> @@ -406,8 +407,8 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
>    * Returns -ENOTBLK In case of a page invalidation invalidation failure for
>    * writes.  The callers needs to fall back to buffered I/O in this case.
>    */
> -ssize_t
> -iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
> +struct iomap_dio *
> +__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
>   		const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
>   		bool wait_for_completion)
>   {
> @@ -421,14 +422,14 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
>   	struct iomap_dio *dio;
>   
>   	if (!count)
> -		return 0;
> +		return NULL;
>   
>   	if (WARN_ON(is_sync_kiocb(iocb) && !wait_for_completion))
> -		return -EIO;
> +		return ERR_PTR(-EIO);
>   
>   	dio = kmalloc(sizeof(*dio), GFP_KERNEL);
>   	if (!dio)
> -		return -ENOMEM;
> +		return ERR_PTR(-ENOMEM);
>   
>   	dio->iocb = iocb;
>   	atomic_set(&dio->ref, 1);
> @@ -558,7 +559,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
>   	dio->wait_for_completion = wait_for_completion;
>   	if (!atomic_dec_and_test(&dio->ref)) {
>   		if (!wait_for_completion)
> -			return -EIOCBQUEUED;
> +			return ERR_PTR(-EIOCBQUEUED);
>   
>   		for (;;) {
>   			set_current_state(TASK_UNINTERRUPTIBLE);
> @@ -574,10 +575,25 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
>   		__set_current_state(TASK_RUNNING);
>   	}
>   
> -	return iomap_dio_complete(dio);
> +	return dio;
>   
>   out_free_dio:
>   	kfree(dio);
> -	return ret;
> +	return ERR_PTR(ret);
> +}
> +EXPORT_SYMBOL_GPL(__iomap_dio_rw);
> +
> +ssize_t
> +iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
> +		const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
> +		bool wait_for_completion)
> +{
> +	struct iomap_dio *dio;
> +
> +	dio = __iomap_dio_rw(iocb, iter, ops, dops, wait_for_completion);
> +	if (IS_ERR_OR_NULL(dio))
> +		return PTR_ERR_OR_ZERO(dio);
> +	return iomap_dio_complete(dio);
>   }
>   EXPORT_SYMBOL_GPL(iomap_dio_rw);
> +

Got an extra + here for some reason.  Otherwise

Reviewed-by: Josef Bacik <josef@toxicpanda.com>

Thanks,

Josef
diff mbox series

Patch

diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index c1aafb2ab990..d970c6bbbe11 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -76,7 +76,7 @@  static void iomap_dio_submit_bio(struct iomap_dio *dio, struct iomap *iomap,
 		dio->submit.cookie = submit_bio(bio);
 }
 
-static ssize_t iomap_dio_complete(struct iomap_dio *dio)
+ssize_t iomap_dio_complete(struct iomap_dio *dio)
 {
 	const struct iomap_dio_ops *dops = dio->dops;
 	struct kiocb *iocb = dio->iocb;
@@ -130,6 +130,7 @@  static ssize_t iomap_dio_complete(struct iomap_dio *dio)
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(iomap_dio_complete);
 
 static void iomap_dio_complete_work(struct work_struct *work)
 {
@@ -406,8 +407,8 @@  iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
  * Returns -ENOTBLK In case of a page invalidation invalidation failure for
  * writes.  The callers needs to fall back to buffered I/O in this case.
  */
-ssize_t
-iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
+struct iomap_dio *
+__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
 		bool wait_for_completion)
 {
@@ -421,14 +422,14 @@  iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 	struct iomap_dio *dio;
 
 	if (!count)
-		return 0;
+		return NULL;
 
 	if (WARN_ON(is_sync_kiocb(iocb) && !wait_for_completion))
-		return -EIO;
+		return ERR_PTR(-EIO);
 
 	dio = kmalloc(sizeof(*dio), GFP_KERNEL);
 	if (!dio)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	dio->iocb = iocb;
 	atomic_set(&dio->ref, 1);
@@ -558,7 +559,7 @@  iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 	dio->wait_for_completion = wait_for_completion;
 	if (!atomic_dec_and_test(&dio->ref)) {
 		if (!wait_for_completion)
-			return -EIOCBQUEUED;
+			return ERR_PTR(-EIOCBQUEUED);
 
 		for (;;) {
 			set_current_state(TASK_UNINTERRUPTIBLE);
@@ -574,10 +575,25 @@  iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		__set_current_state(TASK_RUNNING);
 	}
 
-	return iomap_dio_complete(dio);
+	return dio;
 
 out_free_dio:
 	kfree(dio);
-	return ret;
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(__iomap_dio_rw);
+
+ssize_t
+iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
+		const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
+		bool wait_for_completion)
+{
+	struct iomap_dio *dio;
+
+	dio = __iomap_dio_rw(iocb, iter, ops, dops, wait_for_completion);
+	if (IS_ERR_OR_NULL(dio))
+		return PTR_ERR_OR_ZERO(dio);
+	return iomap_dio_complete(dio);
 }
 EXPORT_SYMBOL_GPL(iomap_dio_rw);
+
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 4d1d3c3469e9..172b3397a1a3 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -13,6 +13,7 @@ 
 struct address_space;
 struct fiemap_extent_info;
 struct inode;
+struct iomap_dio;
 struct iomap_writepage_ctx;
 struct iov_iter;
 struct kiocb;
@@ -258,6 +259,10 @@  struct iomap_dio_ops {
 ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
 		bool wait_for_completion);
+struct iomap_dio *__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
+		const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
+		bool wait_for_completion);
+ssize_t iomap_dio_complete(struct iomap_dio *dio);
 int iomap_dio_iopoll(struct kiocb *kiocb, bool spin);
 
 #ifdef CONFIG_SWAP