diff mbox

FUSE: Improve aio directIO write performance for size extending writes.

Message ID 1460029691-7550-1-git-send-email-ashishsangwan2@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Ashish Sangwan April 7, 2016, 11:48 a.m. UTC
While sending a blocking directIO write in fuse, the write request is broken
into sub-requests, each of default size 128k, and all the requests are sent
in non-blocking background mode if async_dio mode is supported by libfuse.
The process which issues the write waits for the completion of all the
sub-requests. Sending multiple requests in parallel gives the user space
fuse implementation a chance to perform parallel writes if it is
multi-threaded, and hence improves performance.

When there is a size-extending aio dio write, we switch to
blocking mode so that we can properly update the size of the file after
completion of the writes. However, in this situation all the sub-requests
are sent in a serialized manner, where the next request is sent only after
receiving the reply to the current request. Hence the multi-threaded user
space implementation is not utilized properly.

This patch changes the size-extending aio dio behavior to exactly follow
blocking dio. For a multi-threaded fuse implementation having 10 threads and
using a buffer size of 64MB to perform async directIO, we are getting double
the speed.

Signed-off-by: Ashish Sangwan <ashishsangwan2@gmail.com>
---
 fs/fuse/file.c   |   16 ++++++++--------
 fs/fuse/fuse_i.h |    1 +
 2 files changed, 9 insertions(+), 8 deletions(-)

Comments

Ashish Sangwan April 14, 2016, 12:32 p.m. UTC | #1
*ping*

Last time it bounced off the fuse mailing list.

On Thu, Apr 7, 2016 at 5:18 PM, Ashish Sangwan <ashishsangwan2@gmail.com> wrote:
> While sending the blocking directIO in fuse, the write request is broken
> into sub-requests, each of default size 128k and all the requests are sent
> in non-blocking background mode if async_dio mode is supported by libfuse.
> The process which issue the write wait for the completion of all the
> sub-requests. Sending multiple requests parallely gives a chance to perform
> parallel writes in the user space fuse implementation if it is
> multi-threaded and hence improves the performance.
>
> When there is a size extending aio dio write, we switch to
> blocking mode so that we can properly update the size of the file after
> completion of the writes. However, in this situation all the sub-requests
> are sent in serialized manner where the next request is sent only after
> receiving the reply of the current request. Hence the multi-threaded user
> space implementation is not utilized properly.
>
> This patch changes the size extending aio dio behavior to exactly follow
> blocking dio. For multi threaded fuse implementation having 10 threads and
> using buffer size of 64MB to perform async directIO, we are getting double
> the speed.
>
> Signed-off-by: Ashish Sangwan <ashishsangwan2@gmail.com>
> ---
>  fs/fuse/file.c   |   16 ++++++++--------
>  fs/fuse/fuse_i.h |    1 +
>  2 files changed, 9 insertions(+), 8 deletions(-)
>
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index 9dde38f..b4f8b83 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -572,11 +572,11 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
>                 io->bytes = pos;
>
>         left = --io->reqs;
> -       if (!left && is_sync)
> +       if (!left && (is_sync || io->blocking_aio))
>                 complete(io->done);
>         spin_unlock(&io->lock);
>
> -       if (!left && !is_sync) {
> +       if (!left && !is_sync && !io->blocking_aio) {
>                 ssize_t res = fuse_get_res_by_io(io);
>
>                 if (res >= 0) {
> @@ -2884,17 +2884,17 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
>          */
>         io->async = async_dio;
>         io->iocb = iocb;
> +       io->blocking_aio = false;
>
>         /*
> -        * We cannot asynchronously extend the size of a file. We have no method
> -        * to wait on real async I/O requests, so we must submit this request
> -        * synchronously.
> +        * We cannot asynchronously extend the size of a file.
> +        * In such case the aio will behave exactly like sync io.
>          */
>         if (!is_sync && (offset + count > i_size) &&
>             iov_iter_rw(iter) == WRITE)
> -               io->async = false;
> +               io->blocking_aio = true;
>
> -       if (io->async && is_sync) {
> +       if (io->async && (is_sync | io->blocking_aio)) {
>                 /*
>                  * Additional reference to keep io around after
>                  * calling fuse_aio_complete()
> @@ -2914,7 +2914,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
>                 fuse_aio_complete(io, ret < 0 ? ret : 0, -1);
>
>                 /* we have a non-extending, async request, so return */
> -               if (!is_sync)
> +               if (!is_sync && !io->blocking_aio)
>                         return -EIOCBQUEUED;
>
>                 wait_for_completion(&wait);
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index eddbe02..a7cf03f 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -256,6 +256,7 @@ struct fuse_io_priv {
>         struct kiocb *iocb;
>         struct file *file;
>         struct completion *done;
> +       bool blocking_aio;
>  };
>
>  #define FUSE_IO_PRIV_SYNC(f) \
> --
> 1.7.9.5
>
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ashish Sangwan June 14, 2016, 11:44 a.m. UTC | #2
Hi Miklos,

Did you get any time to look into the patch?
It's been more than two months.

On Thu, Apr 14, 2016 at 6:02 PM, Ashish Sangwan
<ashishsangwan2@gmail.com> wrote:
> *ping*
>
> Last time it bounced off the fuse mailing list.
>
> On Thu, Apr 7, 2016 at 5:18 PM, Ashish Sangwan <ashishsangwan2@gmail.com> wrote:
>> While sending the blocking directIO in fuse, the write request is broken
>> into sub-requests, each of default size 128k and all the requests are sent
>> in non-blocking background mode if async_dio mode is supported by libfuse.
>> The process which issue the write wait for the completion of all the
>> sub-requests. Sending multiple requests parallely gives a chance to perform
>> parallel writes in the user space fuse implementation if it is
>> multi-threaded and hence improves the performance.
>>
>> When there is a size extending aio dio write, we switch to
>> blocking mode so that we can properly update the size of the file after
>> completion of the writes. However, in this situation all the sub-requests
>> are sent in serialized manner where the next request is sent only after
>> receiving the reply of the current request. Hence the multi-threaded user
>> space implementation is not utilized properly.
>>
>> This patch changes the size extending aio dio behavior to exactly follow
>> blocking dio. For multi threaded fuse implementation having 10 threads and
>> using buffer size of 64MB to perform async directIO, we are getting double
>> the speed.
>>
>> Signed-off-by: Ashish Sangwan <ashishsangwan2@gmail.com>
>> ---
>>  fs/fuse/file.c   |   16 ++++++++--------
>>  fs/fuse/fuse_i.h |    1 +
>>  2 files changed, 9 insertions(+), 8 deletions(-)
>>
>> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
>> index 9dde38f..b4f8b83 100644
>> --- a/fs/fuse/file.c
>> +++ b/fs/fuse/file.c
>> @@ -572,11 +572,11 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
>>                 io->bytes = pos;
>>
>>         left = --io->reqs;
>> -       if (!left && is_sync)
>> +       if (!left && (is_sync || io->blocking_aio))
>>                 complete(io->done);
>>         spin_unlock(&io->lock);
>>
>> -       if (!left && !is_sync) {
>> +       if (!left && !is_sync && !io->blocking_aio) {
>>                 ssize_t res = fuse_get_res_by_io(io);
>>
>>                 if (res >= 0) {
>> @@ -2884,17 +2884,17 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
>>          */
>>         io->async = async_dio;
>>         io->iocb = iocb;
>> +       io->blocking_aio = false;
>>
>>         /*
>> -        * We cannot asynchronously extend the size of a file. We have no method
>> -        * to wait on real async I/O requests, so we must submit this request
>> -        * synchronously.
>> +        * We cannot asynchronously extend the size of a file.
>> +        * In such case the aio will behave exactly like sync io.
>>          */
>>         if (!is_sync && (offset + count > i_size) &&
>>             iov_iter_rw(iter) == WRITE)
>> -               io->async = false;
>> +               io->blocking_aio = true;
>>
>> -       if (io->async && is_sync) {
>> +       if (io->async && (is_sync | io->blocking_aio)) {
>>                 /*
>>                  * Additional reference to keep io around after
>>                  * calling fuse_aio_complete()
>> @@ -2914,7 +2914,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
>>                 fuse_aio_complete(io, ret < 0 ? ret : 0, -1);
>>
>>                 /* we have a non-extending, async request, so return */
>> -               if (!is_sync)
>> +               if (!is_sync && !io->blocking_aio)
>>                         return -EIOCBQUEUED;
>>
>>                 wait_for_completion(&wait);
>> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
>> index eddbe02..a7cf03f 100644
>> --- a/fs/fuse/fuse_i.h
>> +++ b/fs/fuse/fuse_i.h
>> @@ -256,6 +256,7 @@ struct fuse_io_priv {
>>         struct kiocb *iocb;
>>         struct file *file;
>>         struct completion *done;
>> +       bool blocking_aio;
>>  };
>>
>>  #define FUSE_IO_PRIV_SYNC(f) \
>> --
>> 1.7.9.5
>>
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Miklos Szeredi June 16, 2016, 11:25 a.m. UTC | #3
On Thu, Apr 7, 2016 at 1:48 PM, Ashish Sangwan <ashishsangwan2@gmail.com> wrote:
> While sending the blocking directIO in fuse, the write request is broken
> into sub-requests, each of default size 128k and all the requests are sent
> in non-blocking background mode if async_dio mode is supported by libfuse.
> The process which issue the write wait for the completion of all the
> sub-requests. Sending multiple requests parallely gives a chance to perform
> parallel writes in the user space fuse implementation if it is
> multi-threaded and hence improves the performance.
>
> When there is a size extending aio dio write, we switch to
> blocking mode so that we can properly update the size of the file after
> completion of the writes. However, in this situation all the sub-requests
> are sent in serialized manner where the next request is sent only after
> receiving the reply of the current request. Hence the multi-threaded user
> space implementation is not utilized properly.
>
> This patch changes the size extending aio dio behavior to exactly follow
> blocking dio. For multi threaded fuse implementation having 10 threads and
> using buffer size of 64MB to perform async directIO, we are getting double
> the speed.
>
> Signed-off-by: Ashish Sangwan <ashishsangwan2@gmail.com>

Thanks for your patience.  Pushed to

  git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse.git for-next

I simplified the logic, please verify that I didn't mess something up.

Thanks,
Miklos
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sedat Dilek June 16, 2016, 11:51 a.m. UTC | #4
On Thu, Jun 16, 2016 at 1:25 PM, Miklos Szeredi <miklos@szeredi.hu> wrote:
> On Thu, Apr 7, 2016 at 1:48 PM, Ashish Sangwan <ashishsangwan2@gmail.com> wrote:
>> While sending the blocking directIO in fuse, the write request is broken
>> into sub-requests, each of default size 128k and all the requests are sent
>> in non-blocking background mode if async_dio mode is supported by libfuse.
>> The process which issue the write wait for the completion of all the
>> sub-requests. Sending multiple requests parallely gives a chance to perform
>> parallel writes in the user space fuse implementation if it is
>> multi-threaded and hence improves the performance.
>>
>> When there is a size extending aio dio write, we switch to
>> blocking mode so that we can properly update the size of the file after
>> completion of the writes. However, in this situation all the sub-requests
>> are sent in serialized manner where the next request is sent only after
>> receiving the reply of the current request. Hence the multi-threaded user
>> space implementation is not utilized properly.
>>
>> This patch changes the size extending aio dio behavior to exactly follow
>> blocking dio. For multi threaded fuse implementation having 10 threads and
>> using buffer size of 64MB to perform async directIO, we are getting double
>> the speed.
>>
>> Signed-off-by: Ashish Sangwan <ashishsangwan2@gmail.com>
>
> Thanks for you patience.  Pushed to
>
>   git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse.git for-next
>
> I simplified the logic, please verify that I didn't mess something up.
>

Hi,

I would like to test fuse-next on Ubuntu/precise aka 12.04-LTS AMD64.

Do I need a modern version of libfuse?
Libfuse v2.8.6 is installed here [1].

Thanks.

Regards,
- Sedat -

[1] http://packages.ubuntu.com/search?keywords=libfuse
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Miklos Szeredi June 16, 2016, 12:01 p.m. UTC | #5
On Thu, Jun 16, 2016 at 1:51 PM, Sedat Dilek <sedat.dilek@gmail.com> wrote:
> On Thu, Jun 16, 2016 at 1:25 PM, Miklos Szeredi <miklos@szeredi.hu> wrote:
>> On Thu, Apr 7, 2016 at 1:48 PM, Ashish Sangwan <ashishsangwan2@gmail.com> wrote:
>>> While sending the blocking directIO in fuse, the write request is broken
>>> into sub-requests, each of default size 128k and all the requests are sent
>>> in non-blocking background mode if async_dio mode is supported by libfuse.
>>> The process which issue the write wait for the completion of all the
>>> sub-requests. Sending multiple requests parallely gives a chance to perform
>>> parallel writes in the user space fuse implementation if it is
>>> multi-threaded and hence improves the performance.
>>>
>>> When there is a size extending aio dio write, we switch to
>>> blocking mode so that we can properly update the size of the file after
>>> completion of the writes. However, in this situation all the sub-requests
>>> are sent in serialized manner where the next request is sent only after
>>> receiving the reply of the current request. Hence the multi-threaded user
>>> space implementation is not utilized properly.
>>>
>>> This patch changes the size extending aio dio behavior to exactly follow
>>> blocking dio. For multi threaded fuse implementation having 10 threads and
>>> using buffer size of 64MB to perform async directIO, we are getting double
>>> the speed.
>>>
>>> Signed-off-by: Ashish Sangwan <ashishsangwan2@gmail.com>
>>
>> Thanks for you patience.  Pushed to
>>
>>   git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse.git for-next
>>
>> I simplified the logic, please verify that I didn't mess something up.
>>
>
> Hi,
>
> I would like to test fuse-next on Ubuntu/precise aka 12.04-LTS AMD64.
>
> Do I need a modern version of libfuse?
> Libfuse v2.8.6 is installed here.

Fuse will work fine but AFAICS the "async_dio" option was not added to
a 2.X release (it could be backported quite simply if needed).

Thanks,
Miklos
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sedat Dilek June 16, 2016, 12:22 p.m. UTC | #6
On Thu, Jun 16, 2016 at 2:01 PM, Miklos Szeredi <miklos@szeredi.hu> wrote:
> On Thu, Jun 16, 2016 at 1:51 PM, Sedat Dilek <sedat.dilek@gmail.com> wrote:
>> On Thu, Jun 16, 2016 at 1:25 PM, Miklos Szeredi <miklos@szeredi.hu> wrote:
>>> On Thu, Apr 7, 2016 at 1:48 PM, Ashish Sangwan <ashishsangwan2@gmail.com> wrote:
>>>> While sending the blocking directIO in fuse, the write request is broken
>>>> into sub-requests, each of default size 128k and all the requests are sent
>>>> in non-blocking background mode if async_dio mode is supported by libfuse.
>>>> The process which issue the write wait for the completion of all the
>>>> sub-requests. Sending multiple requests parallely gives a chance to perform
>>>> parallel writes in the user space fuse implementation if it is
>>>> multi-threaded and hence improves the performance.
>>>>
>>>> When there is a size extending aio dio write, we switch to
>>>> blocking mode so that we can properly update the size of the file after
>>>> completion of the writes. However, in this situation all the sub-requests
>>>> are sent in serialized manner where the next request is sent only after
>>>> receiving the reply of the current request. Hence the multi-threaded user
>>>> space implementation is not utilized properly.
>>>>
>>>> This patch changes the size extending aio dio behavior to exactly follow
>>>> blocking dio. For multi threaded fuse implementation having 10 threads and
>>>> using buffer size of 64MB to perform async directIO, we are getting double
>>>> the speed.
>>>>
>>>> Signed-off-by: Ashish Sangwan <ashishsangwan2@gmail.com>
>>>
>>> Thanks for you patience.  Pushed to
>>>
>>>   git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse.git for-next
>>>
>>> I simplified the logic, please verify that I didn't mess something up.
>>>
>>
>> Hi,
>>
>> I would like to test fuse-next on Ubuntu/precise aka 12.04-LTS AMD64.
>>
>> Do I need a modern version of libfuse?
>> Libfuse v2.8.6 is installed here.
>
> Fuse will work fine but AFAICS the "async_dio" option was not added to
> a 2.X release (it could be backported quite simply if needed).
>

Can you point me to - preferably - a Git repo of libfuse?
And the commit for backporting?

Thanks.

- Sedat -
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Miklos Szeredi June 16, 2016, 12:36 p.m. UTC | #7
On Thu, Jun 16, 2016 at 2:22 PM, Sedat Dilek <sedat.dilek@gmail.com> wrote:

> Can you point me to - preferable - a Git repo of libfuse?

https://github.com/libfuse

> And the commit for backporting?

8bb62a632caa ("libfuse: Add "async_dio" and "writeback_cache" options")

Thanks,
Miklos
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ashish Sangwan June 20, 2016, 6:50 a.m. UTC | #8
On Thu, Jun 16, 2016 at 4:55 PM, Miklos Szeredi <miklos@szeredi.hu> wrote:
> On Thu, Apr 7, 2016 at 1:48 PM, Ashish Sangwan <ashishsangwan2@gmail.com> wrote:
>> While sending the blocking directIO in fuse, the write request is broken
>> into sub-requests, each of default size 128k and all the requests are sent
>> in non-blocking background mode if async_dio mode is supported by libfuse.
>> The process which issue the write wait for the completion of all the
>> sub-requests. Sending multiple requests parallely gives a chance to perform
>> parallel writes in the user space fuse implementation if it is
>> multi-threaded and hence improves the performance.
>>
>> When there is a size extending aio dio write, we switch to
>> blocking mode so that we can properly update the size of the file after
>> completion of the writes. However, in this situation all the sub-requests
>> are sent in serialized manner where the next request is sent only after
>> receiving the reply of the current request. Hence the multi-threaded user
>> space implementation is not utilized properly.
>>
>> This patch changes the size extending aio dio behavior to exactly follow
>> blocking dio. For multi threaded fuse implementation having 10 threads and
>> using buffer size of 64MB to perform async directIO, we are getting double
>> the speed.
>>
>> Signed-off-by: Ashish Sangwan <ashishsangwan2@gmail.com>
>
> Thanks for you patience.  Pushed to
>
>   git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse.git for-next
>
> I simplified the logic, please verify that I didn't mess something up.

The change looks ok.

>
> Thanks,
> Miklos
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sedat Dilek June 20, 2016, 9:20 a.m. UTC | #9
On Thu, Jun 16, 2016 at 2:36 PM, Miklos Szeredi <miklos@szeredi.hu> wrote:
> On Thu, Jun 16, 2016 at 2:22 PM, Sedat Dilek <sedat.dilek@gmail.com> wrote:
>
>> Can you point me to - preferable - a Git repo of libfuse?
>
> https://github.com/libfuse
>
>> And the commit for backporting?
>
> 8bb62a632caa ("libfuse: Add "async_dio" and "writeback_cache" options")
>

[1] says...

"libfuse: Add "async_dio" and "writeback_cache" options

Asynchronous direct I/O is supported by linux kernels 3.13 and
later, writeback caching is supported by 3.14 and later."

Here on Ubuntu/precise I have HWE enabled, which officially ships
Linux kernel v3.13.
According to the above log I need at least v3.14 for both features.

Not sure if backporting is easy-to-apply for v3.13.

IIRC Debian maintains a 3.16 LTS kernel.

AFAICS libfuse v2.9.6 which is not available in any Ubuntu release [4]
has this change.
I can build with the sources from Debian [5].

Hmm, maybe it's time to switch over to Ubuntu/xenial or a modern Debian system.

- Sedat -

[1] https://github.com/libfuse/libfuse/commit/8bb62a632caa4269bb6436cae67307404882b936
[2] https://github.com/libfuse/libfuse/releases
[3] http://git.kernel.org/cgit/linux/kernel/git/mszeredi/fuse.git/commit/?h=for-next&id=507c552aa58fa48fdea1373948cfc0a1fd0bf61b
[4] https://packages.ubuntu.com/libfuse
[5] https://packages.debian.org/libfuse
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 9dde38f..b4f8b83 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -572,11 +572,11 @@  static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
 		io->bytes = pos;
 
 	left = --io->reqs;
-	if (!left && is_sync)
+	if (!left && (is_sync || io->blocking_aio))
 		complete(io->done);
 	spin_unlock(&io->lock);
 
-	if (!left && !is_sync) {
+	if (!left && !is_sync && !io->blocking_aio) {
 		ssize_t res = fuse_get_res_by_io(io);
 
 		if (res >= 0) {
@@ -2884,17 +2884,17 @@  fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
 	 */
 	io->async = async_dio;
 	io->iocb = iocb;
+	io->blocking_aio = false;
 
 	/*
-	 * We cannot asynchronously extend the size of a file. We have no method
-	 * to wait on real async I/O requests, so we must submit this request
-	 * synchronously.
+	 * We cannot asynchronously extend the size of a file.
+	 * In such case the aio will behave exactly like sync io.
 	 */
 	if (!is_sync && (offset + count > i_size) &&
 	    iov_iter_rw(iter) == WRITE)
-		io->async = false;
+		io->blocking_aio = true;
 
-	if (io->async && is_sync) {
+	if (io->async && (is_sync | io->blocking_aio)) {
 		/*
 		 * Additional reference to keep io around after
 		 * calling fuse_aio_complete()
@@ -2914,7 +2914,7 @@  fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
 		fuse_aio_complete(io, ret < 0 ? ret : 0, -1);
 
 		/* we have a non-extending, async request, so return */
-		if (!is_sync)
+		if (!is_sync && !io->blocking_aio)
 			return -EIOCBQUEUED;
 
 		wait_for_completion(&wait);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index eddbe02..a7cf03f 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -256,6 +256,7 @@  struct fuse_io_priv {
 	struct kiocb *iocb;
 	struct file *file;
 	struct completion *done;
+	bool blocking_aio;
 };
 
 #define FUSE_IO_PRIV_SYNC(f) \