diff mbox series

block: workaround for unaligned byte range in fallocate()

Message ID 1566498661-53008-1-git-send-email-andrey.shinkevich@virtuozzo.com (mailing list archive)
State New, archived
Headers show
Series block: workaround for unaligned byte range in fallocate() | expand

Commit Message

Andrey Shinkevich Aug. 22, 2019, 6:31 p.m. UTC
Revert the commit 118f99442d 'block/io.c: fix for the allocation failure'
and make better error handling for the file systems that do not support
fallocate() for the unaligned byte range. Allow falling back to pwrite
in case fallocate() returns EINVAL.

Suggested-by: Kevin Wolf <kwolf@redhat.com>
Suggested-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
---
Discussed in email thread with the message ID
<1554474244-553661-1-git-send-email-andrey.shinkevich@virtuozzo.com>

 block/file-posix.c | 7 +++++++
 block/io.c         | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

Comments

Vladimir Sementsov-Ogievskiy Aug. 22, 2019, 6:55 p.m. UTC | #1
22.08.2019 21:31, Andrey Shinkevich wrote:
> Revert the commit 118f99442d 'block/io.c: fix for the allocation failure'
> and make better error handling for the file systems that do not support
> fallocate() for the unaligned byte range. Allow falling back to pwrite
> in case fallocate() returns EINVAL.
> 
> Suggested-by: Kevin Wolf <kwolf@redhat.com>
> Suggested-by: Eric Blake <eblake@redhat.com>
> Signed-off-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
> ---
> Discussed in email thread with the message ID
> <1554474244-553661-1-git-send-email-andrey.shinkevich@virtuozzo.com>
> 
>   block/file-posix.c | 7 +++++++
>   block/io.c         | 2 +-
>   2 files changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/block/file-posix.c b/block/file-posix.c
> index fbeb006..2c254ff 100644
> --- a/block/file-posix.c
> +++ b/block/file-posix.c
> @@ -1588,6 +1588,13 @@ static int handle_aiocb_write_zeroes(void *opaque)
>       if (s->has_write_zeroes) {
>           int ret = do_fallocate(s->fd, FALLOC_FL_ZERO_RANGE,
>                                  aiocb->aio_offset, aiocb->aio_nbytes);
> +        if (ret == -EINVAL) {
> +            /*
> +             * Allow falling back to pwrite for file systems that
> +             * do not support fallocate() for unaligned byte range.
> +             */
> +            return -ENOTSUP;
> +        }
>           if (ret == 0 || ret != -ENOTSUP) {
>               return ret;
>           }

Hmm stop, you've done exactly what Den was afraid of:

the next line
   s->has_write_zeroes = false;

will disable write_zeroes forever.

Something like

--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -1588,10 +1588,12 @@ static int handle_aiocb_write_zeroes(void *opaque)
      if (s->has_write_zeroes) {
          int ret = do_fallocate(s->fd, FALLOC_FL_ZERO_RANGE,
                                 aiocb->aio_offset, aiocb->aio_nbytes);
-        if (ret == 0 || ret != -ENOTSUP) {
+        if (ret == 0 || (ret != -ENOTSUP && ret != -EINVAL)) {
              return ret;
          }
-        s->has_write_zeroes = false;
+        if (ret == -ENOTSUP) {
+            s->has_write_zeroes = false;
+        }
      }
  #endif


will work better. So, handle ENOTSUP as "disable write_zeroes forever", and EINVAL as
"don't disable, but fall back to writing zeros". And we need the same handling for the following do_fallocate() calls
too (otherwise they will fail with EINVAL again, which will break the whole thing).

> diff --git a/block/io.c b/block/io.c
> index 56bbf19..58f08cd 100644
> --- a/block/io.c
> +++ b/block/io.c
> @@ -1558,7 +1558,7 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
>               assert(!bs->supported_zero_flags);
>           }
>   
> -        if (ret < 0 && !(flags & BDRV_REQ_NO_FALLBACK)) {
> +        if (ret == -ENOTSUP && !(flags & BDRV_REQ_NO_FALLBACK)) {
>               /* Fall back to bounce buffer if write zeroes is unsupported */
>               BdrvRequestFlags write_flags = flags & ~BDRV_REQ_ZERO_WRITE;
>   
>
Vladimir Sementsov-Ogievskiy Aug. 22, 2019, 7:10 p.m. UTC | #2
22.08.2019 21:55, Vladimir Sementsov-Ogievskiy wrote:
> 22.08.2019 21:31, Andrey Shinkevich wrote:
>> Revert the commit 118f99442d 'block/io.c: fix for the allocation failure'
>> and make better error handling for the file systems that do not support
>> fallocate() for the unaligned byte range. Allow falling back to pwrite
>> in case fallocate() returns EINVAL.
>>
>> Suggested-by: Kevin Wolf <kwolf@redhat.com>
>> Suggested-by: Eric Blake <eblake@redhat.com>
>> Signed-off-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
>> ---
>> Discussed in email thread with the message ID
>> <1554474244-553661-1-git-send-email-andrey.shinkevich@virtuozzo.com>
>>
>>   block/file-posix.c | 7 +++++++
>>   block/io.c         | 2 +-
>>   2 files changed, 8 insertions(+), 1 deletion(-)
>>
>> diff --git a/block/file-posix.c b/block/file-posix.c
>> index fbeb006..2c254ff 100644
>> --- a/block/file-posix.c
>> +++ b/block/file-posix.c
>> @@ -1588,6 +1588,13 @@ static int handle_aiocb_write_zeroes(void *opaque)
>>       if (s->has_write_zeroes) {
>>           int ret = do_fallocate(s->fd, FALLOC_FL_ZERO_RANGE,
>>                                  aiocb->aio_offset, aiocb->aio_nbytes);
>> +        if (ret == -EINVAL) {
>> +            /*
>> +             * Allow falling back to pwrite for file systems that
>> +             * do not support fallocate() for unaligned byte range.
>> +             */
>> +            return -ENOTSUP;
>> +        }
>>           if (ret == 0 || ret != -ENOTSUP) {
>>               return ret;
>>           }
> 
> Hmm stop, you've done exactly what Den was afraid of:
> 
> the next line
>    s->has_write_zeroes = false;
> 
> will disable write_zeroes forever.
> 
> Something like
> 
> --- a/block/file-posix.c
> +++ b/block/file-posix.c
> @@ -1588,10 +1588,12 @@ static int handle_aiocb_write_zeroes(void *opaque)
>       if (s->has_write_zeroes) {
>           int ret = do_fallocate(s->fd, FALLOC_FL_ZERO_RANGE,
>                                  aiocb->aio_offset, aiocb->aio_nbytes);
> -        if (ret == 0 || ret != -ENOTSUP) {
> +        if (ret == 0 || (ret != -ENOTSUP && ret != -EINVAL)) {
>               return ret;
>           }
> -        s->has_write_zeroes = false;
> +        if (ret == -ENOTSUP) {
> +            s->has_write_zeroes = false;
> +        }
>       }
>   #endif
> 
> 
> will work better. So, handle ENOTSUP as "disable write_zeroes forever", and EINVAL as
> "don't disable, but fall back to writing zeros". And we need the same handling for the following do_fallocate() calls
> too (otherwise they will fail with EINVAL again, which will break the whole thing).
> 

Oops, sorry, I misread your patch, it's OK.

Still, we may want to handle the other do_fallocate() calls in the same manner, or maybe just:

@@ -1558,7 +1558,13 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
              assert(!bs->supported_zero_flags);
          }

-        if (ret < 0 && !(flags & BDRV_REQ_NO_FALLBACK)) {
+        /*
+         * We are sure that our arguments make sense, so consider "invalid
+         * argument" in same manner as "not supported".
+         */
+        if ((ret == -ENOTSUP || ret == -EINVAL) &&
+            !(flags & BDRV_REQ_NO_FALLBACK))
+        {
              /* Fall back to bounce buffer if write zeroes is unsupported */
              BdrvRequestFlags write_flags = flags & ~BDRV_REQ_ZERO_WRITE;
Eric Blake Aug. 22, 2019, 9:09 p.m. UTC | #3
On 8/22/19 1:31 PM, Andrey Shinkevich wrote:
> Revert the commit 118f99442d 'block/io.c: fix for the allocation failure'
> and make better error handling for the file systems that do not support

s/make/use/

> fallocate() for the unaligned byte range. Allow falling back to pwrite

s/the/an/

> in case fallocate() returns EINVAL.
> 
> Suggested-by: Kevin Wolf <kwolf@redhat.com>
> Suggested-by: Eric Blake <eblake@redhat.com>
> Signed-off-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
> ---
> Discussed in email thread with the message ID
> <1554474244-553661-1-git-send-email-andrey.shinkevich@virtuozzo.com>
> 
>  block/file-posix.c | 7 +++++++
>  block/io.c         | 2 +-
>  2 files changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/block/file-posix.c b/block/file-posix.c
> index fbeb006..2c254ff 100644
> --- a/block/file-posix.c
> +++ b/block/file-posix.c
> @@ -1588,6 +1588,13 @@ static int handle_aiocb_write_zeroes(void *opaque)
>      if (s->has_write_zeroes) {
>          int ret = do_fallocate(s->fd, FALLOC_FL_ZERO_RANGE,
>                                 aiocb->aio_offset, aiocb->aio_nbytes);
> +        if (ret == -EINVAL) {
> +            /*
> +             * Allow falling back to pwrite for file systems that
> +             * do not support fallocate() for unaligned byte range.

s/for/for an/

> +             */
> +            return -ENOTSUP;
> +        }
>          if (ret == 0 || ret != -ENOTSUP) {
>              return ret;
>          }
> diff --git a/block/io.c b/block/io.c
> index 56bbf19..58f08cd 100644
> --- a/block/io.c
> +++ b/block/io.c
> @@ -1558,7 +1558,7 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
>              assert(!bs->supported_zero_flags);
>          }
>  
> -        if (ret < 0 && !(flags & BDRV_REQ_NO_FALLBACK)) {
> +        if (ret == -ENOTSUP && !(flags & BDRV_REQ_NO_FALLBACK)) {
>              /* Fall back to bounce buffer if write zeroes is unsupported */
>              BdrvRequestFlags write_flags = flags & ~BDRV_REQ_ZERO_WRITE;
>  
> 

Reviewed-by: Eric Blake <eblake@redhat.com>
Andrey Shinkevich Aug. 23, 2019, 12:07 p.m. UTC | #4
On 23/08/2019 00:09, Eric Blake wrote:
> On 8/22/19 1:31 PM, Andrey Shinkevich wrote:
>> Revert the commit 118f99442d 'block/io.c: fix for the allocation failure'
>> and make better error handling for the file systems that do not support
> 
> s/make/use/
> 
>> fallocate() for the unaligned byte range. Allow falling back to pwrite
> 
> s/the/an/
> 
>> in case fallocate() returns EINVAL.
>>
>> Suggested-by: Kevin Wolf <kwolf@redhat.com>
>> Suggested-by: Eric Blake <eblake@redhat.com>
>> Signed-off-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
>> ---
>> Discussed in email thread with the message ID
>> <1554474244-553661-1-git-send-email-andrey.shinkevich@virtuozzo.com>
>>
>>   block/file-posix.c | 7 +++++++
>>   block/io.c         | 2 +-
>>   2 files changed, 8 insertions(+), 1 deletion(-)
>>
>> diff --git a/block/file-posix.c b/block/file-posix.c
>> index fbeb006..2c254ff 100644
>> --- a/block/file-posix.c
>> +++ b/block/file-posix.c
>> @@ -1588,6 +1588,13 @@ static int handle_aiocb_write_zeroes(void *opaque)
>>       if (s->has_write_zeroes) {
>>           int ret = do_fallocate(s->fd, FALLOC_FL_ZERO_RANGE,
>>                                  aiocb->aio_offset, aiocb->aio_nbytes);
>> +        if (ret == -EINVAL) {
>> +            /*
>> +             * Allow falling back to pwrite for file systems that
>> +             * do not support fallocate() for unaligned byte range.
> 
> s/for/for an/
> 
>> +             */
>> +            return -ENOTSUP;
>> +        }
>>           if (ret == 0 || ret != -ENOTSUP) {
>>               return ret;
>>           }
>> diff --git a/block/io.c b/block/io.c
>> index 56bbf19..58f08cd 100644
>> --- a/block/io.c
>> +++ b/block/io.c
>> @@ -1558,7 +1558,7 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
>>               assert(!bs->supported_zero_flags);
>>           }
>>   
>> -        if (ret < 0 && !(flags & BDRV_REQ_NO_FALLBACK)) {
>> +        if (ret == -ENOTSUP && !(flags & BDRV_REQ_NO_FALLBACK)) {
>>               /* Fall back to bounce buffer if write zeroes is unsupported */
>>               BdrvRequestFlags write_flags = flags & ~BDRV_REQ_ZERO_WRITE;
>>   
>>
> 
> Reviewed-by: Eric Blake <eblake@redhat.com>
> 

Thank you Eric!
Also, I would prefer "make the finer error handling" because the word 
'better' has a wide sense.
I have prepared the v2 with your corrections and am waiting for Denis 
Lunev's response. He is coming back to work from his vacation on Tuesday.

Andrey
Denis V. Lunev Aug. 27, 2019, 12:35 p.m. UTC | #5
On 8/22/19 11:09 PM, Eric Blake wrote:
> On 8/22/19 1:31 PM, Andrey Shinkevich wrote:
>> Revert the commit 118f99442d 'block/io.c: fix for the allocation failure'
>> and make better error handling for the file systems that do not support
> s/make/use/
>
>> fallocate() for the unaligned byte range. Allow falling back to pwrite
> s/the/an/
>
>> in case fallocate() returns EINVAL.
>>
>> Suggested-by: Kevin Wolf <kwolf@redhat.com>
>> Suggested-by: Eric Blake <eblake@redhat.com>
>> Signed-off-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
>> ---
>> Discussed in email thread with the message ID
>> <1554474244-553661-1-git-send-email-andrey.shinkevich@virtuozzo.com>
>>
>>  block/file-posix.c | 7 +++++++
>>  block/io.c         | 2 +-
>>  2 files changed, 8 insertions(+), 1 deletion(-)
>>
>> diff --git a/block/file-posix.c b/block/file-posix.c
>> index fbeb006..2c254ff 100644
>> --- a/block/file-posix.c
>> +++ b/block/file-posix.c
>> @@ -1588,6 +1588,13 @@ static int handle_aiocb_write_zeroes(void *opaque)
>>      if (s->has_write_zeroes) {
>>          int ret = do_fallocate(s->fd, FALLOC_FL_ZERO_RANGE,
>>                                 aiocb->aio_offset, aiocb->aio_nbytes);
>> +        if (ret == -EINVAL) {
>> +            /*
>> +             * Allow falling back to pwrite for file systems that
>> +             * do not support fallocate() for unaligned byte range.
> s/for/for an/
>
>> +             */
>> +            return -ENOTSUP;
>> +        }
>>          if (ret == 0 || ret != -ENOTSUP) {
>>              return ret;
>>          }
>> diff --git a/block/io.c b/block/io.c
>> index 56bbf19..58f08cd 100644
>> --- a/block/io.c
>> +++ b/block/io.c
>> @@ -1558,7 +1558,7 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
>>              assert(!bs->supported_zero_flags);
>>          }
>>  
>> -        if (ret < 0 && !(flags & BDRV_REQ_NO_FALLBACK)) {
>> +        if (ret == -ENOTSUP && !(flags & BDRV_REQ_NO_FALLBACK)) {
>>              /* Fall back to bounce buffer if write zeroes is unsupported */
>>              BdrvRequestFlags write_flags = flags & ~BDRV_REQ_ZERO_WRITE;
>>  
>>
> Reviewed-by: Eric Blake <eblake@redhat.com>
>
Reviewed-by: Denis V. Lunev <den@openvz.org>
Andrey Shinkevich Aug. 27, 2019, 12:39 p.m. UTC | #6
On 23/08/2019 00:09, Eric Blake wrote:
> On 8/22/19 1:31 PM, Andrey Shinkevich wrote:
>> Revert the commit 118f99442d 'block/io.c: fix for the allocation failure'
>> and make better error handling for the file systems that do not support
> 
> s/make/use/
> 
>> fallocate() for the unaligned byte range. Allow falling back to pwrite
> 
> s/the/an/
> 
>> in case fallocate() returns EINVAL.
>>
>> Suggested-by: Kevin Wolf <kwolf@redhat.com>
>> Suggested-by: Eric Blake <eblake@redhat.com>
>> Signed-off-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
>> ---
>> Discussed in email thread with the message ID
>> <1554474244-553661-1-git-send-email-andrey.shinkevich@virtuozzo.com>
>>
>>   block/file-posix.c | 7 +++++++
>>   block/io.c         | 2 +-
>>   2 files changed, 8 insertions(+), 1 deletion(-)
>>
>> diff --git a/block/file-posix.c b/block/file-posix.c
>> index fbeb006..2c254ff 100644
>> --- a/block/file-posix.c
>> +++ b/block/file-posix.c
>> @@ -1588,6 +1588,13 @@ static int handle_aiocb_write_zeroes(void *opaque)
>>       if (s->has_write_zeroes) {
>>           int ret = do_fallocate(s->fd, FALLOC_FL_ZERO_RANGE,
>>                                  aiocb->aio_offset, aiocb->aio_nbytes);
>> +        if (ret == -EINVAL) {
>> +            /*
>> +             * Allow falling back to pwrite for file systems that
>> +             * do not support fallocate() for unaligned byte range.
> 
> s/for/for an/
> 
>> +             */
>> +            return -ENOTSUP;
>> +        }
>>           if (ret == 0 || ret != -ENOTSUP) {
>>               return ret;
>>           }
>> diff --git a/block/io.c b/block/io.c
>> index 56bbf19..58f08cd 100644
>> --- a/block/io.c
>> +++ b/block/io.c
>> @@ -1558,7 +1558,7 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
>>               assert(!bs->supported_zero_flags);
>>           }
>>   
>> -        if (ret < 0 && !(flags & BDRV_REQ_NO_FALLBACK)) {
>> +        if (ret == -ENOTSUP && !(flags & BDRV_REQ_NO_FALLBACK)) {
>>               /* Fall back to bounce buffer if write zeroes is unsupported */
>>               BdrvRequestFlags write_flags = flags & ~BDRV_REQ_ZERO_WRITE;
>>   
>>
> 
> Reviewed-by: Eric Blake <eblake@redhat.com>
> 
Eric,
If you are good to take this patch to your branch with your corrections, 
please let us know. Otherwise, I will send the v2 with your corrections.
Eric Blake Aug. 27, 2019, 1:50 p.m. UTC | #7
On 8/27/19 7:39 AM, Andrey Shinkevich wrote:
> 
> 

>>>
>>
>> Reviewed-by: Eric Blake <eblake@redhat.com>
>>
> Eric,
> If you are good to take this patch to your branch with your corrections, 
> please let us know. Otherwise, I will send the v2 with your corrections.

Since I based my fast-zero patches on top of this, I'm fine making the
corrections locally and queuing this patch through my NBD tree.
diff mbox series

Patch

diff --git a/block/file-posix.c b/block/file-posix.c
index fbeb006..2c254ff 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -1588,6 +1588,13 @@  static int handle_aiocb_write_zeroes(void *opaque)
     if (s->has_write_zeroes) {
         int ret = do_fallocate(s->fd, FALLOC_FL_ZERO_RANGE,
                                aiocb->aio_offset, aiocb->aio_nbytes);
+        if (ret == -EINVAL) {
+            /*
+             * Allow falling back to pwrite for file systems that
+             * do not support fallocate() for unaligned byte range.
+             */
+            return -ENOTSUP;
+        }
         if (ret == 0 || ret != -ENOTSUP) {
             return ret;
         }
diff --git a/block/io.c b/block/io.c
index 56bbf19..58f08cd 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1558,7 +1558,7 @@  static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
             assert(!bs->supported_zero_flags);
         }
 
-        if (ret < 0 && !(flags & BDRV_REQ_NO_FALLBACK)) {
+        if (ret == -ENOTSUP && !(flags & BDRV_REQ_NO_FALLBACK)) {
             /* Fall back to bounce buffer if write zeroes is unsupported */
             BdrvRequestFlags write_flags = flags & ~BDRV_REQ_ZERO_WRITE;