diff mbox series

block: Reject misaligned write requests with BDRV_REQ_NO_FALLBACK

Message ID 20191013204853.1046-1-berto@igalia.com (mailing list archive)
State New, archived
Headers show
Series block: Reject misaligned write requests with BDRV_REQ_NO_FALLBACK | expand

Commit Message

Alberto Garcia Oct. 13, 2019, 8:48 p.m. UTC
The BDRV_REQ_NO_FALLBACK flag means that an operation should only be
performed if it can be offloaded or otherwise performed efficiently.

However a misaligned write request requires a RMW so we should return
an error and let the caller decide how to proceed.

This hits an assertion since commit c8bb23cbdb if the required
alignment is larger than the cluster size:

qemu-img create -f qcow2 -o cluster_size=2k img.qcow2 4G
qemu-io -c "open -o driver=qcow2,file.align=4k blkdebug::img.qcow2" \
        -c 'write 0 512'
qemu-io: block/io.c:1127: bdrv_driver_pwritev: Assertion `!(flags & BDRV_REQ_NO_FALLBACK)' failed.
Aborted

Signed-off-by: Alberto Garcia <berto@igalia.com>
---
 block/io.c                 |  6 +++++
 tests/qemu-iotests/268     | 55 ++++++++++++++++++++++++++++++++++++++
 tests/qemu-iotests/268.out |  7 +++++
 tests/qemu-iotests/group   |  1 +
 4 files changed, 69 insertions(+)
 create mode 100755 tests/qemu-iotests/268
 create mode 100644 tests/qemu-iotests/268.out

Comments

Vladimir Sementsov-Ogievskiy Oct. 14, 2019, 10:11 a.m. UTC | #1
13.10.2019 23:48, Alberto Garcia wrote:
> The BDRV_REQ_NO_FALLBACK flag means that an operation should only be
> performed if it can be offloaded or otherwise performed efficiently.

As I know, BDRV_REQ_NO_FALLBACK is for write-zeros only, not about offloading..

> 
> However a misaligned write request requires a RMW so we should return
> an error and let the caller decide how to proceed.

Because we can finish up with trying to to normal write (not write_zero) with
BDRV_REQ_NO_FALLBACK flag, which is forbidden for bdrv_driver_pwritev, as it's
shown in assertion below.

> 
> This hits an assertion since commit c8bb23cbdb if the required
> alignment is larger than the cluster size:
> 
> qemu-img create -f qcow2 -o cluster_size=2k img.qcow2 4G
> qemu-io -c "open -o driver=qcow2,file.align=4k blkdebug::img.qcow2" \
>          -c 'write 0 512'
> qemu-io: block/io.c:1127: bdrv_driver_pwritev: Assertion `!(flags & BDRV_REQ_NO_FALLBACK)' failed.
> Aborted
> 
> Signed-off-by: Alberto Garcia <berto@igalia.com>
> ---
>   block/io.c                 |  6 +++++
>   tests/qemu-iotests/268     | 55 ++++++++++++++++++++++++++++++++++++++
>   tests/qemu-iotests/268.out |  7 +++++
>   tests/qemu-iotests/group   |  1 +
>   4 files changed, 69 insertions(+)
>   create mode 100755 tests/qemu-iotests/268
>   create mode 100644 tests/qemu-iotests/268.out
> 
> diff --git a/block/io.c b/block/io.c
> index 4f9ee97c2b..c5d4d029da 100644
> --- a/block/io.c
> +++ b/block/io.c
> @@ -2071,6 +2071,12 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
>           return ret;
>       }
>   
> +    /* If the request is misaligned then we can't make it efficient */
> +    if (((offset & (align - 1)) || (bytes & (align - 1))) &&
> +        (flags & BDRV_REQ_NO_FALLBACK)) {
> +        return -ENOTSUP;
> +    }
> +

So, if BDRV_REQ_NO_FALLBACK is only for write zeros, most correct place for this check
is bdrv_co_do_zero_pwritev().. But it's OK as is too.

Not long ago such checks was fixed to be QEMU_IS_ALIGNED, so with it used instead:
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>

>       bdrv_inc_in_flight(bs);
>       /*
>        * Align write if necessary by performing a read-modify-write cycle.
> diff --git a/tests/qemu-iotests/268 b/tests/qemu-iotests/268
> new file mode 100755
> index 0000000000..895f6e593f
> --- /dev/null
> +++ b/tests/qemu-iotests/268
> @@ -0,0 +1,55 @@
> +#!/usr/bin/env bash
> +#
> +# Test write request with required alignment larger than the cluster size
> +#
> +# Copyright (C) 2019 Igalia, S.L.
> +# Author: Alberto Garcia <berto@igalia.com>
> +#
> +# This program is free software; you can redistribute it and/or modify
> +# it under the terms of the GNU General Public License as published by
> +# the Free Software Foundation; either version 2 of the License, or
> +# (at your option) any later version.
> +#
> +# This program is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program.  If not, see <http://www.gnu.org/licenses/>.
> +#
> +
> +# creator
> +owner=berto@igalia.com
> +
> +seq=`basename $0`
> +echo "QA output created by $seq"
> +
> +status=1	# failure is the default!
> +
> +_cleanup()
> +{
> +    _cleanup_test_img
> +}
> +trap "_cleanup; exit \$status" 0 1 2 3 15
> +
> +# get standard environment, filters and checks
> +. ./common.rc
> +. ./common.filter
> +
> +_supported_fmt qcow2
> +_supported_proto file
> +
> +echo
> +echo "== Required alignment larger than cluster size =="
> +
> +CLUSTER_SIZE=2k _make_test_img 1M
> +# Since commit c8bb23cbdb writing to an allocated cluster fills the
> +# empty COW areas with bdrv_write_zeroes(flags=BDRV_REQ_NO_FALLBACK)
> +$QEMU_IO -c "open -o driver=$IMGFMT,file.align=4k blkdebug::$TEST_IMG" \
> +         -c "write 0 512" | _filter_qemu_io
> +
> +# success, all done
> +echo "*** done"
> +rm -f $seq.full
> +status=0
> diff --git a/tests/qemu-iotests/268.out b/tests/qemu-iotests/268.out
> new file mode 100644
> index 0000000000..2ed6c68529
> --- /dev/null
> +++ b/tests/qemu-iotests/268.out
> @@ -0,0 +1,7 @@
> +QA output created by 268
> +
> +== Required alignment larger than cluster size ==
> +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576
> +wrote 512/512 bytes at offset 0
> +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> +*** done
> diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
> index 5805a79d9e..4c861f7eed 100644
> --- a/tests/qemu-iotests/group
> +++ b/tests/qemu-iotests/group
> @@ -278,3 +278,4 @@
>   265 rw auto quick
>   266 rw quick
>   267 rw auto quick snapshot
> +268 rw auto quick
>
Vladimir Sementsov-Ogievskiy Oct. 14, 2019, 10:14 a.m. UTC | #2
14.10.2019 13:11, Vladimir Sementsov-Ogievskiy wrote:
> 13.10.2019 23:48, Alberto Garcia wrote:
>> The BDRV_REQ_NO_FALLBACK flag means that an operation should only be
>> performed if it can be offloaded or otherwise performed efficiently.
> 
> As I know, BDRV_REQ_NO_FALLBACK is for write-zeros only, not about offloading..
> 
>>
>> However a misaligned write request requires a RMW so we should return
>> an error and let the caller decide how to proceed.
> 
> Because we can finish up with trying to to normal write (not write_zero) with
> BDRV_REQ_NO_FALLBACK flag, which is forbidden for bdrv_driver_pwritev, as it's
> shown in assertion below.
> 

Haha, I'm too late, see it's already queued, sorry.
Alberto Garcia Oct. 14, 2019, 11:13 a.m. UTC | #3
On Mon 14 Oct 2019 12:11:52 PM CEST, Vladimir Sementsov-Ogievskiy wrote:
> 13.10.2019 23:48, Alberto Garcia wrote:
>> The BDRV_REQ_NO_FALLBACK flag means that an operation should only be
>> performed if it can be offloaded or otherwise performed efficiently.
>
> As I know, BDRV_REQ_NO_FALLBACK is for write-zeros only, not about
> offloading..

I just used the same wording from the documentation in block.h:

/* Execute the request only if the operation can be offloaded or otherwise
 * be executed efficiently, but return an error instead of using a slow
 * fallback. */

Berto
diff mbox series

Patch

diff --git a/block/io.c b/block/io.c
index 4f9ee97c2b..c5d4d029da 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2071,6 +2071,12 @@  int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
         return ret;
     }
 
+    /* If the request is misaligned then we can't make it efficient */
+    if (((offset & (align - 1)) || (bytes & (align - 1))) &&
+        (flags & BDRV_REQ_NO_FALLBACK)) {
+        return -ENOTSUP;
+    }
+
     bdrv_inc_in_flight(bs);
     /*
      * Align write if necessary by performing a read-modify-write cycle.
diff --git a/tests/qemu-iotests/268 b/tests/qemu-iotests/268
new file mode 100755
index 0000000000..895f6e593f
--- /dev/null
+++ b/tests/qemu-iotests/268
@@ -0,0 +1,55 @@ 
+#!/usr/bin/env bash
+#
+# Test write request with required alignment larger than the cluster size
+#
+# Copyright (C) 2019 Igalia, S.L.
+# Author: Alberto Garcia <berto@igalia.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=berto@igalia.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+status=1	# failure is the default!
+
+_cleanup()
+{
+    _cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+_supported_fmt qcow2
+_supported_proto file
+
+echo
+echo "== Required alignment larger than cluster size =="
+
+CLUSTER_SIZE=2k _make_test_img 1M
+# Since commit c8bb23cbdb writing to an allocated cluster fills the
+# empty COW areas with bdrv_write_zeroes(flags=BDRV_REQ_NO_FALLBACK)
+$QEMU_IO -c "open -o driver=$IMGFMT,file.align=4k blkdebug::$TEST_IMG" \
+         -c "write 0 512" | _filter_qemu_io
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/268.out b/tests/qemu-iotests/268.out
new file mode 100644
index 0000000000..2ed6c68529
--- /dev/null
+++ b/tests/qemu-iotests/268.out
@@ -0,0 +1,7 @@ 
+QA output created by 268
+
+== Required alignment larger than cluster size ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576
+wrote 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+*** done
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 5805a79d9e..4c861f7eed 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -278,3 +278,4 @@ 
 265 rw auto quick
 266 rw quick
 267 rw auto quick snapshot
+268 rw auto quick