@@ -781,6 +781,16 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
goto fail;
}
+ /*
+ * With modification to the qcow2 spec, snapshots could store
+ * whether they are in an all zero state. But for now, we assume
+ * all snapshots are nonzero.
+ */
+ ret = qcow2_mark_nonzero(bs);
+ if (ret < 0) {
+ goto fail;
+ }
+
/*
* Make sure that the current L1 table is big enough to contain the whole
* L1 table of the snapshot. If the snapshot L1 table is smaller, the
@@ -1044,6 +1054,7 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs,
s->l1_size = sn->l1_size;
s->l1_table_offset = sn->l1_table_offset;
s->l1_table = new_l1_table;
+ s->autoclear_features &= ~QCOW2_AUTOCLEAR_ALL_ZERO;
for(i = 0;i < s->l1_size; i++) {
be64_to_cpus(&s->l1_table[i]);
@@ -480,6 +480,40 @@ static void report_unsupported_feature(Error **errp, Qcow2Feature *table,
g_free(features);
}
+/*
+ * Clear the all zero bit and flushes afterwards if necessary.
+ *
+ * If updating the header fails, it is not safe to proceed with
+ * modifying the image.
+ */
+int qcow2_mark_nonzero(BlockDriverState *bs)
+{
+ BDRVQcow2State *s = bs->opaque;
+ uint64_t val;
+ int ret;
+
+ if (!(s->autoclear_features & QCOW2_AUTOCLEAR_ALL_ZERO)) {
+ return 0; /* already marked non-zero, including version 2 */
+ }
+
+ assert(s->qcow_version >= 3);
+
+ val = cpu_to_be64(s->autoclear_features & ~QCOW2_AUTOCLEAR_ALL_ZERO);
+ ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, autoclear_features),
+ &val, sizeof(val));
+ if (ret < 0) {
+ return ret;
+ }
+ ret = bdrv_flush(bs->file->bs);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* Only clear the in-memory flag if the header was updated successfully */
+ s->autoclear_features &= ~QCOW2_AUTOCLEAR_ALL_ZERO;
+ return 0;
+}
+
/*
* Sets the dirty bit and flushes afterwards if necessary.
*
@@ -518,16 +552,27 @@ int qcow2_mark_dirty(BlockDriverState *bs)
/*
* Clears the dirty bit and flushes before if necessary. Only call this
* function when there are no pending requests, it does not guard against
- * concurrent requests dirtying the image.
+ * concurrent requests dirtying the image. If all_zero is 0 or 1, adjust
+ * the value of s->autoclear; if -1, preserve the cached value.
*/
-static int qcow2_mark_clean(BlockDriverState *bs)
+static int qcow2_mark_clean(BlockDriverState *bs, int all_zero)
{
BDRVQcow2State *s = bs->opaque;
- if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
+ if (all_zero == -1) {
+ all_zero = !!(s->autoclear_features & QCOW2_AUTOCLEAR_ALL_ZERO);
+ }
+ if (bs->backing || bs->encrypted || s->qcow_version < 3) {
+ all_zero = 0;
+ }
+ if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY ||
+ (all_zero && !(s->autoclear_features & QCOW2_AUTOCLEAR_ALL_ZERO))) {
int ret;
s->incompatible_features &= ~QCOW2_INCOMPAT_DIRTY;
+ if (all_zero) {
+ s->autoclear_features |= QCOW2_AUTOCLEAR_ALL_ZERO;
+ }
ret = qcow2_flush_caches(bs);
if (ret < 0) {
@@ -616,7 +661,13 @@ static int coroutine_fn qcow2_co_check_locked(BlockDriverState *bs,
}
if (fix && result->check_errors == 0 && result->corruptions == 0) {
- ret = qcow2_mark_clean(bs);
+ /*
+ * In the case of fixing an image, we've actually spent the
+ * time of traversing every cluster, and could thus turn the
+ * all_zero bit on if the check proves it is correct; but for
+ * now, it is easier to just always drop the all_zero bit.
+ */
+ ret = qcow2_mark_clean(bs, 0);
if (ret < 0) {
return ret;
}
@@ -1069,7 +1120,7 @@ static int qcow2_update_options_prepare(BlockDriverState *bs,
}
if (s->use_lazy_refcounts && !r->use_lazy_refcounts) {
- ret = qcow2_mark_clean(bs);
+ ret = qcow2_mark_clean(bs, -1);
if (ret < 0) {
error_setg_errno(errp, -ret, "Failed to disable lazy refcounts");
goto fail;
@@ -1865,7 +1916,7 @@ static int qcow2_reopen_prepare(BDRVReopenState *state,
goto fail;
}
- ret = qcow2_mark_clean(state->bs);
+ ret = qcow2_mark_clean(state->bs, -1);
if (ret < 0) {
goto fail;
}
@@ -2486,6 +2537,11 @@ static coroutine_fn int qcow2_co_pwritev_part(
trace_qcow2_writev_start_req(qemu_coroutine_self(), offset, bytes);
+ ret = qcow2_mark_nonzero(bs);
+ if (ret < 0) {
+ goto fail_nometa;
+ }
+
while (bytes != 0 && aio_task_pool_status(aio) == 0) {
l2meta = NULL;
@@ -2586,7 +2642,7 @@ static int qcow2_inactivate(BlockDriverState *bs)
}
if (result == 0) {
- qcow2_mark_clean(bs);
+ qcow2_mark_clean(bs, -1);
}
return result;
@@ -3443,6 +3499,9 @@ qcow2_co_create(BlockdevCreateOptions *create_options, Error **errp)
header->autoclear_features |=
cpu_to_be64(QCOW2_AUTOCLEAR_DATA_FILE_RAW);
}
+ if (version >= 3 && !qcow2_opts->has_backing_file) {
+ header->autoclear_features |= cpu_to_be64(QCOW2_AUTOCLEAR_ALL_ZERO);
+ }
ret = blk_pwrite(blk, 0, header, cluster_size, 0);
g_free(header);
@@ -3793,6 +3852,11 @@ static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs,
}
qemu_co_mutex_lock(&s->lock);
+ /*
+ * No need to call qcow2_mark_nonzero: v2 images lack autoclear
+ * bits and so are already nonzero; v3 images pass full_discard=false
+ * so that discarded clusters still read as zero.
+ */
ret = qcow2_cluster_discard(bs, offset, bytes, QCOW2_DISCARD_REQUEST,
false);
qemu_co_mutex_unlock(&s->lock);
@@ -3902,6 +3966,11 @@ qcow2_co_copy_range_to(BlockDriverState *bs,
qemu_co_mutex_lock(&s->lock);
+ ret = qcow2_mark_nonzero(bs);
+ if (ret < 0) {
+ goto fail;
+ }
+
while (bytes != 0) {
l2meta = NULL;
@@ -4334,6 +4403,11 @@ qcow2_co_pwritev_compressed_part(BlockDriverState *bs,
return -ENOTSUP;
}
+ ret = qcow2_mark_nonzero(bs);
+ if (ret < 0) {
+ return ret;
+ }
+
if (bytes == 0) {
/*
* align end of file to a sector boundary to ease reading with
@@ -4547,7 +4621,7 @@ static int make_completely_empty(BlockDriverState *bs)
/* Now finally the in-memory information corresponds to the on-disk
* structures and is correct */
- ret = qcow2_mark_clean(bs);
+ ret = qcow2_mark_clean(bs, 1);
if (ret < 0) {
goto fail;
}
@@ -4615,6 +4689,9 @@ static int qcow2_make_empty(BlockDriverState *bs)
break;
}
}
+ if (!bs->backing && !bs->encrypted && s->qcow_version >= 3) {
+ s->autoclear_features |= QCOW2_AUTOCLEAR_ALL_ZERO;
+ }
return ret;
}
@@ -5002,7 +5079,7 @@ static int qcow2_downgrade(BlockDriverState *bs, int target_version,
/* clear incompatible features */
if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
- ret = qcow2_mark_clean(bs);
+ ret = qcow2_mark_clean(bs, 0);
if (ret < 0) {
error_setg_errno(errp, -ret, "Failed to make the image clean");
return ret;
@@ -5372,7 +5449,7 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
s->use_lazy_refcounts = true;
} else {
/* make image clean first */
- ret = qcow2_mark_clean(bs);
+ ret = qcow2_mark_clean(bs, -1);
if (ret < 0) {
error_setg_errno(errp, -ret, "Failed to make the image clean");
return ret;
@@ -243,8 +243,8 @@ enum {
QCOW2_AUTOCLEAR_ALL_ZERO = 1 << QCOW2_AUTOCLEAR_ALL_ZERO_BITNR,
QCOW2_AUTOCLEAR_MASK = QCOW2_AUTOCLEAR_BITMAPS
- | QCOW2_AUTOCLEAR_DATA_FILE_RAW,
- /* TODO: Add _ALL_ZERO to _MASK once it is handled correctly */
+ | QCOW2_AUTOCLEAR_DATA_FILE_RAW
+ | QCOW2_AUTOCLEAR_ALL_ZERO,
};
enum qcow2_discard_type {
@@ -610,6 +610,7 @@ int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size,
int qcow2_mark_dirty(BlockDriverState *bs);
int qcow2_mark_corrupt(BlockDriverState *bs);
+int qcow2_mark_nonzero(BlockDriverState *bs);
int qcow2_mark_consistent(BlockDriverState *bs);
int qcow2_update_header(BlockDriverState *bs);
@@ -111,7 +111,7 @@ nb_snapshots 0
snapshot_offset 0x0
incompatible_features []
compatible_features []
-autoclear_features []
+autoclear_features [2]
refcount_order 4
header_length 104
@@ -144,7 +144,7 @@ nb_snapshots 0
snapshot_offset 0x0
incompatible_features []
compatible_features []
-autoclear_features []
+autoclear_features [2]
refcount_order 4
header_length 104
@@ -177,7 +177,7 @@ nb_snapshots 0
snapshot_offset 0x0
incompatible_features []
compatible_features []
-autoclear_features []
+autoclear_features [2]
refcount_order 4
header_length 104
@@ -5,7 +5,7 @@ QA output created by 036
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
incompatible_features [63]
compatible_features []
-autoclear_features []
+autoclear_features [2]
qemu-img: Could not open 'TEST_DIR/t.IMGFMT': Unsupported IMGFMT feature(s): Unknown incompatible feature: 8000000000000000
qemu-img: Could not open 'TEST_DIR/t.IMGFMT': Unsupported IMGFMT feature(s): Test feature
@@ -23,7 +23,7 @@ qemu-img: Could not open 'TEST_DIR/t.IMGFMT': Unsupported IMGFMT feature(s): tes
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
incompatible_features []
compatible_features []
-autoclear_features [63]
+autoclear_features [2, 63]
Header extension:
magic 0x6803f857
length 336
@@ -35,7 +35,7 @@ data <binary>
No errors were found on the image.
incompatible_features []
compatible_features []
-autoclear_features []
+autoclear_features [2]
Header extension:
magic 0x6803f857
length 336
@@ -20,7 +20,7 @@ nb_snapshots 0
snapshot_offset 0x0
incompatible_features []
compatible_features [0]
-autoclear_features []
+autoclear_features [2]
refcount_order 4
header_length 104
@@ -78,7 +78,7 @@ nb_snapshots 0
snapshot_offset 0x0
incompatible_features []
compatible_features [0]
-autoclear_features []
+autoclear_features [2]
refcount_order 4
header_length 104
@@ -189,7 +189,7 @@ nb_snapshots 0
snapshot_offset 0x0
incompatible_features []
compatible_features [42]
-autoclear_features [42]
+autoclear_features [2, 42]
refcount_order 4
header_length 104
@@ -491,6 +491,7 @@ virtual size: 64 MiB (67108864 bytes)
cluster_size: 65536
Format specific information:
compat: 1.1
+ all zero: true
lazy refcounts: false
refcount bits: 16
data file: TEST_DIR/t.IMGFMT.data
@@ -511,6 +512,7 @@ virtual size: 64 MiB (67108864 bytes)
cluster_size: 65536
Format specific information:
compat: 1.1
+ all zero: true
lazy refcounts: false
refcount bits: 16
data file: foo
@@ -524,6 +526,7 @@ virtual size: 64 MiB (67108864 bytes)
cluster_size: 65536
Format specific information:
compat: 1.1
+ all zero: true
lazy refcounts: false
refcount bits: 16
data file raw: false
@@ -538,6 +541,7 @@ virtual size: 64 MiB (67108864 bytes)
cluster_size: 65536
Format specific information:
compat: 1.1
+ all zero: true
lazy refcounts: false
refcount bits: 16
data file: TEST_DIR/t.IMGFMT.data
@@ -550,6 +554,7 @@ virtual size: 64 MiB (67108864 bytes)
cluster_size: 65536
Format specific information:
compat: 1.1
+ all zero: true
lazy refcounts: false
refcount bits: 16
data file: TEST_DIR/t.IMGFMT.data
@@ -563,6 +568,7 @@ virtual size: 64 MiB (67108864 bytes)
cluster_size: 65536
Format specific information:
compat: 1.1
+ all zero: true
lazy refcounts: false
refcount bits: 16
data file: TEST_DIR/t.IMGFMT.data
@@ -94,17 +94,17 @@ class TestQCow2(TestQemuImgInfo):
class TestQCow3NotLazy(TestQemuImgInfo):
'''Testing a qcow2 version 3 image with lazy refcounts disabled'''
img_options = 'compat=1.1,lazy_refcounts=off'
- json_compare = { 'compat': '1.1', 'lazy-refcounts': False,
+ json_compare = { 'compat': '1.1', 'all-zero': True, 'lazy-refcounts': False,
'refcount-bits': 16, 'corrupt': False }
- human_compare = [ 'compat: 1.1', 'lazy refcounts: false',
+ human_compare = [ 'compat: 1.1', 'all zero: true', 'lazy refcounts: false',
'refcount bits: 16', 'corrupt: false' ]
class TestQCow3Lazy(TestQemuImgInfo):
'''Testing a qcow2 version 3 image with lazy refcounts enabled'''
img_options = 'compat=1.1,lazy_refcounts=on'
- json_compare = { 'compat': '1.1', 'lazy-refcounts': True,
+ json_compare = { 'compat': '1.1', 'all-zero': True, 'lazy-refcounts': True,
'refcount-bits': 16, 'corrupt': False }
- human_compare = [ 'compat: 1.1', 'lazy refcounts: true',
+ human_compare = [ 'compat: 1.1', 'all zero: true', 'lazy refcounts: true',
'refcount bits: 16', 'corrupt: false' ]
class TestQCow3NotLazyQMP(TestQMP):
@@ -112,7 +112,7 @@ class TestQCow3NotLazyQMP(TestQMP):
with lazy refcounts enabled'''
img_options = 'compat=1.1,lazy_refcounts=off'
qemu_options = 'lazy-refcounts=on'
- compare = { 'compat': '1.1', 'lazy-refcounts': False,
+ compare = { 'compat': '1.1', 'all-zero': True, 'lazy-refcounts': False,
'refcount-bits': 16, 'corrupt': False }
@@ -121,7 +121,7 @@ class TestQCow3LazyQMP(TestQMP):
with lazy refcounts disabled'''
img_options = 'compat=1.1,lazy_refcounts=on'
qemu_options = 'lazy-refcounts=off'
- compare = { 'compat': '1.1', 'lazy-refcounts': True,
+ compare = { 'compat': '1.1', 'all-zero': True, 'lazy-refcounts': True,
'refcount-bits': 16, 'corrupt': False }
TestImageInfoSpecific = None
@@ -17,6 +17,7 @@ virtual size: 128 MiB (134217728 bytes)
cluster_size: 4096
Format specific information:
compat: 1.1
+ all zero: true
lazy refcounts: true
refcount bits: 16
corrupt: false
@@ -29,6 +30,7 @@ virtual size: 128 MiB (134217728 bytes)
cluster_size: 8192
Format specific information:
compat: 1.1
+ all zero: true
lazy refcounts: true
refcount bits: 16
corrupt: false
@@ -299,6 +301,7 @@ virtual size: 128 MiB (134217728 bytes)
cluster_size: 4096
Format specific information:
compat: 1.1
+ all zero: true
lazy refcounts: true
refcount bits: 16
corrupt: false
@@ -310,6 +313,7 @@ virtual size: 128 MiB (134217728 bytes)
cluster_size: 8192
Format specific information:
compat: 1.1
+ all zero: true
lazy refcounts: true
refcount bits: 16
corrupt: false
@@ -579,6 +583,7 @@ virtual size: 128 MiB (134217728 bytes)
cluster_size: 65536
Format specific information:
compat: 1.1
+ all zero: true
lazy refcounts: true
refcount bits: 16
corrupt: false
@@ -590,6 +595,7 @@ virtual size: 130 MiB (136314880 bytes)
cluster_size: 65536
Format specific information:
compat: 1.1
+ all zero: true
lazy refcounts: false
refcount bits: 16
corrupt: false
@@ -601,6 +607,7 @@ virtual size: 132 MiB (138412032 bytes)
cluster_size: 65536
Format specific information:
compat: 1.1
+ all zero: true
lazy refcounts: true
refcount bits: 16
corrupt: false
@@ -18,6 +18,7 @@ virtual size: 128 MiB (134217728 bytes)
cluster_size: 65536
Format specific information:
compat: 1.1
+ all zero: true
lazy refcounts: false
refcount bits: 16
corrupt: false
@@ -40,6 +41,7 @@ virtual size: 64 MiB (67108864 bytes)
cluster_size: 65536
Format specific information:
compat: 1.1
+ all zero: true
lazy refcounts: false
refcount bits: 16
corrupt: false
@@ -62,6 +64,7 @@ virtual size: 32 MiB (33554432 bytes)
cluster_size: 2097152
Format specific information:
compat: 1.1
+ all zero: true
lazy refcounts: true
refcount bits: 1
corrupt: false
@@ -102,6 +105,7 @@ encrypted: yes
cluster_size: 65536
Format specific information:
compat: 1.1
+ all zero: true
lazy refcounts: false
refcount bits: 16
encrypt:
@@ -153,6 +153,7 @@ virtual size: 1 MiB (1048576 bytes)
cluster_size: 65536
Format specific information:
compat: 1.1
+ all zero: true
lazy refcounts: false
bitmaps:
[0]:
Wire up the autoclear bit just defined in the previous patch. When we create an image or clear it with .bdrv_make_empty, we know that it reads as all zeroes. Reading an image does not change the previous status, nor does writing zeroes, trimming (because we specifically set trimmed clusters to read as zero), or resize (because the new length reads as zero). This leaves normal writes, data copies, snapshot reverts, and altering the backing file that can change the status. Furthermore, it is not safe to claim that an encrypted image or an image with a backing file reads as all zeroes. Implementation-wise, we clear the bit from the file on the first modification, and then rewrite it when marking the image clean; some callers want to rewrite it (to either set or clear), while others want to preserve the current value; the modifications to qemu_mark_clean make it easier to consolidate the logic for when setting the bit is safe. A number of iotests have altered output, in situations where we have a provably zero image at that point in the test. Later, we may want to wire in further checks to qemu-img check that validates if the bit is set correctly, and/or to set the bit in images where it would be valid, but I did not do that here. Signed-off-by: Eric Blake <eblake@redhat.com> --- block/qcow2-snapshot.c | 11 +++++ block/qcow2.c | 97 ++++++++++++++++++++++++++++++++++---- block/qcow2.h | 5 +- tests/qemu-iotests/031.out | 6 +-- tests/qemu-iotests/036.out | 6 +-- tests/qemu-iotests/061.out | 12 +++-- tests/qemu-iotests/065 | 12 ++--- tests/qemu-iotests/082.out | 7 +++ tests/qemu-iotests/206.out | 4 ++ tests/qemu-iotests/242.out | 1 + 10 files changed, 134 insertions(+), 27 deletions(-)