[11/20] qcow2: External file I/O
diff mbox series

Message ID 20190227172256.30368-12-kwolf@redhat.com
State New
Headers show
Series
  • qcow2: External data files
Related show

Commit Message

Kevin Wolf Feb. 27, 2019, 5:22 p.m. UTC
This changes the qcow2 implementation to direct all guest data I/O to
s->data_file rather than bs->file, while metadata I/O still uses
bs->file. At the moment, s->data_file is still always the same as
bs->file, but soon we'll add options to set s->data_file to an external
data file.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/qcow2.h          |  2 +-
 block/qcow2-bitmap.c   |  7 +++---
 block/qcow2-cache.c    |  6 ++---
 block/qcow2-cluster.c  | 46 +++++++++++++++++++++++++++++++------
 block/qcow2-refcount.c | 39 +++++++++++++++++++++++--------
 block/qcow2-snapshot.c |  7 +++---
 block/qcow2.c          | 52 +++++++++++++++++++++++++++++++++---------
 7 files changed, 122 insertions(+), 37 deletions(-)

Patch
diff mbox series

diff --git a/block/qcow2.h b/block/qcow2.h
index fad6abf602..aac7fc4348 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -622,7 +622,7 @@  void qcow2_process_discards(BlockDriverState *bs, int ret);
 int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
                                  int64_t size);
 int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset,
-                                  int64_t size);
+                                  int64_t size, bool data_file);
 int qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res,
                              void **refcount_table,
                              int64_t *refcount_table_size,
diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
index 3ee524da4b..9d968bdcda 100644
--- a/block/qcow2-bitmap.c
+++ b/block/qcow2-bitmap.c
@@ -778,7 +778,8 @@  static int bitmap_list_store(BlockDriverState *bs, Qcow2BitmapList *bm_list,
      * directory in-place (actually, turn-off the extension), which is checked
      * in qcow2_check_metadata_overlap() */
     ret = qcow2_pre_write_overlap_check(
-            bs, in_place ? QCOW2_OL_BITMAP_DIRECTORY : 0, dir_offset, dir_size);
+            bs, in_place ? QCOW2_OL_BITMAP_DIRECTORY : 0, dir_offset, dir_size,
+            false);
     if (ret < 0) {
         goto fail;
     }
@@ -1224,7 +1225,7 @@  static uint64_t *store_bitmap_data(BlockDriverState *bs,
             memset(buf + write_size, 0, s->cluster_size - write_size);
         }
 
-        ret = qcow2_pre_write_overlap_check(bs, 0, off, s->cluster_size);
+        ret = qcow2_pre_write_overlap_check(bs, 0, off, s->cluster_size, false);
         if (ret < 0) {
             error_setg_errno(errp, -ret, "Qcow2 overlap check failed");
             goto fail;
@@ -1292,7 +1293,7 @@  static int store_bitmap(BlockDriverState *bs, Qcow2Bitmap *bm, Error **errp)
     }
 
     ret = qcow2_pre_write_overlap_check(bs, 0, tb_offset,
-                                        tb_size * sizeof(tb[0]));
+                                        tb_size * sizeof(tb[0]), false);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Qcow2 overlap check failed");
         goto fail;
diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index d9dafa31e5..df02e7b20a 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -205,13 +205,13 @@  static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
 
     if (c == s->refcount_block_cache) {
         ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_REFCOUNT_BLOCK,
-                c->entries[i].offset, c->table_size);
+                c->entries[i].offset, c->table_size, false);
     } else if (c == s->l2_table_cache) {
         ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2,
-                c->entries[i].offset, c->table_size);
+                c->entries[i].offset, c->table_size, false);
     } else {
         ret = qcow2_pre_write_overlap_check(bs, 0,
-                c->entries[i].offset, c->table_size);
+                c->entries[i].offset, c->table_size, false);
     }
 
     if (ret < 0) {
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 8c4b4005ff..7579f5a5ae 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -153,7 +153,7 @@  int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
     /* the L1 position has not yet been updated, so these clusters must
      * indeed be completely free */
     ret = qcow2_pre_write_overlap_check(bs, 0, new_l1_table_offset,
-                                        new_l1_size2);
+                                        new_l1_size2, false);
     if (ret < 0) {
         goto fail;
     }
@@ -238,7 +238,7 @@  int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index)
     }
 
     ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L1,
-            s->l1_table_offset + 8 * l1_start_index, sizeof(buf));
+            s->l1_table_offset + 8 * l1_start_index, sizeof(buf), false);
     if (ret < 0) {
         return ret;
     }
@@ -490,6 +490,7 @@  static int coroutine_fn do_perform_cow_write(BlockDriverState *bs,
                                              unsigned offset_in_cluster,
                                              QEMUIOVector *qiov)
 {
+    BDRVQcow2State *s = bs->opaque;
     int ret;
 
     if (qiov->size == 0) {
@@ -497,13 +498,13 @@  static int coroutine_fn do_perform_cow_write(BlockDriverState *bs,
     }
 
     ret = qcow2_pre_write_overlap_check(bs, 0,
-            cluster_offset + offset_in_cluster, qiov->size);
+            cluster_offset + offset_in_cluster, qiov->size, true);
     if (ret < 0) {
         return ret;
     }
 
     BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
-    ret = bdrv_co_pwritev(bs->file, cluster_offset + offset_in_cluster,
+    ret = bdrv_co_pwritev(s->data_file, cluster_offset + offset_in_cluster,
                           qiov->size, qiov, 0);
     if (ret < 0) {
         return ret;
@@ -607,6 +608,14 @@  int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
     }
     switch (type) {
     case QCOW2_CLUSTER_COMPRESSED:
+        if (has_data_file(bs)) {
+            qcow2_signal_corruption(bs, true, -1, -1, "Compressed cluster "
+                                    "entry found in image with external data "
+                                    "file (L2 offset: %#" PRIx64 ", L2 index: "
+                                    "%#x)", l2_offset, l2_index);
+            ret = -EIO;
+            goto fail;
+        }
         /* Compressed clusters can only be processed one by one */
         c = 1;
         *cluster_offset &= L2E_COMPRESSED_OFFSET_SIZE_MASK;
@@ -633,6 +642,17 @@  int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
             ret = -EIO;
             goto fail;
         }
+        if (has_data_file(bs) && *cluster_offset != offset - offset_in_cluster)
+        {
+            qcow2_signal_corruption(bs, true, -1, -1,
+                                    "External data file host cluster offset %#"
+                                    PRIx64 " does not match guest cluster "
+                                    "offset: %#" PRIx64
+                                    ", L2 index: %#x)", *cluster_offset,
+                                    offset - offset_in_cluster, l2_index);
+            ret = -EIO;
+            goto fail;
+        }
         break;
     default:
         abort();
@@ -753,6 +773,10 @@  int qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
     int64_t cluster_offset;
     int nb_csectors;
 
+    if (has_data_file(bs)) {
+        return 0;
+    }
+
     ret = get_cluster_table(bs, offset, &l2_slice, &l2_index);
     if (ret < 0) {
         return ret;
@@ -1243,6 +1267,13 @@  static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
     trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset,
                                          *host_offset, *nb_clusters);
 
+    if (has_data_file(bs)) {
+        assert(*host_offset == INV_OFFSET ||
+               *host_offset == start_of_cluster(s, guest_offset));
+        *host_offset = start_of_cluster(s, guest_offset);
+        return 0;
+    }
+
     /* Allocate new clusters */
     trace_qcow2_cluster_alloc_phys(qemu_coroutine_self());
     if (*host_offset == INV_OFFSET) {
@@ -1919,7 +1950,7 @@  static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
                 }
 
                 ret = qcow2_pre_write_overlap_check(bs, 0, offset,
-                                                    s->cluster_size);
+                                                    s->cluster_size, true);
                 if (ret < 0) {
                     if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
                         qcow2_free_clusters(bs, offset, s->cluster_size,
@@ -1928,7 +1959,8 @@  static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
                     goto fail;
                 }
 
-                ret = bdrv_pwrite_zeroes(bs->file, offset, s->cluster_size, 0);
+                ret = bdrv_pwrite_zeroes(s->data_file, offset,
+                                         s->cluster_size, 0);
                 if (ret < 0) {
                     if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
                         qcow2_free_clusters(bs, offset, s->cluster_size,
@@ -1955,7 +1987,7 @@  static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
                 if (l2_dirty) {
                     ret = qcow2_pre_write_overlap_check(
                         bs, QCOW2_OL_INACTIVE_L2 | QCOW2_OL_ACTIVE_L2,
-                        slice_offset, slice_size2);
+                        slice_offset, slice_size2, false);
                     if (ret < 0) {
                         goto fail;
                     }
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 05e7974d7e..df73580e5d 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -1156,8 +1156,20 @@  void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry,
                              int nb_clusters, enum qcow2_discard_type type)
 {
     BDRVQcow2State *s = bs->opaque;
+    QCow2ClusterType ctype = qcow2_get_cluster_type(bs, l2_entry);
 
-    switch (qcow2_get_cluster_type(bs, l2_entry)) {
+    if (has_data_file(bs)) {
+        if (s->discard_passthrough[type] &&
+            (ctype == QCOW2_CLUSTER_NORMAL ||
+             ctype == QCOW2_CLUSTER_ZERO_ALLOC))
+        {
+            bdrv_pdiscard(s->data_file, l2_entry & L2E_OFFSET_MASK,
+                          nb_clusters << s->cluster_bits);
+        }
+        return;
+    }
+
+    switch (ctype) {
     case QCOW2_CLUSTER_COMPRESSED:
         {
             int nb_csectors;
@@ -1649,7 +1661,7 @@  static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
                         l2_table[i] = cpu_to_be64(l2_entry);
                         ret = qcow2_pre_write_overlap_check(bs,
                                 QCOW2_OL_ACTIVE_L2 | QCOW2_OL_INACTIVE_L2,
-                                l2e_offset, sizeof(uint64_t));
+                                l2e_offset, sizeof(uint64_t), false);
                         if (ret < 0) {
                             fprintf(stderr, "ERROR: Overlap check failed\n");
                             res->check_errors++;
@@ -1898,7 +1910,8 @@  static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
 
         if (l2_dirty) {
             ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2,
-                                                l2_offset, s->cluster_size);
+                                                l2_offset, s->cluster_size,
+                                                false);
             if (ret < 0) {
                 fprintf(stderr, "ERROR: Could not write L2 table; metadata "
                         "overlap check failed: %s\n", strerror(-ret));
@@ -2366,7 +2379,7 @@  write_refblocks:
         }
 
         ret = qcow2_pre_write_overlap_check(bs, 0, refblock_offset,
-                                            s->cluster_size);
+                                            s->cluster_size, false);
         if (ret < 0) {
             fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
             goto fail;
@@ -2417,7 +2430,8 @@  write_refblocks:
     }
 
     ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset,
-                                        reftable_size * sizeof(uint64_t));
+                                        reftable_size * sizeof(uint64_t),
+                                        false);
     if (ret < 0) {
         fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret));
         goto fail;
@@ -2751,10 +2765,15 @@  QEMU_BUILD_BUG_ON(QCOW2_OL_MAX_BITNR != ARRAY_SIZE(metadata_ol_names));
  * overlaps; or a negative value (-errno) on error.
  */
 int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset,
-                                  int64_t size)
+                                  int64_t size, bool data_file)
 {
-    int ret = qcow2_check_metadata_overlap(bs, ign, offset, size);
+    int ret;
+
+    if (data_file && has_data_file(bs)) {
+        return 0;
+    }
 
+    ret = qcow2_check_metadata_overlap(bs, ign, offset, size);
     if (ret < 0) {
         return ret;
     } else if (ret > 0) {
@@ -2855,7 +2874,8 @@  static int flush_refblock(BlockDriverState *bs, uint64_t **reftable,
     if (reftable_index < *reftable_size && (*reftable)[reftable_index]) {
         offset = (*reftable)[reftable_index];
 
-        ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size);
+        ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size,
+                                            false);
         if (ret < 0) {
             error_setg_errno(errp, -ret, "Overlap check failed");
             return ret;
@@ -3121,7 +3141,8 @@  int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
 
     /* Write the new reftable */
     ret = qcow2_pre_write_overlap_check(bs, 0, new_reftable_offset,
-                                        new_reftable_size * sizeof(uint64_t));
+                                        new_reftable_size * sizeof(uint64_t),
+                                        false);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Overlap check failed");
         goto done;
diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
index 20e8472191..5ae3407f68 100644
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -184,7 +184,7 @@  static int qcow2_write_snapshots(BlockDriverState *bs)
 
     /* The snapshot list position has not yet been updated, so these clusters
      * must indeed be completely free */
-    ret = qcow2_pre_write_overlap_check(bs, 0, offset, snapshots_size);
+    ret = qcow2_pre_write_overlap_check(bs, 0, offset, snapshots_size, false);
     if (ret < 0) {
         goto fail;
     }
@@ -389,7 +389,7 @@  int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
     }
 
     ret = qcow2_pre_write_overlap_check(bs, 0, sn->l1_table_offset,
-                                        s->l1_size * sizeof(uint64_t));
+                                        s->l1_size * sizeof(uint64_t), false);
     if (ret < 0) {
         goto fail;
     }
@@ -528,7 +528,8 @@  int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
     }
 
     ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L1,
-                                        s->l1_table_offset, cur_l1_bytes);
+                                        s->l1_table_offset, cur_l1_bytes,
+                                        false);
     if (ret < 0) {
         goto fail;
     }
diff --git a/block/qcow2.c b/block/qcow2.c
index 8dc6f21047..b77d856007 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -140,7 +140,7 @@  static ssize_t qcow2_crypto_hdr_init_func(QCryptoBlock *block, size_t headerlen,
     /* Zero fill remaining space in cluster so it has predictable
      * content in case of future spec changes */
     clusterlen = size_to_clusters(s, headerlen) * s->cluster_size;
-    assert(qcow2_pre_write_overlap_check(bs, 0, ret, clusterlen) == 0);
+    assert(qcow2_pre_write_overlap_check(bs, 0, ret, clusterlen, false) == 0);
     ret = bdrv_pwrite_zeroes(bs->file,
                              ret + headerlen,
                              clusterlen - headerlen, 0);
@@ -1953,7 +1953,7 @@  static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
                  */
                 if (!cluster_data) {
                     cluster_data =
-                        qemu_try_blockalign(bs->file->bs,
+                        qemu_try_blockalign(s->data_file->bs,
                                             QCOW_MAX_CRYPT_CLUSTERS
                                             * s->cluster_size);
                     if (cluster_data == NULL) {
@@ -1969,7 +1969,7 @@  static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
 
             BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
             qemu_co_mutex_unlock(&s->lock);
-            ret = bdrv_co_preadv(bs->file,
+            ret = bdrv_co_preadv(s->data_file,
                                  cluster_offset + offset_in_cluster,
                                  cur_bytes, &hd_qiov, 0);
             qemu_co_mutex_lock(&s->lock);
@@ -2128,7 +2128,7 @@  static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
         }
 
         ret = qcow2_pre_write_overlap_check(bs, 0,
-                cluster_offset + offset_in_cluster, cur_bytes);
+                cluster_offset + offset_in_cluster, cur_bytes, true);
         if (ret < 0) {
             goto fail;
         }
@@ -2142,7 +2142,7 @@  static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
             BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
             trace_qcow2_writev_data(qemu_coroutine_self(),
                                     cluster_offset + offset_in_cluster);
-            ret = bdrv_co_pwritev(bs->file,
+            ret = bdrv_co_pwritev(s->data_file,
                                   cluster_offset + offset_in_cluster,
                                   cur_bytes, &hd_qiov, 0);
             qemu_co_mutex_lock(&s->lock);
@@ -3344,7 +3344,7 @@  qcow2_co_copy_range_from(BlockDriverState *bs,
             goto out;
 
         case QCOW2_CLUSTER_NORMAL:
-            child = bs->file;
+            child = s->data_file;
             copy_offset += offset_into_cluster(s, src_offset);
             if ((copy_offset & 511) != 0) {
                 ret = -EIO;
@@ -3414,14 +3414,14 @@  qcow2_co_copy_range_to(BlockDriverState *bs,
         assert((cluster_offset & 511) == 0);
 
         ret = qcow2_pre_write_overlap_check(bs, 0,
-                cluster_offset + offset_in_cluster, cur_bytes);
+                cluster_offset + offset_in_cluster, cur_bytes, true);
         if (ret < 0) {
             goto fail;
         }
 
         qemu_co_mutex_unlock(&s->lock);
         ret = bdrv_co_copy_range_to(src, src_offset,
-                                    bs->file,
+                                    s->data_file,
                                     cluster_offset + offset_in_cluster,
                                     cur_bytes, read_flags, write_flags);
         qemu_co_mutex_lock(&s->lock);
@@ -3576,6 +3576,17 @@  static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
         int64_t old_file_size, new_file_size;
         uint64_t nb_new_data_clusters, nb_new_l2_tables;
 
+        /* With a data file, preallocation means just allocating the metadata
+         * and forwarding the truncate request to the data file */
+        if (has_data_file(bs)) {
+            ret = preallocate_co(bs, old_length, offset);
+            if (ret < 0) {
+                error_setg_errno(errp, -ret, "Preallocation failed");
+                goto fail;
+            }
+            break;
+        }
+
         old_file_size = bdrv_getlength(bs->file->bs);
         if (old_file_size < 0) {
             error_setg_errno(errp, -old_file_size,
@@ -3684,6 +3695,16 @@  static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
 
     bs->total_sectors = offset / BDRV_SECTOR_SIZE;
 
+    if (has_data_file(bs)) {
+        if (prealloc == PREALLOC_MODE_METADATA) {
+            prealloc = PREALLOC_MODE_OFF;
+        }
+        ret = bdrv_co_truncate(s->data_file, offset, prealloc, errp);
+        if (ret < 0) {
+            goto fail;
+        }
+    }
+
     /* write updated header.size */
     offset = cpu_to_be64(offset);
     ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size),
@@ -3887,6 +3908,10 @@  qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
     uint8_t *buf, *out_buf;
     uint64_t cluster_offset;
 
+    if (has_data_file(bs)) {
+        return -ENOTSUP;
+    }
+
     if (bytes == 0) {
         /* align end of file to a sector boundary to ease reading with
            sector based I/Os */
@@ -3938,7 +3963,7 @@  qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
         goto fail;
     }
 
-    ret = qcow2_pre_write_overlap_check(bs, 0, cluster_offset, out_len);
+    ret = qcow2_pre_write_overlap_check(bs, 0, cluster_offset, out_len, true);
     qemu_co_mutex_unlock(&s->lock);
     if (ret < 0) {
         goto fail;
@@ -3950,8 +3975,8 @@  qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
     };
     qemu_iovec_init_external(&hd_qiov, &iov, 1);
 
-    BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED);
-    ret = bdrv_co_pwritev(bs->file, cluster_offset, out_len, &hd_qiov, 0);
+    BLKDBG_EVENT(s->data_file, BLKDBG_WRITE_COMPRESSED);
+    ret = bdrv_co_pwritev(s->data_file, cluster_offset, out_len, &hd_qiov, 0);
     if (ret < 0) {
         goto fail;
     }
@@ -4543,6 +4568,11 @@  static int qcow2_downgrade(BlockDriverState *bs, int target_version,
         return -ENOTSUP;
     }
 
+    if (has_data_file(bs)) {
+        error_setg(errp, "Cannot downgrade an image with a data file");
+        return -ENOTSUP;
+    }
+
     /* clear incompatible features */
     if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
         ret = qcow2_mark_clean(bs);