Message ID | 1491057878-27868-7-git-send-email-ashijeetacharya@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Sat, 04/01 20:14, Ashijeet Acharya wrote: > Include a next pointer in VmdkMetaData struct to point to the previous > allocated L2 table. Modify vmdk_L2update to start updating metadata for > allocation of multiple clusters at once. > > Signed-off-by: Ashijeet Acharya <ashijeetacharya@gmail.com> This is the metadata part of the coalesced allocation. I think patch 3 is functionally incomplete without these changes, and is perhaps broken because metadata is not handled correctly. Such an "intermediate functional regression" is not good in a series, which we need to avoid. > --- > block/vmdk.c | 136 ++++++++++++++++++++++++++++++++++++++++++++++++----------- > 1 file changed, 111 insertions(+), 25 deletions(-) > > diff --git a/block/vmdk.c b/block/vmdk.c > index 9456ddd..c7675db 100644 > --- a/block/vmdk.c > +++ b/block/vmdk.c > @@ -137,6 +137,8 @@ typedef struct VmdkMetaData { > int valid; > uint32_t *l2_cache_entry; > uint32_t nb_clusters; > + uint32_t offset; > + struct VmdkMetaData *next; > } VmdkMetaData; > > typedef struct VmdkGrainMarker { > @@ -263,6 +265,12 @@ static inline uint64_t size_to_clusters(VmdkExtent *extent, uint64_t size) > return (DIV_ROUND_UP(size + round_off_size, BDRV_SECTOR_SIZE * 128) - 1); > } > > +static inline int64_t vmdk_align_offset(int64_t offset, int n) > +{ > + offset = (offset + n - 1) & ~(n - 1); > + return offset; > +} > + > static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent) > { > char *desc; > @@ -1037,29 +1045,88 @@ static void vmdk_refresh_limits(BlockDriverState *bs, Error **errp) > } > } > > -static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data, > - uint32_t offset) > +static int vmdk_alloc_cluster_link_l2(VmdkExtent *extent, > + VmdkMetaData *m_data, bool zeroed) > { > - offset = cpu_to_le32(offset); > + int i; > + uint32_t offset, temp_offset; > + int *l2_table_array; > + int l2_array_size; > + > + if (zeroed) { > + temp_offset = VMDK_GTE_ZEROED; > + } else { > + temp_offset = m_data->offset; > + 
} > + > + temp_offset = cpu_to_le32(temp_offset); > + > + l2_array_size = sizeof(uint32_t) * m_data->nb_clusters; > + l2_table_array = qemu_try_blockalign(extent->file->bs, > + vmdk_align_offset(l2_array_size, 512)); Indentation is off. Use QEMU_ALIGN_UP, instead of vmdk_align_offset. 512 is a magic number, use BDRV_SECTOR_SIZE. > + if (l2_table_array == NULL) { > + return VMDK_ERROR; > + } > + memset(l2_table_array, 0, vmdk_align_offset(l2_array_size, 512)); > + > /* update L2 table */ > + offset = temp_offset; > + for (i = 0; i < m_data->nb_clusters; i++) { > + l2_table_array[i] = offset; > + if (!zeroed) { > + offset += 128; Something is going wrong here with endianness on BE host, I believe. > + } > + } > + > if (bdrv_pwrite_sync(extent->file, > - ((int64_t)m_data->l2_offset * 512) > - + (m_data->l2_index * sizeof(offset)), > - &offset, sizeof(offset)) < 0) { > + ((int64_t)m_data->l2_offset * 512) > + + ((m_data->l2_index) * sizeof(offset)), > + l2_table_array, l2_array_size) < 0) { You can fix the indentation while changing these lines. If not, don't change it, or at least don't make it uglier. > return VMDK_ERROR; > } > + > /* update backup L2 table */ > if (extent->l1_backup_table_offset != 0) { > m_data->l2_offset = extent->l1_backup_table[m_data->l1_index]; > if (bdrv_pwrite_sync(extent->file, > ((int64_t)m_data->l2_offset * 512) > - + (m_data->l2_index * sizeof(offset)), > - &offset, sizeof(offset)) < 0) { > + + ((m_data->l2_index) * sizeof(offset)), > + l2_table_array, l2_array_size) < 0) { Same here. 
> return VMDK_ERROR; > } > } > + > + offset = temp_offset; > if (m_data->l2_cache_entry) { > - *m_data->l2_cache_entry = offset; > + for (i = 0; i < m_data->nb_clusters; i++) { > + *m_data->l2_cache_entry = offset; > + m_data->l2_cache_entry++; > + > + if (!zeroed) { > + offset += 128; > + } > + } > + } > + > + qemu_vfree(l2_table_array); > + return VMDK_OK; > +} > + > +static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data, > + bool zeroed) > +{ > + int ret; > + > + while (m_data->next != NULL) { > + VmdkMetaData *next; > + > + ret = vmdk_alloc_cluster_link_l2(extent, m_data, zeroed); > + if (ret < 0) { > + return ret; > + } > + > + next = m_data->next; > + m_data = next; Why not simply "m_data = m_data->next" and drop "next" variable? > } > > return VMDK_OK; > @@ -1271,7 +1338,7 @@ exit: > */ > static int handle_alloc(BlockDriverState *bs, VmdkExtent *extent, > uint64_t offset, uint64_t *cluster_offset, > - int64_t *bytes, VmdkMetaData *m_data, > + int64_t *bytes, VmdkMetaData **m_data, > bool allocate, uint32_t *total_alloc_clusters) > { > int l1_index, l2_offset, l2_index; > @@ -1280,6 +1347,7 @@ static int handle_alloc(BlockDriverState *bs, VmdkExtent *extent, > uint32_t nb_clusters; > bool zeroed = false; > uint64_t skip_start_bytes, skip_end_bytes; > + VmdkMetaData *old_m_data; > int ret; > > ret = get_cluster_table(extent, offset, &l1_index, &l2_offset, > @@ -1330,13 +1398,21 @@ static int handle_alloc(BlockDriverState *bs, VmdkExtent *extent, > if (ret < 0) { > return ret; > } > - if (m_data) { > - m_data->valid = 1; > - m_data->l1_index = l1_index; > - m_data->l2_index = l2_index; > - m_data->l2_offset = l2_offset; > - m_data->l2_cache_entry = &l2_table[l2_index]; > - m_data->nb_clusters = nb_clusters; > + > + if (*m_data) { > + old_m_data = *m_data; > + *m_data = g_malloc0(sizeof(**m_data)); > + > + **m_data = (VmdkMetaData) { > + .valid = 1, > + .l1_index = l1_index, > + .l2_index = l2_index, > + .l2_offset = l2_offset, > + .l2_cache_entry = 
&l2_table[l2_index], > + .nb_clusters = nb_clusters, > + .offset = cluster_sector, > + .next = old_m_data, > + }; I think if the new m_data can be merged into the old, there is no need to allocate a new one. > } > } > *cluster_offset = cluster_sector << BDRV_SECTOR_BITS; > @@ -1365,7 +1441,7 @@ static int handle_alloc(BlockDriverState *bs, VmdkExtent *extent, > */ > static int vmdk_alloc_cluster_offset(BlockDriverState *bs, > VmdkExtent *extent, > - VmdkMetaData *m_data, uint64_t offset, > + VmdkMetaData **m_data, uint64_t offset, > bool allocate, uint64_t *cluster_offset, > int64_t bytes, > uint32_t *total_alloc_clusters) > @@ -1385,8 +1461,8 @@ static int vmdk_alloc_cluster_offset(BlockDriverState *bs, > new_cluster_offset = 0; > *cluster_offset = 0; > n_bytes = 0; > - if (m_data) { > - m_data->valid = 0; > + if (*m_data) { > + (*m_data)->valid = 0; > } > > /* due to L2 table margins all bytes may not get allocated at once */ > @@ -1768,9 +1844,11 @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset, > uint64_t cluster_offset; > uint64_t bytes_done = 0; > uint64_t extent_size; > - VmdkMetaData m_data; > + VmdkMetaData *m_data; > uint32_t total_alloc_clusters = 0; > > + m_data = g_malloc0(sizeof(*m_data)); > + > if (DIV_ROUND_UP(offset, BDRV_SECTOR_SIZE) > bs->total_sectors) { > error_report("Wrong offset: offset=0x%" PRIx64 > " total_sectors=0x%" PRIx64, > @@ -1779,6 +1857,7 @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset, > } > > while (bytes > 0) { > + m_data->next = NULL; > extent = find_extent(s, offset >> BDRV_SECTOR_BITS, extent); > if (!extent) { > return -EIO; > @@ -1825,7 +1904,7 @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset, > total_alloc_clusters; > if (!zero_dry_run) { > /* update L2 tables */ > - if (vmdk_L2update(extent, &m_data, VMDK_GTE_ZEROED) > + if (vmdk_L2update(extent, m_data, zeroed) > != VMDK_OK) { > return -EIO; > } > @@ -1839,10 +1918,9 @@ static int vmdk_pwritev(BlockDriverState *bs, 
uint64_t offset, > if (ret) { > return ret; > } > - if (m_data.valid) { > + if (m_data->valid) { > /* update L2 tables */ > - if (vmdk_L2update(extent, &m_data, > - cluster_offset >> BDRV_SECTOR_BITS) > + if (vmdk_L2update(extent, m_data, zeroed) > != VMDK_OK) { > return -EIO; > } > @@ -1852,6 +1930,13 @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset, > offset += n_bytes; > bytes_done += n_bytes; > > + while (m_data->next != NULL) { > + VmdkMetaData *next; > + next = m_data->next; > + g_free(m_data); > + m_data = next; > + } > + > /* update CID on the first write every time the virtual disk is > * opened */ > if (!s->cid_updated) { > @@ -1862,6 +1947,7 @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset, > s->cid_updated = true; > } > } > + g_free(m_data); This is weird, you free all but the last m_data with a while loop, a few lines above, and this one with a separate g_free(). Please use one loop: for (p = m_data; p; p = next) { next = p->next; g_free(p); } > return 0; > } > > -- > 2.6.2 >
On Fri, Apr 21, 2017 at 1:45 PM, Fam Zheng <famz@redhat.com> wrote: > On Sat, 04/01 20:14, Ashijeet Acharya wrote: >> Include a next pointer in VmdkMetaData struct to point to the previous >> allocated L2 table. Modify vmdk_L2update to start updating metadata for >> allocation of multiple clusters at once. >> >> Signed-off-by: Ashijeet Acharya <ashijeetacharya@gmail.com> > > This is the metadata part of the coalesed allocation. I think patch 3 is > functionally incomplete without these changes, and is perhaps broken because > metadata is not handled correctly. > > Such an "intermediate functional regression" is not good in a series, which we > need to avoid. I have moved this patch right after patch 3 because merging both will result in an unnecessary huge patch. Will that work? > >> --- >> block/vmdk.c | 136 ++++++++++++++++++++++++++++++++++++++++++++++++----------- >> 1 file changed, 111 insertions(+), 25 deletions(-) >> >> diff --git a/block/vmdk.c b/block/vmdk.c >> index 9456ddd..c7675db 100644 >> --- a/block/vmdk.c >> +++ b/block/vmdk.c >> @@ -137,6 +137,8 @@ typedef struct VmdkMetaData { >> int valid; >> uint32_t *l2_cache_entry; >> uint32_t nb_clusters; >> + uint32_t offset; >> + struct VmdkMetaData *next; >> } VmdkMetaData; >> >> typedef struct VmdkGrainMarker { >> @@ -263,6 +265,12 @@ static inline uint64_t size_to_clusters(VmdkExtent *extent, uint64_t size) >> return (DIV_ROUND_UP(size + round_off_size, BDRV_SECTOR_SIZE * 128) - 1); >> } >> >> +static inline int64_t vmdk_align_offset(int64_t offset, int n) >> +{ >> + offset = (offset + n - 1) & ~(n - 1); >> + return offset; >> +} >> + >> static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent) >> { >> char *desc; >> @@ -1037,29 +1045,88 @@ static void vmdk_refresh_limits(BlockDriverState *bs, Error **errp) >> } >> } >> >> -static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data, >> - uint32_t offset) >> +static int vmdk_alloc_cluster_link_l2(VmdkExtent *extent, >> + VmdkMetaData *m_data, 
bool zeroed) >> { >> - offset = cpu_to_le32(offset); >> + int i; >> + uint32_t offset, temp_offset; >> + int *l2_table_array; >> + int l2_array_size; >> + >> + if (zeroed) { >> + temp_offset = VMDK_GTE_ZEROED; >> + } else { >> + temp_offset = m_data->offset; >> + } >> + >> + temp_offset = cpu_to_le32(temp_offset); >> + >> + l2_array_size = sizeof(uint32_t) * m_data->nb_clusters; >> + l2_table_array = qemu_try_blockalign(extent->file->bs, >> + vmdk_align_offset(l2_array_size, 512)); > > Indentation is off. > > Use QEMU_ALIGN_UP, instead of vmdk_align_offset. > > 512 is a magic number, use BDRV_SECTOR_SIZE. Done > >> + if (l2_table_array == NULL) { >> + return VMDK_ERROR; >> + } >> + memset(l2_table_array, 0, vmdk_align_offset(l2_array_size, 512)); >> + >> /* update L2 table */ >> + offset = temp_offset; >> + for (i = 0; i < m_data->nb_clusters; i++) { >> + l2_table_array[i] = offset; >> + if (!zeroed) { >> + offset += 128; > > Something is going wrong here with endianness on BE host, I believe. I have changed temp_offset to LE above, wouldn't that be enough. I am not sure. > >> + } >> + } >> + >> if (bdrv_pwrite_sync(extent->file, >> - ((int64_t)m_data->l2_offset * 512) >> - + (m_data->l2_index * sizeof(offset)), >> - &offset, sizeof(offset)) < 0) { >> + ((int64_t)m_data->l2_offset * 512) >> + + ((m_data->l2_index) * sizeof(offset)), >> + l2_table_array, l2_array_size) < 0) { > > You can fix the indentation while changing these lines. If not, don't change it, > or at least don't make it uglier. I have aligned it, if it still looks ugly in v4, I will revert. 
> >> return VMDK_ERROR; >> } >> + >> /* update backup L2 table */ >> if (extent->l1_backup_table_offset != 0) { >> m_data->l2_offset = extent->l1_backup_table[m_data->l1_index]; >> if (bdrv_pwrite_sync(extent->file, >> ((int64_t)m_data->l2_offset * 512) >> - + (m_data->l2_index * sizeof(offset)), >> - &offset, sizeof(offset)) < 0) { >> + + ((m_data->l2_index) * sizeof(offset)), >> + l2_table_array, l2_array_size) < 0) { > > Same here. > >> return VMDK_ERROR; >> } >> } >> + >> + offset = temp_offset; >> if (m_data->l2_cache_entry) { >> - *m_data->l2_cache_entry = offset; >> + for (i = 0; i < m_data->nb_clusters; i++) { >> + *m_data->l2_cache_entry = offset; >> + m_data->l2_cache_entry++; >> + >> + if (!zeroed) { >> + offset += 128; >> + } >> + } >> + } >> + >> + qemu_vfree(l2_table_array); >> + return VMDK_OK; >> +} >> + >> +static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data, >> + bool zeroed) >> +{ >> + int ret; >> + >> + while (m_data->next != NULL) { >> + VmdkMetaData *next; >> + >> + ret = vmdk_alloc_cluster_link_l2(extent, m_data, zeroed); >> + if (ret < 0) { >> + return ret; >> + } >> + >> + next = m_data->next; >> + m_data = next; > > Why not simply "m_data = m_data->next" and drop "next" variable? 
>> } >> >> return VMDK_OK; >> @@ -1271,7 +1338,7 @@ exit: >> */ >> static int handle_alloc(BlockDriverState *bs, VmdkExtent *extent, >> uint64_t offset, uint64_t *cluster_offset, >> - int64_t *bytes, VmdkMetaData *m_data, >> + int64_t *bytes, VmdkMetaData **m_data, >> bool allocate, uint32_t *total_alloc_clusters) >> { >> int l1_index, l2_offset, l2_index; >> @@ -1280,6 +1347,7 @@ static int handle_alloc(BlockDriverState *bs, VmdkExtent *extent, >> uint32_t nb_clusters; >> bool zeroed = false; >> uint64_t skip_start_bytes, skip_end_bytes; >> + VmdkMetaData *old_m_data; >> int ret; >> >> ret = get_cluster_table(extent, offset, &l1_index, &l2_offset, >> @@ -1330,13 +1398,21 @@ static int handle_alloc(BlockDriverState *bs, VmdkExtent *extent, >> if (ret < 0) { >> return ret; >> } >> - if (m_data) { >> - m_data->valid = 1; >> - m_data->l1_index = l1_index; >> - m_data->l2_index = l2_index; >> - m_data->l2_offset = l2_offset; >> - m_data->l2_cache_entry = &l2_table[l2_index]; >> - m_data->nb_clusters = nb_clusters; >> + >> + if (*m_data) { >> + old_m_data = *m_data; >> + *m_data = g_malloc0(sizeof(**m_data)); >> + >> + **m_data = (VmdkMetaData) { >> + .valid = 1, >> + .l1_index = l1_index, >> + .l2_index = l2_index, >> + .l2_offset = l2_offset, >> + .l2_cache_entry = &l2_table[l2_index], >> + .nb_clusters = nb_clusters, >> + .offset = cluster_sector, >> + .next = old_m_data, >> + }; > > I think if the new m_data can be merged into the old, there is no need to > allocate a new one. Do you mean that if the clusters lie in the same l2 table, then merge them? I think this case only appears when I leave out the first and last cluster for COW. If I misunderstood, sorry! I think I will post v4 without attending this issue and we can discuss this when you are available after the weekend. 
> >> } >> } >> *cluster_offset = cluster_sector << BDRV_SECTOR_BITS; >> @@ -1365,7 +1441,7 @@ static int handle_alloc(BlockDriverState *bs, VmdkExtent *extent, >> */ >> static int vmdk_alloc_cluster_offset(BlockDriverState *bs, >> VmdkExtent *extent, >> - VmdkMetaData *m_data, uint64_t offset, >> + VmdkMetaData **m_data, uint64_t offset, >> bool allocate, uint64_t *cluster_offset, >> int64_t bytes, >> uint32_t *total_alloc_clusters) >> @@ -1385,8 +1461,8 @@ static int vmdk_alloc_cluster_offset(BlockDriverState *bs, >> new_cluster_offset = 0; >> *cluster_offset = 0; >> n_bytes = 0; >> - if (m_data) { >> - m_data->valid = 0; >> + if (*m_data) { >> + (*m_data)->valid = 0; >> } >> >> /* due to L2 table margins all bytes may not get allocated at once */ >> @@ -1768,9 +1844,11 @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset, >> uint64_t cluster_offset; >> uint64_t bytes_done = 0; >> uint64_t extent_size; >> - VmdkMetaData m_data; >> + VmdkMetaData *m_data; >> uint32_t total_alloc_clusters = 0; >> >> + m_data = g_malloc0(sizeof(*m_data)); >> + [scroll till here] [1] So this allocation will need to move....[2] >> if (DIV_ROUND_UP(offset, BDRV_SECTOR_SIZE) > bs->total_sectors) { >> error_report("Wrong offset: offset=0x%" PRIx64 >> " total_sectors=0x%" PRIx64, >> @@ -1779,6 +1857,7 @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset, >> } >> >> while (bytes > 0) { ....[2] here. Thus we will need to allocate it again every time we enter here otherwise the very next line m_data->next=NULL will segfault. So maybe it's good to free it separately? I will retain it this way for v4 and change it otherwise if you still say so after my reasoning in v5. 
>> + m_data->next = NULL; >> extent = find_extent(s, offset >> BDRV_SECTOR_BITS, extent); >> if (!extent) { >> return -EIO; >> @@ -1825,7 +1904,7 @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset, >> total_alloc_clusters; >> if (!zero_dry_run) { >> /* update L2 tables */ >> - if (vmdk_L2update(extent, &m_data, VMDK_GTE_ZEROED) >> + if (vmdk_L2update(extent, m_data, zeroed) >> != VMDK_OK) { >> return -EIO; >> } >> @@ -1839,10 +1918,9 @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset, >> if (ret) { >> return ret; >> } >> - if (m_data.valid) { >> + if (m_data->valid) { >> /* update L2 tables */ >> - if (vmdk_L2update(extent, &m_data, >> - cluster_offset >> BDRV_SECTOR_BITS) >> + if (vmdk_L2update(extent, m_data, zeroed) >> != VMDK_OK) { >> return -EIO; >> } >> @@ -1852,6 +1930,13 @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset, >> offset += n_bytes; >> bytes_done += n_bytes; >> >> + while (m_data->next != NULL) { >> + VmdkMetaData *next; >> + next = m_data->next; >> + g_free(m_data); >> + m_data = next; >> + } >> + >> /* update CID on the first write every time the virtual disk is >> * opened */ >> if (!s->cid_updated) { >> @@ -1862,6 +1947,7 @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset, >> s->cid_updated = true; >> } >> } >> + g_free(m_data); > > This is weird, you free all but the last m_data with a while loop, a few lines > above, and this one with a separate g_free(). > > Please use one loop: > > for (p = m_data; p; p = next) { > next = p->next; > g_free(p); > } I have a good (maybe good enough) reason for it, if I free it in the while loop above, then I will need to allocate it again when we enter the superior while(bytes>0) loop, otherwise we will segfault for everything from that point onwards....[scroll up __^] [1] Ashijeet
diff --git a/block/vmdk.c b/block/vmdk.c index 9456ddd..c7675db 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -137,6 +137,8 @@ typedef struct VmdkMetaData { int valid; uint32_t *l2_cache_entry; uint32_t nb_clusters; + uint32_t offset; + struct VmdkMetaData *next; } VmdkMetaData; typedef struct VmdkGrainMarker { @@ -263,6 +265,12 @@ static inline uint64_t size_to_clusters(VmdkExtent *extent, uint64_t size) return (DIV_ROUND_UP(size + round_off_size, BDRV_SECTOR_SIZE * 128) - 1); } +static inline int64_t vmdk_align_offset(int64_t offset, int n) +{ + offset = (offset + n - 1) & ~(n - 1); + return offset; +} + static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent) { char *desc; @@ -1037,29 +1045,88 @@ static void vmdk_refresh_limits(BlockDriverState *bs, Error **errp) } } -static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data, - uint32_t offset) +static int vmdk_alloc_cluster_link_l2(VmdkExtent *extent, + VmdkMetaData *m_data, bool zeroed) { - offset = cpu_to_le32(offset); + int i; + uint32_t offset, temp_offset; + int *l2_table_array; + int l2_array_size; + + if (zeroed) { + temp_offset = VMDK_GTE_ZEROED; + } else { + temp_offset = m_data->offset; + } + + temp_offset = cpu_to_le32(temp_offset); + + l2_array_size = sizeof(uint32_t) * m_data->nb_clusters; + l2_table_array = qemu_try_blockalign(extent->file->bs, + vmdk_align_offset(l2_array_size, 512)); + if (l2_table_array == NULL) { + return VMDK_ERROR; + } + memset(l2_table_array, 0, vmdk_align_offset(l2_array_size, 512)); + /* update L2 table */ + offset = temp_offset; + for (i = 0; i < m_data->nb_clusters; i++) { + l2_table_array[i] = offset; + if (!zeroed) { + offset += 128; + } + } + if (bdrv_pwrite_sync(extent->file, - ((int64_t)m_data->l2_offset * 512) - + (m_data->l2_index * sizeof(offset)), - &offset, sizeof(offset)) < 0) { + ((int64_t)m_data->l2_offset * 512) + + ((m_data->l2_index) * sizeof(offset)), + l2_table_array, l2_array_size) < 0) { return VMDK_ERROR; } + /* update backup L2 
table */ if (extent->l1_backup_table_offset != 0) { m_data->l2_offset = extent->l1_backup_table[m_data->l1_index]; if (bdrv_pwrite_sync(extent->file, ((int64_t)m_data->l2_offset * 512) - + (m_data->l2_index * sizeof(offset)), - &offset, sizeof(offset)) < 0) { + + ((m_data->l2_index) * sizeof(offset)), + l2_table_array, l2_array_size) < 0) { return VMDK_ERROR; } } + + offset = temp_offset; if (m_data->l2_cache_entry) { - *m_data->l2_cache_entry = offset; + for (i = 0; i < m_data->nb_clusters; i++) { + *m_data->l2_cache_entry = offset; + m_data->l2_cache_entry++; + + if (!zeroed) { + offset += 128; + } + } + } + + qemu_vfree(l2_table_array); + return VMDK_OK; +} + +static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data, + bool zeroed) +{ + int ret; + + while (m_data->next != NULL) { + VmdkMetaData *next; + + ret = vmdk_alloc_cluster_link_l2(extent, m_data, zeroed); + if (ret < 0) { + return ret; + } + + next = m_data->next; + m_data = next; } return VMDK_OK; @@ -1271,7 +1338,7 @@ exit: */ static int handle_alloc(BlockDriverState *bs, VmdkExtent *extent, uint64_t offset, uint64_t *cluster_offset, - int64_t *bytes, VmdkMetaData *m_data, + int64_t *bytes, VmdkMetaData **m_data, bool allocate, uint32_t *total_alloc_clusters) { int l1_index, l2_offset, l2_index; @@ -1280,6 +1347,7 @@ static int handle_alloc(BlockDriverState *bs, VmdkExtent *extent, uint32_t nb_clusters; bool zeroed = false; uint64_t skip_start_bytes, skip_end_bytes; + VmdkMetaData *old_m_data; int ret; ret = get_cluster_table(extent, offset, &l1_index, &l2_offset, @@ -1330,13 +1398,21 @@ static int handle_alloc(BlockDriverState *bs, VmdkExtent *extent, if (ret < 0) { return ret; } - if (m_data) { - m_data->valid = 1; - m_data->l1_index = l1_index; - m_data->l2_index = l2_index; - m_data->l2_offset = l2_offset; - m_data->l2_cache_entry = &l2_table[l2_index]; - m_data->nb_clusters = nb_clusters; + + if (*m_data) { + old_m_data = *m_data; + *m_data = g_malloc0(sizeof(**m_data)); + + **m_data = 
(VmdkMetaData) { + .valid = 1, + .l1_index = l1_index, + .l2_index = l2_index, + .l2_offset = l2_offset, + .l2_cache_entry = &l2_table[l2_index], + .nb_clusters = nb_clusters, + .offset = cluster_sector, + .next = old_m_data, + }; } } *cluster_offset = cluster_sector << BDRV_SECTOR_BITS; @@ -1365,7 +1441,7 @@ static int handle_alloc(BlockDriverState *bs, VmdkExtent *extent, */ static int vmdk_alloc_cluster_offset(BlockDriverState *bs, VmdkExtent *extent, - VmdkMetaData *m_data, uint64_t offset, + VmdkMetaData **m_data, uint64_t offset, bool allocate, uint64_t *cluster_offset, int64_t bytes, uint32_t *total_alloc_clusters) @@ -1385,8 +1461,8 @@ static int vmdk_alloc_cluster_offset(BlockDriverState *bs, new_cluster_offset = 0; *cluster_offset = 0; n_bytes = 0; - if (m_data) { - m_data->valid = 0; + if (*m_data) { + (*m_data)->valid = 0; } /* due to L2 table margins all bytes may not get allocated at once */ @@ -1768,9 +1844,11 @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t cluster_offset; uint64_t bytes_done = 0; uint64_t extent_size; - VmdkMetaData m_data; + VmdkMetaData *m_data; uint32_t total_alloc_clusters = 0; + m_data = g_malloc0(sizeof(*m_data)); + if (DIV_ROUND_UP(offset, BDRV_SECTOR_SIZE) > bs->total_sectors) { error_report("Wrong offset: offset=0x%" PRIx64 " total_sectors=0x%" PRIx64, @@ -1779,6 +1857,7 @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset, } while (bytes > 0) { + m_data->next = NULL; extent = find_extent(s, offset >> BDRV_SECTOR_BITS, extent); if (!extent) { return -EIO; @@ -1825,7 +1904,7 @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset, total_alloc_clusters; if (!zero_dry_run) { /* update L2 tables */ - if (vmdk_L2update(extent, &m_data, VMDK_GTE_ZEROED) + if (vmdk_L2update(extent, m_data, zeroed) != VMDK_OK) { return -EIO; } @@ -1839,10 +1918,9 @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset, if (ret) { return ret; } - if (m_data.valid) { + if (m_data->valid) { /* 
update L2 tables */ - if (vmdk_L2update(extent, &m_data, - cluster_offset >> BDRV_SECTOR_BITS) + if (vmdk_L2update(extent, m_data, zeroed) != VMDK_OK) { return -EIO; } @@ -1852,6 +1930,13 @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset, offset += n_bytes; bytes_done += n_bytes; + while (m_data->next != NULL) { + VmdkMetaData *next; + next = m_data->next; + g_free(m_data); + m_data = next; + } + /* update CID on the first write every time the virtual disk is * opened */ if (!s->cid_updated) { @@ -1862,6 +1947,7 @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset, s->cid_updated = true; } } + g_free(m_data); return 0; }
Include a next pointer in VmdkMetaData struct to point to the previously allocated L2 table. Modify vmdk_L2update to start updating metadata for allocation of multiple clusters at once. Signed-off-by: Ashijeet Acharya <ashijeetacharya@gmail.com> --- block/vmdk.c | 136 ++++++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 111 insertions(+), 25 deletions(-)