diff mbox series

[3/6] parallels: Add checking and repairing duplicate offsets in BAT

Message ID 20220902085300.508078-4-alexander.ivanov@virtuozzo.com (mailing list archive)
State New, archived
Headers show
Series parallels: Add duplication check, repair at open, fix bugs | expand

Commit Message

Alexander Ivanov Sept. 2, 2022, 8:52 a.m. UTC
Cluster offsets must be unique among all BAT entries.
Find duplicate offsets in the BAT.

If a duplicated offset is found fix it by copying the content
of the relevant cluster to a new allocated cluster and
set the new cluster offset to the duplicated entry.

Add host_cluster_index() helper to deduplicate the code.
Add highest_offset() helper. It will be used for code deduplication
in the next patch.

Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
---
 block/parallels.c | 136 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 136 insertions(+)

Comments

Denis V. Lunev Sept. 7, 2022, 4:14 p.m. UTC | #1
On 9/2/22 10:52, Alexander Ivanov wrote:
> Cluster offsets must be unique among all BAT entries.
> Find duplicate offsets in the BAT.
>
> If a duplicated offset is found fix it by copying the content
> of the relevant cluster to a new allocated cluster and
> set the new cluster offset to the duplicated entry.
>
> Add host_cluster_index() helper to deduplicate the code.
> Add highest_offset() helper. It will be used for code deduplication
> in the next patch.
>
> Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
> ---
>   block/parallels.c | 136 ++++++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 136 insertions(+)
>
> diff --git a/block/parallels.c b/block/parallels.c
> index dbcaf5d310..339ce45634 100644
> --- a/block/parallels.c
> +++ b/block/parallels.c
> @@ -136,6 +136,26 @@ static int cluster_remainder(BDRVParallelsState *s, int64_t sector_num,
>       return MIN(nb_sectors, ret);
>   }
>   
> +static uint32_t host_cluster_index(BDRVParallelsState *s, int64_t off)
> +{
> +    off -= s->header->data_off << BDRV_SECTOR_BITS;
> +    return off / s->cluster_size;
not properly aligned cluster is a problem by itself
This is possible for older image format and I believe that
we should run "leak check" even for it and note that
in "not aligned" case one should call 'qemu-img convert"
and copy what can be saved. The error is really hard to
recover.

> +}
> +
> +static int64_t highest_offset(BDRVParallelsState *s)
> +{
> +    int64_t off, high_off = 0;
> +    int i;
> +
> +    for (i = 0; i < s->bat_size; i++) {
> +        off = bat2sect(s, i) << BDRV_SECTOR_BITS;
> +        if (off > high_off) {
> +            high_off = off;
> +        }
> +    }
> +    return high_off;
> +}
> +
>   static int64_t block_status(BDRVParallelsState *s, int64_t sector_num,
>                               int nb_sectors, int *pnum)
>   {
> @@ -547,6 +567,114 @@ static int parallels_check_leak(BlockDriverState *bs,
>       return 0;
>   }
>   
> +static int parallels_check_duplicate(BlockDriverState *bs,
> +                                     BdrvCheckResult *res,
> +                                     BdrvCheckMode fix)
> +{
> +    BDRVParallelsState *s = bs->opaque;
> +    QEMUIOVector qiov;
> +    int64_t off, high_off, sector;
> +    unsigned long *bitmap;
> +    uint32_t i, bitmap_size, cluster_index;
> +    int n, ret = 0;
> +    uint64_t *buf = NULL;
> +    bool new_allocations = false;
> +
> +    high_off = highest_offset(s);
> +    if (high_off == 0) {
> +        return 0;
> +    }
> +
> +    /*
> +     * Create a bitmap of used clusters.
> +     * If a bit is set, there is a BAT entry pointing to this cluster.
> +     * Loop through the BAT entrues, check bits relevant to an entry offset.
s/entrues/entries/
> +     * If bit is set, this entry is duplicated. Otherwise set the bit.
> +     */
> +    bitmap_size = host_cluster_index(s, high_off) + 1;
> +    bitmap = bitmap_new(bitmap_size);
this is wrong de-facto. once you deduplicate, you will have clusters
outside the bitmap. We should use MAX(high_off, virtual_size(bds))
OK, may be this is correct, see my note below, but that's needs
a comment.

> +
> +    buf = g_malloc(s->cluster_size);
nope-nope. you should use memalign as you could have
O_DIRECT IO later on with this buffer

> +    qemu_iovec_init(&qiov, 0);
> +    qemu_iovec_add(&qiov, buf, s->cluster_size);
> +
> +    for (i = 0; i < s->bat_size; i++) {
> +        off = bat2sect(s, i) << BDRV_SECTOR_BITS;
> +        if (off == 0) {
> +            continue;
> +        }
> +
> +        cluster_index = host_cluster_index(s, off);
> +        if (test_bit(cluster_index, bitmap)) {
> +            /* this cluster duplicates another one */
> +            fprintf(stderr,
> +                    "%s duplicate offset in BAT entry %u\n",
> +                    fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
> +
> +            res->corruptions++;
> +
> +            if (fix & BDRV_FIX_ERRORS) {
> +                /*
> +                 * Reset the entry and allocate a new cluster
> +                 * for the relevant guest offset. In this way we let
> +                 * the lower layer to place the new cluster properly.
> +                 * Copy the original cluster to the allocated one.
> +                 */
> +                parallels_set_bat_entry(s, i, 0);
> +
> +                ret = bdrv_pread(bs->file, off, s->cluster_size, buf, 0);
I believe that we are for sure in co-routine context thus
bdrv_co_pread would be better


> +                if (ret < 0) {
> +                    res->check_errors++;
> +                    goto out;
> +                }
> +
> +                sector = (i * s->cluster_size) >> BDRV_SECTOR_BITS;
> +                off = allocate_clusters(bs, sector, s->tracks, &n);
no way. Please do not keep bytes and sectors in the same variable
That makes code unreadable

> +                if (off < 0) {
> +                    res->check_errors++;
> +                    ret = off;
> +                    goto out;
> +                }
> +                off <<= BDRV_SECTOR_BITS;
> +                if (off > high_off) {
> +                    high_off = off;
> +                }
> +
> +                ret = bdrv_co_pwritev(bs->file, off, s->cluster_size, &qiov, 0);
> +                if (ret < 0) {
> +                    res->check_errors++;
> +                    goto out;
> +                }
> +
> +                new_allocations = true;
> +                res->corruptions_fixed++;
We are not setting the bit for the new cluster offset and that requires
a note, that all out of image clusters are already cleared and we
could safely use this position.

Anyway, this is potentially not enough. allocate_cluster() in the
further iterations will reuse holed offsets inside the image and
this potentially puts us into the trouble. This should be noted.

> +            }
> +
> +        } else {
> +            bitmap_set(bitmap, cluster_index, 1);
> +        }
> +    }
> +
> +    if (new_allocations) {
> +        /*
> +         * When new clusters are allocated, file size increases
> +         * by 128 Mb blocks. We need to truncate the file to the
> +         * right size.
> +         */
> +        ret = parallels_handle_leak(bs, res, high_off, true);
> +        if (ret < 0) {
> +            res->check_errors++;
> +            goto out;
> +        }
> +    }
This is redundant I believe. parallels_close should do the job well.
More over, we will have leak detector triggered and thus false
error printed.


> +
> +out:
> +    qemu_iovec_destroy(&qiov);
> +    g_free(buf);
> +    g_free(bitmap);
> +    return ret;
> +}
> +
>   static void parallels_collect_statistics(BlockDriverState *bs,
>                                            BdrvCheckResult *res,
>                                            BdrvCheckMode fix)
> @@ -595,6 +723,14 @@ static int coroutine_fn parallels_co_check(BlockDriverState *bs,
>               return ret;
>           }
>   
> +        /* This checks only for "WithouFreSpacExt" format */
> +        if (!memcmp(s->header->magic, HEADER_MAGIC2, 16)) {
That is I am seriously against. Why only newer format supported?

> +            ret = parallels_check_duplicate(bs, res, fix);
> +            if (ret < 0) {
> +                return ret;
> +            }
> +        }
> +
>           parallels_collect_statistics(bs, res, fix);
>       }
>
Denis V. Lunev Sept. 8, 2022, 5:15 p.m. UTC | #2
On 9/2/22 10:52, Alexander Ivanov wrote:
> Cluster offsets must be unique among all BAT entries.
> Find duplicate offsets in the BAT.
>
> If a duplicated offset is found fix it by copying the content
> of the relevant cluster to a new allocated cluster and
> set the new cluster offset to the duplicated entry.
>
> Add host_cluster_index() helper to deduplicate the code.
> Add highest_offset() helper. It will be used for code deduplication
> in the next patch.
>
> Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
> ---
>   block/parallels.c | 136 ++++++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 136 insertions(+)
>
> diff --git a/block/parallels.c b/block/parallels.c
> index dbcaf5d310..339ce45634 100644
> --- a/block/parallels.c
> +++ b/block/parallels.c
> @@ -136,6 +136,26 @@ static int cluster_remainder(BDRVParallelsState *s, int64_t sector_num,
>       return MIN(nb_sectors, ret);
>   }
>   
> +static uint32_t host_cluster_index(BDRVParallelsState *s, int64_t off)
> +{
> +    off -= s->header->data_off << BDRV_SECTOR_BITS;
> +    return off / s->cluster_size;
> +}
> +
> +static int64_t highest_offset(BDRVParallelsState *s)
> +{
> +    int64_t off, high_off = 0;
> +    int i;
> +
> +    for (i = 0; i < s->bat_size; i++) {
> +        off = bat2sect(s, i) << BDRV_SECTOR_BITS;
> +        if (off > high_off) {
> +            high_off = off;
> +        }
> +    }
> +    return high_off;
> +}
> +
>   static int64_t block_status(BDRVParallelsState *s, int64_t sector_num,
>                               int nb_sectors, int *pnum)
>   {
> @@ -547,6 +567,114 @@ static int parallels_check_leak(BlockDriverState *bs,
>       return 0;
>   }
>   
> +static int parallels_check_duplicate(BlockDriverState *bs,
> +                                     BdrvCheckResult *res,
> +                                     BdrvCheckMode fix)
> +{
> +    BDRVParallelsState *s = bs->opaque;
> +    QEMUIOVector qiov;
> +    int64_t off, high_off, sector;
> +    unsigned long *bitmap;
> +    uint32_t i, bitmap_size, cluster_index;
> +    int n, ret = 0;
> +    uint64_t *buf = NULL;
> +    bool new_allocations = false;
> +
> +    high_off = highest_offset(s);
> +    if (high_off == 0) {
> +        return 0;
> +    }
> +
> +    /*
> +     * Create a bitmap of used clusters.
> +     * If a bit is set, there is a BAT entry pointing to this cluster.
> +     * Loop through the BAT entrues, check bits relevant to an entry offset.
> +     * If bit is set, this entry is duplicated. Otherwise set the bit.
> +     */
> +    bitmap_size = host_cluster_index(s, high_off) + 1;
> +    bitmap = bitmap_new(bitmap_size);
> +
> +    buf = g_malloc(s->cluster_size);
> +    qemu_iovec_init(&qiov, 0);
> +    qemu_iovec_add(&qiov, buf, s->cluster_size);
> +
> +    for (i = 0; i < s->bat_size; i++) {
> +        off = bat2sect(s, i) << BDRV_SECTOR_BITS;
> +        if (off == 0) {
> +            continue;
> +        }
> +
> +        cluster_index = host_cluster_index(s, off);
> +        if (test_bit(cluster_index, bitmap)) {
> +            /* this cluster duplicates another one */
> +            fprintf(stderr,
> +                    "%s duplicate offset in BAT entry %u\n",
> +                    fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
> +
> +            res->corruptions++;
> +
> +            if (fix & BDRV_FIX_ERRORS) {
> +                /*
> +                 * Reset the entry and allocate a new cluster
> +                 * for the relevant guest offset. In this way we let
> +                 * the lower layer to place the new cluster properly.
> +                 * Copy the original cluster to the allocated one.
> +                 */
> +                parallels_set_bat_entry(s, i, 0);
> +
> +                ret = bdrv_pread(bs->file, off, s->cluster_size, buf, 0);
> +                if (ret < 0) {
> +                    res->check_errors++;
> +                    goto out;
> +                }
> +
> +                sector = (i * s->cluster_size) >> BDRV_SECTOR_BITS;
> +                off = allocate_clusters(bs, sector, s->tracks, &n);
> +                if (off < 0) {
> +                    res->check_errors++;
> +                    ret = off;
> +                    goto out;
> +                }
> +                off <<= BDRV_SECTOR_BITS;
> +                if (off > high_off) {
> +                    high_off = off;
> +                }
> +
> +                ret = bdrv_co_pwritev(bs->file, off, s->cluster_size, &qiov, 0);
> +                if (ret < 0) {
> +                    res->check_errors++;
> +                    goto out;
> +                }
> +
> +                new_allocations = true;
> +                res->corruptions_fixed++;
> +            }
> +
> +        } else {
> +            bitmap_set(bitmap, cluster_index, 1);
> +        }
> +    }
> +
> +    if (new_allocations) {
> +        /*
> +         * When new clusters are allocated, file size increases
> +         * by 128 Mb blocks. We need to truncate the file to the
> +         * right size.
> +         */
> +        ret = parallels_handle_leak(bs, res, high_off, true);
> +        if (ret < 0) {
> +            res->check_errors++;
> +            goto out;
> +        }
> +    }
OK. I have re-read the code with test case handy and now
understand the situation completely.

The problem is that img_check() routine calls bdrv_check()
actually TWICE without image reopening and thus we
comes to some trouble on the second check as we have
had preallocated some space inside the image. This
is root of the problem.

Though this kind of the fix seems like overkill, I still do not
like the resulted code. It at least do not scale with the checks
which we will add further.

I think that we could do that in two ways:
* temporary set prealloc_mode to none at start of parallels_co_check
   and return it back at the end
* parallels_leak_check should just set data_end and do nothing
    more + we should have truncate at the end of the
    parallels_co_check() if we have had performed ANY fix

Den

> +
> +out:
> +    qemu_iovec_destroy(&qiov);
> +    g_free(buf);
> +    g_free(bitmap);
> +    return ret;
> +}
> +
>   static void parallels_collect_statistics(BlockDriverState *bs,
>                                            BdrvCheckResult *res,
>                                            BdrvCheckMode fix)
> @@ -595,6 +723,14 @@ static int coroutine_fn parallels_co_check(BlockDriverState *bs,
>               return ret;
>           }
>   
> +        /* This checks only for "WithouFreSpacExt" format */
> +        if (!memcmp(s->header->magic, HEADER_MAGIC2, 16)) {
> +            ret = parallels_check_duplicate(bs, res, fix);
> +            if (ret < 0) {
> +                return ret;
> +            }
> +        }
> +
>           parallels_collect_statistics(bs, res, fix);
>       }
>
Denis V. Lunev Sept. 8, 2022, 5:45 p.m. UTC | #3
On 9/8/22 19:15, Denis V. Lunev wrote:
> On 9/2/22 10:52, Alexander Ivanov wrote:
>> Cluster offsets must be unique among all BAT entries.
>> Find duplicate offsets in the BAT.
>>
>> If a duplicated offset is found fix it by copying the content
>> of the relevant cluster to a new allocated cluster and
>> set the new cluster offset to the duplicated entry.
>>
>> Add host_cluster_index() helper to deduplicate the code.
>> Add highest_offset() helper. It will be used for code deduplication
>> in the next patch.
>>
>> Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
>> ---
>>   block/parallels.c | 136 ++++++++++++++++++++++++++++++++++++++++++++++
>>   1 file changed, 136 insertions(+)
>>
>> diff --git a/block/parallels.c b/block/parallels.c
>> index dbcaf5d310..339ce45634 100644
>> --- a/block/parallels.c
>> +++ b/block/parallels.c
>> @@ -136,6 +136,26 @@ static int cluster_remainder(BDRVParallelsState 
>> *s, int64_t sector_num,
>>       return MIN(nb_sectors, ret);
>>   }
>>   +static uint32_t host_cluster_index(BDRVParallelsState *s, int64_t 
>> off)
>> +{
>> +    off -= s->header->data_off << BDRV_SECTOR_BITS;
>> +    return off / s->cluster_size;
>> +}
>> +
>> +static int64_t highest_offset(BDRVParallelsState *s)
>> +{
>> +    int64_t off, high_off = 0;
>> +    int i;
>> +
>> +    for (i = 0; i < s->bat_size; i++) {
>> +        off = bat2sect(s, i) << BDRV_SECTOR_BITS;
>> +        if (off > high_off) {
>> +            high_off = off;
>> +        }
>> +    }
>> +    return high_off;
>> +}
>> +
>>   static int64_t block_status(BDRVParallelsState *s, int64_t sector_num,
>>                               int nb_sectors, int *pnum)
>>   {
>> @@ -547,6 +567,114 @@ static int 
>> parallels_check_leak(BlockDriverState *bs,
>>       return 0;
>>   }
>>   +static int parallels_check_duplicate(BlockDriverState *bs,
>> +                                     BdrvCheckResult *res,
>> +                                     BdrvCheckMode fix)
>> +{
>> +    BDRVParallelsState *s = bs->opaque;
>> +    QEMUIOVector qiov;
>> +    int64_t off, high_off, sector;
>> +    unsigned long *bitmap;
>> +    uint32_t i, bitmap_size, cluster_index;
>> +    int n, ret = 0;
>> +    uint64_t *buf = NULL;
>> +    bool new_allocations = false;
>> +
>> +    high_off = highest_offset(s);
>> +    if (high_off == 0) {
>> +        return 0;
>> +    }
>> +
>> +    /*
>> +     * Create a bitmap of used clusters.
>> +     * If a bit is set, there is a BAT entry pointing to this cluster.
>> +     * Loop through the BAT entrues, check bits relevant to an entry 
>> offset.
>> +     * If bit is set, this entry is duplicated. Otherwise set the bit.
>> +     */
>> +    bitmap_size = host_cluster_index(s, high_off) + 1;
>> +    bitmap = bitmap_new(bitmap_size);
>> +
>> +    buf = g_malloc(s->cluster_size);
>> +    qemu_iovec_init(&qiov, 0);
>> +    qemu_iovec_add(&qiov, buf, s->cluster_size);
>> +
>> +    for (i = 0; i < s->bat_size; i++) {
>> +        off = bat2sect(s, i) << BDRV_SECTOR_BITS;
>> +        if (off == 0) {
>> +            continue;
>> +        }
>> +
>> +        cluster_index = host_cluster_index(s, off);
>> +        if (test_bit(cluster_index, bitmap)) {
>> +            /* this cluster duplicates another one */
>> +            fprintf(stderr,
>> +                    "%s duplicate offset in BAT entry %u\n",
>> +                    fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
>> +
>> +            res->corruptions++;
>> +
>> +            if (fix & BDRV_FIX_ERRORS) {
>> +                /*
>> +                 * Reset the entry and allocate a new cluster
>> +                 * for the relevant guest offset. In this way we let
>> +                 * the lower layer to place the new cluster properly.
>> +                 * Copy the original cluster to the allocated one.
>> +                 */
>> +                parallels_set_bat_entry(s, i, 0);
>> +
>> +                ret = bdrv_pread(bs->file, off, s->cluster_size, 
>> buf, 0);
>> +                if (ret < 0) {
>> +                    res->check_errors++;
>> +                    goto out;
>> +                }
>> +
>> +                sector = (i * s->cluster_size) >> BDRV_SECTOR_BITS;
>> +                off = allocate_clusters(bs, sector, s->tracks, &n);
>> +                if (off < 0) {
>> +                    res->check_errors++;
>> +                    ret = off;
>> +                    goto out;
>> +                }
>> +                off <<= BDRV_SECTOR_BITS;
>> +                if (off > high_off) {
>> +                    high_off = off;
>> +                }
>> +
>> +                ret = bdrv_co_pwritev(bs->file, off, 
>> s->cluster_size, &qiov, 0);
>> +                if (ret < 0) {
>> +                    res->check_errors++;
>> +                    goto out;
>> +                }
>> +
>> +                new_allocations = true;
>> +                res->corruptions_fixed++;
>> +            }
>> +
>> +        } else {
>> +            bitmap_set(bitmap, cluster_index, 1);
>> +        }
>> +    }
>> +
>> +    if (new_allocations) {
>> +        /*
>> +         * When new clusters are allocated, file size increases
>> +         * by 128 Mb blocks. We need to truncate the file to the
>> +         * right size.
>> +         */
>> +        ret = parallels_handle_leak(bs, res, high_off, true);
>> +        if (ret < 0) {
>> +            res->check_errors++;
>> +            goto out;
>> +        }
>> +    }
> OK. I have re-read the code with test case handy and now
> understand the situation completely.
>
> The problem is that img_check() routine calls bdrv_check()
> actually TWICE without image reopening and thus we
> comes to some trouble on the second check as we have
> had preallocated some space inside the image. This
> is root of the problem.
>
> Though this kind of the fix seems like overkill, I still do not
> like the resulted code. It at least do not scale with the checks
> which we will add further.
>
> I think that we could do that in two ways:
> * temporary set prealloc_mode to none at start of parallels_co_check
>   and return it back at the end
> * parallels_leak_check should just set data_end and do nothing
>    more + we should have truncate at the end of the
>    parallels_co_check() if we have had performed ANY fix

better way found. We should check not file length in handle_leak!
We should compare highest_off with the data_end and that is
146% correct.

File length COULD be more than highest possible data cluster
offset, but data_end should point to the correct location.
That is it!

Den
Alexander Ivanov Sept. 9, 2022, 7:37 a.m. UTC | #4
On 08.09.2022 19:45, Denis V. Lunev wrote:
> On 9/8/22 19:15, Denis V. Lunev wrote:
>> On 9/2/22 10:52, Alexander Ivanov wrote:
>>> Cluster offsets must be unique among all BAT entries.
>>> Find duplicate offsets in the BAT.
>>>
>>> If a duplicated offset is found fix it by copying the content
>>> of the relevant cluster to a new allocated cluster and
>>> set the new cluster offset to the duplicated entry.
>>>
>>> Add host_cluster_index() helper to deduplicate the code.
>>> Add highest_offset() helper. It will be used for code deduplication
>>> in the next patch.
>>>
>>> Signed-off-by: Alexander Ivanov <alexander.ivanov@virtuozzo.com>
>>> ---
>>>   block/parallels.c | 136 
>>> ++++++++++++++++++++++++++++++++++++++++++++++
>>>   1 file changed, 136 insertions(+)
>>>
>>> diff --git a/block/parallels.c b/block/parallels.c
>>> index dbcaf5d310..339ce45634 100644
>>> --- a/block/parallels.c
>>> +++ b/block/parallels.c
>>> @@ -136,6 +136,26 @@ static int cluster_remainder(BDRVParallelsState 
>>> *s, int64_t sector_num,
>>>       return MIN(nb_sectors, ret);
>>>   }
>>>   +static uint32_t host_cluster_index(BDRVParallelsState *s, int64_t 
>>> off)
>>> +{
>>> +    off -= s->header->data_off << BDRV_SECTOR_BITS;
>>> +    return off / s->cluster_size;
>>> +}
>>> +
>>> +static int64_t highest_offset(BDRVParallelsState *s)
>>> +{
>>> +    int64_t off, high_off = 0;
>>> +    int i;
>>> +
>>> +    for (i = 0; i < s->bat_size; i++) {
>>> +        off = bat2sect(s, i) << BDRV_SECTOR_BITS;
>>> +        if (off > high_off) {
>>> +            high_off = off;
>>> +        }
>>> +    }
>>> +    return high_off;
>>> +}
>>> +
>>>   static int64_t block_status(BDRVParallelsState *s, int64_t 
>>> sector_num,
>>>                               int nb_sectors, int *pnum)
>>>   {
>>> @@ -547,6 +567,114 @@ static int 
>>> parallels_check_leak(BlockDriverState *bs,
>>>       return 0;
>>>   }
>>>   +static int parallels_check_duplicate(BlockDriverState *bs,
>>> +                                     BdrvCheckResult *res,
>>> +                                     BdrvCheckMode fix)
>>> +{
>>> +    BDRVParallelsState *s = bs->opaque;
>>> +    QEMUIOVector qiov;
>>> +    int64_t off, high_off, sector;
>>> +    unsigned long *bitmap;
>>> +    uint32_t i, bitmap_size, cluster_index;
>>> +    int n, ret = 0;
>>> +    uint64_t *buf = NULL;
>>> +    bool new_allocations = false;
>>> +
>>> +    high_off = highest_offset(s);
>>> +    if (high_off == 0) {
>>> +        return 0;
>>> +    }
>>> +
>>> +    /*
>>> +     * Create a bitmap of used clusters.
>>> +     * If a bit is set, there is a BAT entry pointing to this cluster.
>>> +     * Loop through the BAT entrues, check bits relevant to an 
>>> entry offset.
>>> +     * If bit is set, this entry is duplicated. Otherwise set the bit.
>>> +     */
>>> +    bitmap_size = host_cluster_index(s, high_off) + 1;
>>> +    bitmap = bitmap_new(bitmap_size);
>>> +
>>> +    buf = g_malloc(s->cluster_size);
>>> +    qemu_iovec_init(&qiov, 0);
>>> +    qemu_iovec_add(&qiov, buf, s->cluster_size);
>>> +
>>> +    for (i = 0; i < s->bat_size; i++) {
>>> +        off = bat2sect(s, i) << BDRV_SECTOR_BITS;
>>> +        if (off == 0) {
>>> +            continue;
>>> +        }
>>> +
>>> +        cluster_index = host_cluster_index(s, off);
>>> +        if (test_bit(cluster_index, bitmap)) {
>>> +            /* this cluster duplicates another one */
>>> +            fprintf(stderr,
>>> +                    "%s duplicate offset in BAT entry %u\n",
>>> +                    fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
>>> +
>>> +            res->corruptions++;
>>> +
>>> +            if (fix & BDRV_FIX_ERRORS) {
>>> +                /*
>>> +                 * Reset the entry and allocate a new cluster
>>> +                 * for the relevant guest offset. In this way we let
>>> +                 * the lower layer to place the new cluster properly.
>>> +                 * Copy the original cluster to the allocated one.
>>> +                 */
>>> +                parallels_set_bat_entry(s, i, 0);
>>> +
>>> +                ret = bdrv_pread(bs->file, off, s->cluster_size, 
>>> buf, 0);
>>> +                if (ret < 0) {
>>> +                    res->check_errors++;
>>> +                    goto out;
>>> +                }
>>> +
>>> +                sector = (i * s->cluster_size) >> BDRV_SECTOR_BITS;
>>> +                off = allocate_clusters(bs, sector, s->tracks, &n);
>>> +                if (off < 0) {
>>> +                    res->check_errors++;
>>> +                    ret = off;
>>> +                    goto out;
>>> +                }
>>> +                off <<= BDRV_SECTOR_BITS;
>>> +                if (off > high_off) {
>>> +                    high_off = off;
>>> +                }
>>> +
>>> +                ret = bdrv_co_pwritev(bs->file, off, 
>>> s->cluster_size, &qiov, 0);
>>> +                if (ret < 0) {
>>> +                    res->check_errors++;
>>> +                    goto out;
>>> +                }
>>> +
>>> +                new_allocations = true;
>>> +                res->corruptions_fixed++;
>>> +            }
>>> +
>>> +        } else {
>>> +            bitmap_set(bitmap, cluster_index, 1);
>>> +        }
>>> +    }
>>> +
>>> +    if (new_allocations) {
>>> +        /*
>>> +         * When new clusters are allocated, file size increases
>>> +         * by 128 Mb blocks. We need to truncate the file to the
>>> +         * right size.
>>> +         */
>>> +        ret = parallels_handle_leak(bs, res, high_off, true);
>>> +        if (ret < 0) {
>>> +            res->check_errors++;
>>> +            goto out;
>>> +        }
>>> +    }
>> OK. I have re-read the code with test case handy and now
>> understand the situation completely.
>>
>> The problem is that img_check() routine calls bdrv_check()
>> actually TWICE without image reopening and thus we
>> comes to some trouble on the second check as we have
>> had preallocated some space inside the image. This
>> is root of the problem.
>>
>> Though this kind of the fix seems like overkill, I still do not
>> like the resulted code. It at least do not scale with the checks
>> which we will add further.
>>
>> I think that we could do that in two ways:
>> * temporary set prealloc_mode to none at start of parallels_co_check
>>   and return it back at the end
>> * parallels_leak_check should just set data_end and do nothing
>>    more + we should have truncate at the end of the
>>    parallels_co_check() if we have had performed ANY fix
>
> better way found. We should check not file length in handle_leak!
> We should compare highest_off with the data_end and that is
> 146% correct.
>
> File length COULD be more than highest possible data cluster
> offset, but data_end should point to the correct location.
> That is it!
>
> Den

Initially data_end points to the end of the cluster with the highest

offset in the BAT, not to the file end. So we can't detect leaks in

such a way.
diff mbox series

Patch

diff --git a/block/parallels.c b/block/parallels.c
index dbcaf5d310..339ce45634 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -136,6 +136,26 @@  static int cluster_remainder(BDRVParallelsState *s, int64_t sector_num,
     return MIN(nb_sectors, ret);
 }
 
+static uint32_t host_cluster_index(BDRVParallelsState *s, int64_t off)
+{
+    off -= s->header->data_off << BDRV_SECTOR_BITS;
+    return off / s->cluster_size;
+}
+
+static int64_t highest_offset(BDRVParallelsState *s)
+{
+    int64_t off, high_off = 0;
+    int i;
+
+    for (i = 0; i < s->bat_size; i++) {
+        off = bat2sect(s, i) << BDRV_SECTOR_BITS;
+        if (off > high_off) {
+            high_off = off;
+        }
+    }
+    return high_off;
+}
+
 static int64_t block_status(BDRVParallelsState *s, int64_t sector_num,
                             int nb_sectors, int *pnum)
 {
@@ -547,6 +567,114 @@  static int parallels_check_leak(BlockDriverState *bs,
     return 0;
 }
 
+static int parallels_check_duplicate(BlockDriverState *bs,
+                                     BdrvCheckResult *res,
+                                     BdrvCheckMode fix)
+{
+    BDRVParallelsState *s = bs->opaque;
+    QEMUIOVector qiov;
+    int64_t off, high_off, sector;
+    unsigned long *bitmap;
+    uint32_t i, bitmap_size, cluster_index;
+    int n, ret = 0;
+    uint64_t *buf = NULL;
+    bool new_allocations = false;
+
+    high_off = highest_offset(s);
+    if (high_off == 0) {
+        return 0;
+    }
+
+    /*
+     * Create a bitmap of used clusters.
+     * If a bit is set, there is a BAT entry pointing to this cluster.
+     * Loop through the BAT entrues, check bits relevant to an entry offset.
+     * If bit is set, this entry is duplicated. Otherwise set the bit.
+     */
+    bitmap_size = host_cluster_index(s, high_off) + 1;
+    bitmap = bitmap_new(bitmap_size);
+
+    buf = g_malloc(s->cluster_size);
+    qemu_iovec_init(&qiov, 0);
+    qemu_iovec_add(&qiov, buf, s->cluster_size);
+
+    for (i = 0; i < s->bat_size; i++) {
+        off = bat2sect(s, i) << BDRV_SECTOR_BITS;
+        if (off == 0) {
+            continue;
+        }
+
+        cluster_index = host_cluster_index(s, off);
+        if (test_bit(cluster_index, bitmap)) {
+            /* this cluster duplicates another one */
+            fprintf(stderr,
+                    "%s duplicate offset in BAT entry %u\n",
+                    fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
+
+            res->corruptions++;
+
+            if (fix & BDRV_FIX_ERRORS) {
+                /*
+                 * Reset the entry and allocate a new cluster
+                 * for the relevant guest offset. In this way we let
+                 * the lower layer to place the new cluster properly.
+                 * Copy the original cluster to the allocated one.
+                 */
+                parallels_set_bat_entry(s, i, 0);
+
+                ret = bdrv_pread(bs->file, off, s->cluster_size, buf, 0);
+                if (ret < 0) {
+                    res->check_errors++;
+                    goto out;
+                }
+
+                sector = (i * s->cluster_size) >> BDRV_SECTOR_BITS;
+                off = allocate_clusters(bs, sector, s->tracks, &n);
+                if (off < 0) {
+                    res->check_errors++;
+                    ret = off;
+                    goto out;
+                }
+                off <<= BDRV_SECTOR_BITS;
+                if (off > high_off) {
+                    high_off = off;
+                }
+
+                ret = bdrv_co_pwritev(bs->file, off, s->cluster_size, &qiov, 0);
+                if (ret < 0) {
+                    res->check_errors++;
+                    goto out;
+                }
+
+                new_allocations = true;
+                res->corruptions_fixed++;
+            }
+
+        } else {
+            bitmap_set(bitmap, cluster_index, 1);
+        }
+    }
+
+    if (new_allocations) {
+        /*
+         * When new clusters are allocated, file size increases
+         * by 128 Mb blocks. We need to truncate the file to the
+         * right size.
+         */
+        ret = parallels_handle_leak(bs, res, high_off, true);
+        if (ret < 0) {
+            res->check_errors++;
+            goto out;
+        }
+    }
+
+out:
+    qemu_iovec_destroy(&qiov);
+    g_free(buf);
+    g_free(bitmap);
+    return ret;
+}
+
 static void parallels_collect_statistics(BlockDriverState *bs,
                                          BdrvCheckResult *res,
                                          BdrvCheckMode fix)
@@ -595,6 +723,14 @@  static int coroutine_fn parallels_co_check(BlockDriverState *bs,
             return ret;
         }
 
+        /* This checks only for "WithouFreSpacExt" format */
+        if (!memcmp(s->header->magic, HEADER_MAGIC2, 16)) {
+            ret = parallels_check_duplicate(bs, res, fix);
+            if (ret < 0) {
+                return ret;
+            }
+        }
+
         parallels_collect_statistics(bs, res, fix);
     }