diff mbox series

[1/6] block: add bitmap-populate job

Message ID 20200225005641.5478-2-jsnow@redhat.com (mailing list archive)
State New, archived
Headers show
Series block: add block-dirty-bitmap-populate job | expand

Commit Message

John Snow Feb. 25, 2020, 12:56 a.m. UTC
This job copies the allocation map into a bitmap. It's a job because
there's no guarantee that allocation interrogation will be quick (or
won't hang), so it cannot be retrofit into block-dirty-bitmap-merge.

It was designed with different possible population patterns in mind,
but only top layer allocation was implemented for now.

Signed-off-by: John Snow <jsnow@redhat.com>
---
 qapi/block-core.json      |  48 +++++++++
 qapi/job.json             |   2 +-
 include/block/block_int.h |  21 ++++
 block/bitmap-alloc.c      | 207 ++++++++++++++++++++++++++++++++++++++
 blockjob.c                |   3 +-
 block/Makefile.objs       |   1 +
 6 files changed, 280 insertions(+), 2 deletions(-)
 create mode 100644 block/bitmap-alloc.c

Comments

Vladimir Sementsov-Ogievskiy Feb. 25, 2020, 4:04 p.m. UTC | #1
25.02.2020 3:56, John Snow wrote:
> This job copies the allocation map into a bitmap. It's a job because
> there's no guarantee that allocation interrogation will be quick (or
> won't hang), so it cannot be retrofit into block-dirty-bitmap-merge.
> 
> It was designed with different possible population patterns in mind,
> but only top layer allocation was implemented for now.
> 
> Signed-off-by: John Snow <jsnow@redhat.com>
> ---
>   qapi/block-core.json      |  48 +++++++++
>   qapi/job.json             |   2 +-
>   include/block/block_int.h |  21 ++++
>   block/bitmap-alloc.c      | 207 ++++++++++++++++++++++++++++++++++++++
>   blockjob.c                |   3 +-
>   block/Makefile.objs       |   1 +
>   6 files changed, 280 insertions(+), 2 deletions(-)
>   create mode 100644 block/bitmap-alloc.c
> 
> diff --git a/qapi/block-core.json b/qapi/block-core.json
> index 85e27bb61f..df1797681a 100644
> --- a/qapi/block-core.json
> +++ b/qapi/block-core.json
> @@ -2245,6 +2245,54 @@
>         { 'command': 'block-dirty-bitmap-merge',
>           'data': 'BlockDirtyBitmapMerge' }
>   
> +##
> +# @BitmapPattern:
> +#
> +# An enumeration of possible patterns that can be written into a bitmap.
> +#
> +# @allocation-top: The allocation status of the top layer
> +#                  of the attached storage node.
> +#
> +# Since: 5.0
> +##
> +{ 'enum': 'BitmapPattern',
> +  'data': ['allocation-top'] }
> +
> +##
> +# @BlockDirtyBitmapPopulate:
> +#
> +# @job-id: identifier for the newly-created block job.
> +#
> +# @pattern: What pattern should be written into the bitmap?
> +#
> +# @on-error: the action to take if an error is encountered on a bitmap's
> +#            attached node, default 'report'.
> +#            'stop' and 'enospc' can only be used if the block device supports
> +#            io-status (see BlockInfo).
> +#
> +# @auto-finalize: When false, this job will wait in a PENDING state after it has
> +#                 finished its work, waiting for @block-job-finalize before
> +#                 making any block graph changes.

sounds a bit strange in context of bitmap-population job

> +#                 When true, this job will automatically
> +#                 perform its abort or commit actions.
> +#                 Defaults to true.
> +#
> +# @auto-dismiss: When false, this job will wait in a CONCLUDED state after it
> +#                has completely ceased all work, and awaits @block-job-dismiss.
> +#                When true, this job will automatically disappear from the query
> +#                list without user intervention.
> +#                Defaults to true.
> +#
> +# Since: 5.0
> +##
> +{ 'struct': 'BlockDirtyBitmapPopulate',
> +  'base': 'BlockDirtyBitmap',
> +  'data': { 'job-id': 'str',
> +            'pattern': 'BitmapPattern',
> +            '*on-error': 'BlockdevOnError',
> +            '*auto-finalize': 'bool',
> +            '*auto-dismiss': 'bool' } }
> +
>   ##
>   # @BlockDirtyBitmapSha256:
>   #
> diff --git a/qapi/job.json b/qapi/job.json
> index 5e658281f5..5f496d4630 100644
> --- a/qapi/job.json
> +++ b/qapi/job.json
> @@ -22,7 +22,7 @@
>   # Since: 1.7
>   ##
>   { 'enum': 'JobType',
> -  'data': ['commit', 'stream', 'mirror', 'backup', 'create'] }
> +  'data': ['commit', 'stream', 'mirror', 'backup', 'create', 'bitmap-populate'] }
>   
>   ##
>   # @JobStatus:
> diff --git a/include/block/block_int.h b/include/block/block_int.h
> index 6f9fd5e20e..a5884b597e 100644
> --- a/include/block/block_int.h
> +++ b/include/block/block_int.h
> @@ -1215,6 +1215,27 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
>                               BlockCompletionFunc *cb, void *opaque,
>                               JobTxn *txn, Error **errp);
>   
> +/*
> + * bitpop_job_create: Create a new bitmap population job.
> + *
> + * @job_id: The id of the newly-created job.
> + * @bs: Block device associated with the @target_bitmap.
> + * @target_bitmap: The bitmap to populate.
> + * @on_error: What to do if an error on @bs is encountered.
> + * @creation_flags: Flags that control the behavior of the Job lifetime.
> + *                  See @BlockJobCreateFlags
> + * @cb: Completion function for the job.
> + * @opaque: Opaque pointer value passed to @cb.
> + * @txn: Transaction that this job is part of (may be NULL).
> + */
> +BlockJob *bitpop_job_create(const char *job_id, BlockDriverState *bs,
> +                            BdrvDirtyBitmap *target_bitmap,
> +                            BitmapPattern pattern,
> +                            BlockdevOnError on_error,
> +                            int creation_flags,
> +                            BlockCompletionFunc *cb, void *opaque,
> +                            JobTxn *txn, Error **errp);
> +
>   void hmp_drive_add_node(Monitor *mon, const char *optstr);
>   
>   BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
> diff --git a/block/bitmap-alloc.c b/block/bitmap-alloc.c
> new file mode 100644
> index 0000000000..47d542dc12
> --- /dev/null
> +++ b/block/bitmap-alloc.c
> @@ -0,0 +1,207 @@
> +/*
> + * Async Dirty Bitmap Populator
> + *
> + * Copyright (C) 2020 Red Hat, Inc.
> + *
> + * Authors:
> + *  John Snow <jsnow@redhat.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#include "qemu/osdep.h"
> +
> +#include "trace.h"
> +#include "block/block.h"
> +#include "block/block_int.h"
> +#include "block/blockjob_int.h"
> +#include "block/block_backup.h"
> +#include "block/block-copy.h"

I hope, not all includes are needed :)

> +#include "qapi/error.h"
> +#include "qapi/qmp/qerror.h"
> +#include "qemu/ratelimit.h"
> +#include "qemu/cutils.h"
> +#include "sysemu/block-backend.h"
> +#include "qemu/bitmap.h"
> +#include "qemu/error-report.h"
> +
> +typedef struct BitpopBlockJob {
> +    BlockJob common;
> +    BlockDriverState *bs;
> +    BdrvDirtyBitmap *target_bitmap;
> +    BdrvDirtyBitmap *new_bitmap;
> +    BlockdevOnError on_error;
> +    uint64_t len;
> +} BitpopBlockJob;
> +
> +static const BlockJobDriver bitpop_job_driver;
> +
> +static void bitpop_commit(Job *job)
> +{
> +    BitpopBlockJob *s = container_of(job, BitpopBlockJob, common.job);
> +
> +    bdrv_dirty_bitmap_merge_internal(s->target_bitmap, s->new_bitmap,
> +                                     NULL, true);

Hmm, so you populate new_bitmap, and then merge to target. Why can't we work
directly with target bitmap? The most probable case is that libvirt will
create bitmap specifically to use as target in this operation, or not?

Hmm, just to make it possible to cancel the job and keep the target bitmap in
original state? Is it really needed? I think on failure target bitmap will be
removed anyway..

> +}
> +
> +/* no abort needed; just clean without committing. */
> +
> +static void bitpop_clean(Job *job)
> +{
> +    BitpopBlockJob *s = container_of(job, BitpopBlockJob, common.job);
> +
> +    bdrv_release_dirty_bitmap(s->new_bitmap);
> +    bdrv_dirty_bitmap_set_busy(s->target_bitmap, false);
> +}
> +
> +static BlockErrorAction bitpop_error_action(BitpopBlockJob *job, int error)
> +{
> +    return block_job_error_action(&job->common, job->on_error, true, error);
> +}
> +
> +static bool coroutine_fn yield_and_check(Job *job)
> +{
> +    if (job_is_cancelled(job)) {
> +        return true;
> +    }
> +
> +    job_sleep_ns(job, 0);
> +
> +    if (job_is_cancelled(job)) {
> +        return true;
> +    }
> +
> +    return false;
> +}
> +
> +static int coroutine_fn bitpop_run(Job *job, Error **errp)
> +{
> +    BitpopBlockJob *s = container_of(job, BitpopBlockJob, common.job);
> +    int ret = 0;
> +    int64_t offset;
> +    int64_t count;
> +    int64_t bytes;
> +
> +    for (offset = 0; offset < s->len; ) {
> +        if (yield_and_check(job)) {
> +            ret = -ECANCELED;
> +            break;
> +        }
> +
> +        bytes = s->len - offset;
> +        ret = bdrv_is_allocated(s->bs, offset, bytes, &count);
> +        if (ret < 0) {
> +            if (bitpop_error_action(s, -ret) == BLOCK_ERROR_ACTION_REPORT) {
> +                break;
> +            }
> +            continue;
> +        }
> +
> +        if (!count) {
> +            ret = 0;

Hmm, I think it's impossible case.. If so, better to make an assertion or ignore..

> +            break;
> +        }
> +
> +        if (ret) {
> +            bdrv_set_dirty_bitmap(s->new_bitmap, offset, count);
> +            ret = 0;
> +        }
> +
> +        job_progress_update(job, count);
> +        offset += count;
> +    }
> +
> +    return ret;
> +}
> +
> +static const BlockJobDriver bitpop_job_driver = {
> +    .job_driver = {
> +        .instance_size          = sizeof(BitpopBlockJob),
> +        .job_type               = JOB_TYPE_BITMAP_POPULATE,
> +        .free                   = block_job_free,
> +        .user_resume            = block_job_user_resume,
> +        .run                    = bitpop_run,
> +        .commit                 = bitpop_commit,
> +        .clean                  = bitpop_clean,
> +    }
> +};
> +
> +
> +BlockJob *bitpop_job_create(
> +    const char *job_id,
> +    BlockDriverState *bs,
> +    BdrvDirtyBitmap *target_bitmap,
> +    BitmapPattern pattern,
> +    BlockdevOnError on_error,
> +    int creation_flags,
> +    BlockCompletionFunc *cb,
> +    void *opaque,
> +    JobTxn *txn,
> +    Error **errp)
> +{
> +    int64_t len;
> +    BitpopBlockJob *job = NULL;
> +    int64_t cluster_size;
> +    BdrvDirtyBitmap *new_bitmap = NULL;
> +
> +    assert(bs);
> +    assert(target_bitmap);
> +
> +    if (!bdrv_is_inserted(bs)) {
> +        error_setg(errp, "Device is not inserted: %s",
> +                   bdrv_get_device_name(bs));
> +        return NULL;
> +    }

Why this?

> +
> +    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
> +        return NULL;
> +    }

and this?

> +
> +    if (bdrv_dirty_bitmap_check(target_bitmap, BDRV_BITMAP_DEFAULT, errp)) {
> +        return NULL;
> +    }
> +
> +    if (pattern != BITMAP_PATTERN_ALLOCATION_TOP) {
> +        error_setg(errp, "Unrecognized bitmap pattern");
> +        return NULL;
> +    }
> +
> +    len = bdrv_getlength(bs);
> +    if (len < 0) {
> +        error_setg_errno(errp, -len, "unable to get length for '%s'",
> +                         bdrv_get_device_name(bs));
> +        return NULL;
> +    }
> +
> +    /* NB: new bitmap is anonymous and enabled */
> +    cluster_size = bdrv_dirty_bitmap_granularity(target_bitmap);
> +    new_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL, errp);
> +    if (!new_bitmap) {
> +        return NULL;
> +    }
> +
> +    /* Take ownership; we reserve the right to write into this on-commit. */
> +    bdrv_dirty_bitmap_set_busy(target_bitmap, true);

Honestly, I still have bad understanding about how should we use dirty bitmap mutex,
but note that bdrv_dirty_bitmap_set_busy locks the mutex. And it is (may be) possible,
that busy status of the bitmap is changed after bdrv_dirty_bitmap_check but before
bdrv_dirty_bitmap_set_busy.  So, more correct would be do both operation under one
critical section. Still, I don't know is the situation possible.

> +
> +    job = block_job_create(job_id, &bitpop_job_driver, txn, bs,
> +                           BLK_PERM_CONSISTENT_READ,

Do we need it? We are not going to read..

> +                           BLK_PERM_ALL & ~BLK_PERM_RESIZE,
> +                           0, creation_flags,
> +                           cb, opaque, errp);
> +    if (!job) {
> +        bdrv_dirty_bitmap_set_busy(target_bitmap, false);
> +        bdrv_release_dirty_bitmap(new_bitmap);
> +        return NULL;
> +    }
> +
> +    job->bs = bs;
> +    job->on_error = on_error;
> +    job->target_bitmap = target_bitmap;
> +    job->new_bitmap = new_bitmap;
> +    job->len = len;
> +    job_progress_set_remaining(&job->common.job, job->len);
> +
> +    return &job->common;
> +}
> diff --git a/blockjob.c b/blockjob.c
> index 5d63b1e89d..7e450372bd 100644
> --- a/blockjob.c
> +++ b/blockjob.c
> @@ -56,7 +56,8 @@ static bool is_block_job(Job *job)
>       return job_type(job) == JOB_TYPE_BACKUP ||
>              job_type(job) == JOB_TYPE_COMMIT ||
>              job_type(job) == JOB_TYPE_MIRROR ||
> -           job_type(job) == JOB_TYPE_STREAM;
> +           job_type(job) == JOB_TYPE_STREAM ||
> +           job_type(job) == JOB_TYPE_BITMAP_POPULATE;
>   }
>   
>   BlockJob *block_job_next(BlockJob *bjob)
> diff --git a/block/Makefile.objs b/block/Makefile.objs
> index 3bcb35c81d..f3cfc89d90 100644
> --- a/block/Makefile.objs
> +++ b/block/Makefile.objs
> @@ -36,6 +36,7 @@ block-obj-$(CONFIG_LIBSSH) += ssh.o
>   block-obj-y += accounting.o dirty-bitmap.o
>   block-obj-y += write-threshold.o
>   block-obj-y += backup.o
> +block-obj-y += bitmap-alloc.o
>   block-obj-$(CONFIG_REPLICATION) += replication.o
>   block-obj-y += throttle.o copy-on-read.o
>   block-obj-y += block-copy.o
>
John Snow Feb. 25, 2020, 8:41 p.m. UTC | #2
On 2/25/20 11:04 AM, Vladimir Sementsov-Ogievskiy wrote:
> 25.02.2020 3:56, John Snow wrote:
>> This job copies the allocation map into a bitmap. It's a job because
>> there's no guarantee that allocation interrogation will be quick (or
>> won't hang), so it cannot be retrofit into block-dirty-bitmap-merge.
>>
>> It was designed with different possible population patterns in mind,
>> but only top layer allocation was implemented for now.
>>
>> Signed-off-by: John Snow <jsnow@redhat.com>
>> ---
>>   qapi/block-core.json      |  48 +++++++++
>>   qapi/job.json             |   2 +-
>>   include/block/block_int.h |  21 ++++
>>   block/bitmap-alloc.c      | 207 ++++++++++++++++++++++++++++++++++++++
>>   blockjob.c                |   3 +-
>>   block/Makefile.objs       |   1 +
>>   6 files changed, 280 insertions(+), 2 deletions(-)
>>   create mode 100644 block/bitmap-alloc.c
>>
>> diff --git a/qapi/block-core.json b/qapi/block-core.json
>> index 85e27bb61f..df1797681a 100644
>> --- a/qapi/block-core.json
>> +++ b/qapi/block-core.json
>> @@ -2245,6 +2245,54 @@
>>         { 'command': 'block-dirty-bitmap-merge',
>>           'data': 'BlockDirtyBitmapMerge' }
>>   +##
>> +# @BitmapPattern:
>> +#
>> +# An enumeration of possible patterns that can be written into a bitmap.
>> +#
>> +# @allocation-top: The allocation status of the top layer
>> +#                  of the attached storage node.
>> +#
>> +# Since: 5.0
>> +##
>> +{ 'enum': 'BitmapPattern',
>> +  'data': ['allocation-top'] }
>> +
>> +##
>> +# @BlockDirtyBitmapPopulate:
>> +#
>> +# @job-id: identifier for the newly-created block job.
>> +#
>> +# @pattern: What pattern should be written into the bitmap?
>> +#
>> +# @on-error: the action to take if an error is encountered on a bitmap's
>> +#            attached node, default 'report'.
>> +#            'stop' and 'enospc' can only be used if the block device
>> supports
>> +#            io-status (see BlockInfo).
>> +#
>> +# @auto-finalize: When false, this job will wait in a PENDING state
>> after it has
>> +#                 finished its work, waiting for @block-job-finalize
>> before
>> +#                 making any block graph changes.
> 
> sounds a bit strange in context of bitmap-population job
> 

Yeah, you're right. Copy-pasted for "consistency".

>> +#                 When true, this job will automatically
>> +#                 perform its abort or commit actions.
>> +#                 Defaults to true.
>> +#
>> +# @auto-dismiss: When false, this job will wait in a CONCLUDED state
>> after it
>> +#                has completely ceased all work, and awaits
>> @block-job-dismiss.
>> +#                When true, this job will automatically disappear
>> from the query
>> +#                list without user intervention.
>> +#                Defaults to true.
>> +#
>> +# Since: 5.0
>> +##
>> +{ 'struct': 'BlockDirtyBitmapPopulate',
>> +  'base': 'BlockDirtyBitmap',
>> +  'data': { 'job-id': 'str',
>> +            'pattern': 'BitmapPattern',
>> +            '*on-error': 'BlockdevOnError',
>> +            '*auto-finalize': 'bool',
>> +            '*auto-dismiss': 'bool' } }
>> +
>>   ##
>>   # @BlockDirtyBitmapSha256:
>>   #
>> diff --git a/qapi/job.json b/qapi/job.json
>> index 5e658281f5..5f496d4630 100644
>> --- a/qapi/job.json
>> +++ b/qapi/job.json
>> @@ -22,7 +22,7 @@
>>   # Since: 1.7
>>   ##
>>   { 'enum': 'JobType',
>> -  'data': ['commit', 'stream', 'mirror', 'backup', 'create'] }
>> +  'data': ['commit', 'stream', 'mirror', 'backup', 'create',
>> 'bitmap-populate'] }
>>     ##
>>   # @JobStatus:
>> diff --git a/include/block/block_int.h b/include/block/block_int.h
>> index 6f9fd5e20e..a5884b597e 100644
>> --- a/include/block/block_int.h
>> +++ b/include/block/block_int.h
>> @@ -1215,6 +1215,27 @@ BlockJob *backup_job_create(const char *job_id,
>> BlockDriverState *bs,
>>                               BlockCompletionFunc *cb, void *opaque,
>>                               JobTxn *txn, Error **errp);
>>   +/*
>> + * bitpop_job_create: Create a new bitmap population job.
>> + *
>> + * @job_id: The id of the newly-created job.
>> + * @bs: Block device associated with the @target_bitmap.
>> + * @target_bitmap: The bitmap to populate.
>> + * @on_error: What to do if an error on @bs is encountered.
>> + * @creation_flags: Flags that control the behavior of the Job lifetime.
>> + *                  See @BlockJobCreateFlags
>> + * @cb: Completion function for the job.
>> + * @opaque: Opaque pointer value passed to @cb.
>> + * @txn: Transaction that this job is part of (may be NULL).
>> + */
>> +BlockJob *bitpop_job_create(const char *job_id, BlockDriverState *bs,
>> +                            BdrvDirtyBitmap *target_bitmap,
>> +                            BitmapPattern pattern,
>> +                            BlockdevOnError on_error,
>> +                            int creation_flags,
>> +                            BlockCompletionFunc *cb, void *opaque,
>> +                            JobTxn *txn, Error **errp);
>> +
>>   void hmp_drive_add_node(Monitor *mon, const char *optstr);
>>     BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
>> diff --git a/block/bitmap-alloc.c b/block/bitmap-alloc.c
>> new file mode 100644
>> index 0000000000..47d542dc12
>> --- /dev/null
>> +++ b/block/bitmap-alloc.c
>> @@ -0,0 +1,207 @@
>> +/*
>> + * Async Dirty Bitmap Populator
>> + *
>> + * Copyright (C) 2020 Red Hat, Inc.
>> + *
>> + * Authors:
>> + *  John Snow <jsnow@redhat.com>
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or
>> later.
>> + * See the COPYING file in the top-level directory.
>> + *
>> + */
>> +
>> +#include "qemu/osdep.h"
>> +
>> +#include "trace.h"
>> +#include "block/block.h"
>> +#include "block/block_int.h"
>> +#include "block/blockjob_int.h"
>> +#include "block/block_backup.h"
>> +#include "block/block-copy.h"
> 
> I hope, not all includes are needed :)

Whoops, no, of course not. I copied the skeleton from backup, as you can
tell ;)

> 
>> +#include "qapi/error.h"
>> +#include "qapi/qmp/qerror.h"
>> +#include "qemu/ratelimit.h"
>> +#include "qemu/cutils.h"
>> +#include "sysemu/block-backend.h"
>> +#include "qemu/bitmap.h"
>> +#include "qemu/error-report.h"
>> +
>> +typedef struct BitpopBlockJob {
>> +    BlockJob common;
>> +    BlockDriverState *bs;
>> +    BdrvDirtyBitmap *target_bitmap;
>> +    BdrvDirtyBitmap *new_bitmap;
>> +    BlockdevOnError on_error;
>> +    uint64_t len;
>> +} BitpopBlockJob;
>> +
>> +static const BlockJobDriver bitpop_job_driver;
>> +
>> +static void bitpop_commit(Job *job)
>> +{
>> +    BitpopBlockJob *s = container_of(job, BitpopBlockJob, common.job);
>> +
>> +    bdrv_dirty_bitmap_merge_internal(s->target_bitmap, s->new_bitmap,
>> +                                     NULL, true);
> 
> Hmm, so you populate new_bitmap, and then merge to target. Why can't we
> work
> directly with target bitmap? The most probable case is that libvirt will
> create bitmap specifically to use as target in this operation, or not?
> 

Most likely case, yes. Odds are very good it will be a brand new bitmap.

However, we already have a creation command -- I didn't want to make a
second job-version of the command and then maintain two interfaces, so I
made it a "merge into existing" style command instead.

> Hmm, just to make it possible to cancel the job and keep the target
> bitmap in
> original state? Is it really needed? I think on failure target bitmap
> will be
> removed anyway..
> 

You caught me being *lazy*. I copy the bitmap so I can unconditionally
enable it to catch in-flight writes without having to create block graph
modifications.

But, yes, to undo changes if we cancel.

I didn't want to make a job that was not able to be canceled. The
alternative is pursuing the design where we allow new bitmaps only --
because then on cancel we can just delete them.

>> +}
>> +
>> +/* no abort needed; just clean without committing. */
>> +
>> +static void bitpop_clean(Job *job)
>> +{
>> +    BitpopBlockJob *s = container_of(job, BitpopBlockJob, common.job);
>> +
>> +    bdrv_release_dirty_bitmap(s->new_bitmap);
>> +    bdrv_dirty_bitmap_set_busy(s->target_bitmap, false);
>> +}
>> +
>> +static BlockErrorAction bitpop_error_action(BitpopBlockJob *job, int
>> error)
>> +{
>> +    return block_job_error_action(&job->common, job->on_error, true,
>> error);
>> +}
>> +
>> +static bool coroutine_fn yield_and_check(Job *job)
>> +{
>> +    if (job_is_cancelled(job)) {
>> +        return true;
>> +    }
>> +
>> +    job_sleep_ns(job, 0);
>> +
>> +    if (job_is_cancelled(job)) {
>> +        return true;
>> +    }
>> +
>> +    return false;
>> +}
>> +
>> +static int coroutine_fn bitpop_run(Job *job, Error **errp)
>> +{
>> +    BitpopBlockJob *s = container_of(job, BitpopBlockJob, common.job);
>> +    int ret = 0;
>> +    int64_t offset;
>> +    int64_t count;
>> +    int64_t bytes;
>> +
>> +    for (offset = 0; offset < s->len; ) {
>> +        if (yield_and_check(job)) {
>> +            ret = -ECANCELED;
>> +            break;
>> +        }
>> +
>> +        bytes = s->len - offset;
>> +        ret = bdrv_is_allocated(s->bs, offset, bytes, &count);
>> +        if (ret < 0) {
>> +            if (bitpop_error_action(s, -ret) ==
>> BLOCK_ERROR_ACTION_REPORT) {
>> +                break;
>> +            }
>> +            continue;
>> +        }
>> +
>> +        if (!count) {
>> +            ret = 0;
> 
> Hmm, I think it's impossible case.. If so, better to make an assertion
> or ignore..
> 

OK, agreed.

>> +            break;
>> +        }
>> +
>> +        if (ret) {
>> +            bdrv_set_dirty_bitmap(s->new_bitmap, offset, count);
>> +            ret = 0;
>> +        }
>> +
>> +        job_progress_update(job, count);
>> +        offset += count;
>> +    }
>> +
>> +    return ret;
>> +}
>> +
>> +static const BlockJobDriver bitpop_job_driver = {
>> +    .job_driver = {
>> +        .instance_size          = sizeof(BitpopBlockJob),
>> +        .job_type               = JOB_TYPE_BITMAP_POPULATE,
>> +        .free                   = block_job_free,
>> +        .user_resume            = block_job_user_resume,
>> +        .run                    = bitpop_run,
>> +        .commit                 = bitpop_commit,
>> +        .clean                  = bitpop_clean,
>> +    }
>> +};
>> +
>> +
>> +BlockJob *bitpop_job_create(
>> +    const char *job_id,
>> +    BlockDriverState *bs,
>> +    BdrvDirtyBitmap *target_bitmap,
>> +    BitmapPattern pattern,
>> +    BlockdevOnError on_error,
>> +    int creation_flags,
>> +    BlockCompletionFunc *cb,
>> +    void *opaque,
>> +    JobTxn *txn,
>> +    Error **errp)
>> +{
>> +    int64_t len;
>> +    BitpopBlockJob *job = NULL;
>> +    int64_t cluster_size;
>> +    BdrvDirtyBitmap *new_bitmap = NULL;
>> +
>> +    assert(bs);
>> +    assert(target_bitmap);
>> +
>> +    if (!bdrv_is_inserted(bs)) {
>> +        error_setg(errp, "Device is not inserted: %s",
>> +                   bdrv_get_device_name(bs));
>> +        return NULL;
>> +    }
> 
> Why this?
> 

I assumed there was nothing to read the allocation map *of* if there
wasn't a media present.

Am I mistaken?

>> +
>> +    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
>> +        return NULL;
>> +    }
> 
> and this?
> 

Copy-paste: I don't understand if I want a new op blocker, to re-use an
op-blocker, or to have no op blocker.

Genuinely I have no idea. I should left a review comment here, I forgot
about this part, sorry.

>> +
>> +    if (bdrv_dirty_bitmap_check(target_bitmap, BDRV_BITMAP_DEFAULT,
>> errp)) {
>> +        return NULL;
>> +    }
>> +
>> +    if (pattern != BITMAP_PATTERN_ALLOCATION_TOP) {
>> +        error_setg(errp, "Unrecognized bitmap pattern");
>> +        return NULL;
>> +    }
>> +
>> +    len = bdrv_getlength(bs);
>> +    if (len < 0) {
>> +        error_setg_errno(errp, -len, "unable to get length for '%s'",
>> +                         bdrv_get_device_name(bs));
>> +        return NULL;
>> +    }
>> +
>> +    /* NB: new bitmap is anonymous and enabled */
>> +    cluster_size = bdrv_dirty_bitmap_granularity(target_bitmap);
>> +    new_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL, errp);
>> +    if (!new_bitmap) {
>> +        return NULL;
>> +    }
>> +
>> +    /* Take ownership; we reserve the right to write into this
>> on-commit. */
>> +    bdrv_dirty_bitmap_set_busy(target_bitmap, true);
> 
> Honestly, I still have bad understanding about how should we use dirty
> bitmap mutex,
> but note that bdrv_dirty_bitmap_set_busy locks the mutex. And it is (may
> be) possible,
> that busy status of the bitmap is changed after bdrv_dirty_bitmap_check
> but before
> bdrv_dirty_bitmap_set_busy.  So, more correct would be do both operation
> under one
> critical section. Still, I don't know is the situation possible.
> 

Aren't we under the BQL here? Can we be pre-empted? :(

>> +
>> +    job = block_job_create(job_id, &bitpop_job_driver, txn, bs,
>> +                           BLK_PERM_CONSISTENT_READ,
> 
> Do we need it? We are not going to read..
> 

Copy-paste / leftover from an earlier draft where I was trying to
achieve atomicity. It can be removed if we don't want the stricter
atomicity.

>> +                           BLK_PERM_ALL & ~BLK_PERM_RESIZE,
>> +                           0, creation_flags,
>> +                           cb, opaque, errp);
>> +    if (!job) {
>> +        bdrv_dirty_bitmap_set_busy(target_bitmap, false);
>> +        bdrv_release_dirty_bitmap(new_bitmap);
>> +        return NULL;
>> +    }
>> +
>> +    job->bs = bs;
>> +    job->on_error = on_error;
>> +    job->target_bitmap = target_bitmap;
>> +    job->new_bitmap = new_bitmap;
>> +    job->len = len;
>> +    job_progress_set_remaining(&job->common.job, job->len);
>> +
>> +    return &job->common;
>> +}
>> diff --git a/blockjob.c b/blockjob.c
>> index 5d63b1e89d..7e450372bd 100644
>> --- a/blockjob.c
>> +++ b/blockjob.c
>> @@ -56,7 +56,8 @@ static bool is_block_job(Job *job)
>>       return job_type(job) == JOB_TYPE_BACKUP ||
>>              job_type(job) == JOB_TYPE_COMMIT ||
>>              job_type(job) == JOB_TYPE_MIRROR ||
>> -           job_type(job) == JOB_TYPE_STREAM;
>> +           job_type(job) == JOB_TYPE_STREAM ||
>> +           job_type(job) == JOB_TYPE_BITMAP_POPULATE;
>>   }
>>     BlockJob *block_job_next(BlockJob *bjob)
>> diff --git a/block/Makefile.objs b/block/Makefile.objs
>> index 3bcb35c81d..f3cfc89d90 100644
>> --- a/block/Makefile.objs
>> +++ b/block/Makefile.objs
>> @@ -36,6 +36,7 @@ block-obj-$(CONFIG_LIBSSH) += ssh.o
>>   block-obj-y += accounting.o dirty-bitmap.o
>>   block-obj-y += write-threshold.o
>>   block-obj-y += backup.o
>> +block-obj-y += bitmap-alloc.o
>>   block-obj-$(CONFIG_REPLICATION) += replication.o
>>   block-obj-y += throttle.o copy-on-read.o
>>   block-obj-y += block-copy.o
>>
> 
> 

Thanks for the review. I'll start making changes, but won't send V2 just
yet.

--js
Vladimir Sementsov-Ogievskiy Feb. 26, 2020, 5:07 a.m. UTC | #3
25.02.2020 23:41, John Snow wrote:
> 
> 
> On 2/25/20 11:04 AM, Vladimir Sementsov-Ogievskiy wrote:
>> 25.02.2020 3:56, John Snow wrote:
>>> This job copies the allocation map into a bitmap. It's a job because
>>> there's no guarantee that allocation interrogation will be quick (or
>>> won't hang), so it cannot be retrofit into block-dirty-bitmap-merge.
>>>
>>> It was designed with different possible population patterns in mind,
>>> but only top layer allocation was implemented for now.
>>>
>>> Signed-off-by: John Snow <jsnow@redhat.com>
>>> ---
>>>    qapi/block-core.json      |  48 +++++++++
>>>    qapi/job.json             |   2 +-
>>>    include/block/block_int.h |  21 ++++
>>>    block/bitmap-alloc.c      | 207 ++++++++++++++++++++++++++++++++++++++
>>>    blockjob.c                |   3 +-
>>>    block/Makefile.objs       |   1 +
>>>    6 files changed, 280 insertions(+), 2 deletions(-)
>>>    create mode 100644 block/bitmap-alloc.c
>>>
>>> diff --git a/qapi/block-core.json b/qapi/block-core.json
>>> index 85e27bb61f..df1797681a 100644
>>> --- a/qapi/block-core.json
>>> +++ b/qapi/block-core.json
>>> @@ -2245,6 +2245,54 @@
>>>          { 'command': 'block-dirty-bitmap-merge',
>>>            'data': 'BlockDirtyBitmapMerge' }
>>>    +##
>>> +# @BitmapPattern:
>>> +#
>>> +# An enumeration of possible patterns that can be written into a bitmap.
>>> +#
>>> +# @allocation-top: The allocation status of the top layer
>>> +#                  of the attached storage node.
>>> +#
>>> +# Since: 5.0
>>> +##
>>> +{ 'enum': 'BitmapPattern',
>>> +  'data': ['allocation-top'] }
>>> +
>>> +##
>>> +# @BlockDirtyBitmapPopulate:
>>> +#
>>> +# @job-id: identifier for the newly-created block job.
>>> +#
>>> +# @pattern: What pattern should be written into the bitmap?
>>> +#
>>> +# @on-error: the action to take if an error is encountered on a bitmap's
>>> +#            attached node, default 'report'.
>>> +#            'stop' and 'enospc' can only be used if the block device
>>> supports
>>> +#            io-status (see BlockInfo).
>>> +#
>>> +# @auto-finalize: When false, this job will wait in a PENDING state
>>> after it has
>>> +#                 finished its work, waiting for @block-job-finalize
>>> before
>>> +#                 making any block graph changes.
>>
>> sounds a bit strange in context of bitmap-population job
>>
> 
> Yeah, you're right. Copy-pasted for "consistency".
> 
>>> +#                 When true, this job will automatically
>>> +#                 perform its abort or commit actions.
>>> +#                 Defaults to true.
>>> +#
>>> +# @auto-dismiss: When false, this job will wait in a CONCLUDED state
>>> after it
>>> +#                has completely ceased all work, and awaits
>>> @block-job-dismiss.
>>> +#                When true, this job will automatically disappear
>>> from the query
>>> +#                list without user intervention.
>>> +#                Defaults to true.
>>> +#
>>> +# Since: 5.0
>>> +##
>>> +{ 'struct': 'BlockDirtyBitmapPopulate',
>>> +  'base': 'BlockDirtyBitmap',
>>> +  'data': { 'job-id': 'str',
>>> +            'pattern': 'BitmapPattern',
>>> +            '*on-error': 'BlockdevOnError',
>>> +            '*auto-finalize': 'bool',
>>> +            '*auto-dismiss': 'bool' } }
>>> +
>>>    ##
>>>    # @BlockDirtyBitmapSha256:
>>>    #
>>> diff --git a/qapi/job.json b/qapi/job.json
>>> index 5e658281f5..5f496d4630 100644
>>> --- a/qapi/job.json
>>> +++ b/qapi/job.json
>>> @@ -22,7 +22,7 @@
>>>    # Since: 1.7
>>>    ##
>>>    { 'enum': 'JobType',
>>> -  'data': ['commit', 'stream', 'mirror', 'backup', 'create'] }
>>> +  'data': ['commit', 'stream', 'mirror', 'backup', 'create',
>>> 'bitmap-populate'] }
>>>      ##
>>>    # @JobStatus:
>>> diff --git a/include/block/block_int.h b/include/block/block_int.h
>>> index 6f9fd5e20e..a5884b597e 100644
>>> --- a/include/block/block_int.h
>>> +++ b/include/block/block_int.h
>>> @@ -1215,6 +1215,27 @@ BlockJob *backup_job_create(const char *job_id,
>>> BlockDriverState *bs,
>>>                                BlockCompletionFunc *cb, void *opaque,
>>>                                JobTxn *txn, Error **errp);
>>>    +/*
>>> + * bitpop_job_create: Create a new bitmap population job.
>>> + *
>>> + * @job_id: The id of the newly-created job.
>>> + * @bs: Block device associated with the @target_bitmap.
>>> + * @target_bitmap: The bitmap to populate.
>>> + * @on_error: What to do if an error on @bs is encountered.
>>> + * @creation_flags: Flags that control the behavior of the Job lifetime.
>>> + *                  See @BlockJobCreateFlags
>>> + * @cb: Completion function for the job.
>>> + * @opaque: Opaque pointer value passed to @cb.
>>> + * @txn: Transaction that this job is part of (may be NULL).
>>> + */
>>> +BlockJob *bitpop_job_create(const char *job_id, BlockDriverState *bs,
>>> +                            BdrvDirtyBitmap *target_bitmap,
>>> +                            BitmapPattern pattern,
>>> +                            BlockdevOnError on_error,
>>> +                            int creation_flags,
>>> +                            BlockCompletionFunc *cb, void *opaque,
>>> +                            JobTxn *txn, Error **errp);
>>> +
>>>    void hmp_drive_add_node(Monitor *mon, const char *optstr);
>>>      BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
>>> diff --git a/block/bitmap-alloc.c b/block/bitmap-alloc.c
>>> new file mode 100644
>>> index 0000000000..47d542dc12
>>> --- /dev/null
>>> +++ b/block/bitmap-alloc.c
>>> @@ -0,0 +1,207 @@
>>> +/*
>>> + * Async Dirty Bitmap Populator
>>> + *
>>> + * Copyright (C) 2020 Red Hat, Inc.
>>> + *
>>> + * Authors:
>>> + *  John Snow <jsnow@redhat.com>
>>> + *
>>> + * This work is licensed under the terms of the GNU GPL, version 2 or
>>> later.
>>> + * See the COPYING file in the top-level directory.
>>> + *
>>> + */
>>> +
>>> +#include "qemu/osdep.h"
>>> +
>>> +#include "trace.h"
>>> +#include "block/block.h"
>>> +#include "block/block_int.h"
>>> +#include "block/blockjob_int.h"
>>> +#include "block/block_backup.h"
>>> +#include "block/block-copy.h"
>>
>> I hope, not all includes are needed :)
> 
> Whoops, no, of course not. I copied the skeleton from backup, as you can
> tell ;)
> 
>>
>>> +#include "qapi/error.h"
>>> +#include "qapi/qmp/qerror.h"
>>> +#include "qemu/ratelimit.h"
>>> +#include "qemu/cutils.h"
>>> +#include "sysemu/block-backend.h"
>>> +#include "qemu/bitmap.h"
>>> +#include "qemu/error-report.h"
>>> +
>>> +typedef struct BitpopBlockJob {
>>> +    BlockJob common;
>>> +    BlockDriverState *bs;
>>> +    BdrvDirtyBitmap *target_bitmap;
>>> +    BdrvDirtyBitmap *new_bitmap;
>>> +    BlockdevOnError on_error;
>>> +    uint64_t len;
>>> +} BitpopBlockJob;
>>> +
>>> +static const BlockJobDriver bitpop_job_driver;
>>> +
>>> +static void bitpop_commit(Job *job)
>>> +{
>>> +    BitpopBlockJob *s = container_of(job, BitpopBlockJob, common.job);
>>> +
>>> +    bdrv_dirty_bitmap_merge_internal(s->target_bitmap, s->new_bitmap,
>>> +                                     NULL, true);
>>
>> Hmm, so you populate new_bitmap, and then merge to target. Why can't we
>> work
>> directly with target bitmap? The most probable case is that libvirt will
>> create bitmap specifically to use as target in this operation, or not?
>>
> 
> Most likely case, yes. Odds are very good it will be a brand new bitmap.
> 
> However, we already have a creation command -- I didn't want to make a
> second job-version of the command and then maintain two interfaces, so I
> made it a "merge into existing" style command instead.
> 
>> Hmm, just to make it possible to cancel the job and keep the target
>> bitmap in
>> original state? Is it really needed? I think on failure target bitmap
>> will be
>> removed anyway..
>>
> 
> You caught me being *lazy*. I copy the bitmap so I can unconditionally
> enable it to catch in-flight writes without having to create block graph
> modifications.
> 
> But, yes, to undo changes if we cancel.
> 
> I didn't want to make a job that was not able to be canceled. The
> alternative is pursuing the design where we allow new bitmaps only --
> because then on cancel we can just delete them.

On backup job (and any other) we can't rollback target changes on cancel.
So, I think it would be OK to take same semantics for the new job, keeping in
mind that it would be most probable usage case and no sense in creating
additional bitmaps. And if caller needs to use existent non-empty bitmap as
target and wants correct cancel, it always can create additional bitmap by
itself and then merge it to actual target.

And why new? It's up to user, what to use as target. And user knows, that on
failure or cancel target becomes invalid and will workaround this if needed.

> 
>>> +}
>>> +
>>> +/* no abort needed; just clean without committing. */
>>> +
>>> +static void bitpop_clean(Job *job)
>>> +{
>>> +    BitpopBlockJob *s = container_of(job, BitpopBlockJob, common.job);
>>> +
>>> +    bdrv_release_dirty_bitmap(s->new_bitmap);
>>> +    bdrv_dirty_bitmap_set_busy(s->target_bitmap, false);
>>> +}
>>> +
>>> +static BlockErrorAction bitpop_error_action(BitpopBlockJob *job, int
>>> error)
>>> +{
>>> +    return block_job_error_action(&job->common, job->on_error, true,
>>> error);
>>> +}
>>> +
>>> +static bool coroutine_fn yield_and_check(Job *job)
>>> +{
>>> +    if (job_is_cancelled(job)) {
>>> +        return true;
>>> +    }
>>> +
>>> +    job_sleep_ns(job, 0);
>>> +
>>> +    if (job_is_cancelled(job)) {
>>> +        return true;
>>> +    }
>>> +
>>> +    return false;
>>> +}
>>> +
>>> +static int coroutine_fn bitpop_run(Job *job, Error **errp)
>>> +{
>>> +    BitpopBlockJob *s = container_of(job, BitpopBlockJob, common.job);
>>> +    int ret = 0;
>>> +    int64_t offset;
>>> +    int64_t count;
>>> +    int64_t bytes;
>>> +
>>> +    for (offset = 0; offset < s->len; ) {
>>> +        if (yield_and_check(job)) {
>>> +            ret = -ECANCELED;
>>> +            break;
>>> +        }
>>> +
>>> +        bytes = s->len - offset;
>>> +        ret = bdrv_is_allocated(s->bs, offset, bytes, &count);
>>> +        if (ret < 0) {
>>> +            if (bitpop_error_action(s, -ret) ==
>>> BLOCK_ERROR_ACTION_REPORT) {
>>> +                break;
>>> +            }
>>> +            continue;
>>> +        }
>>> +
>>> +        if (!count) {
>>> +            ret = 0;
>>
>> Hmm, I think it's impossible case.. If so, better to make an assertion
>> or ignore..
>>
> 
> OK, agreed.
> 
>>> +            break;
>>> +        }
>>> +
>>> +        if (ret) {
>>> +            bdrv_set_dirty_bitmap(s->new_bitmap, offset, count);
>>> +            ret = 0;
>>> +        }
>>> +
>>> +        job_progress_update(job, count);
>>> +        offset += count;
>>> +    }
>>> +
>>> +    return ret;
>>> +}
>>> +
>>> +static const BlockJobDriver bitpop_job_driver = {
>>> +    .job_driver = {
>>> +        .instance_size          = sizeof(BitpopBlockJob),
>>> +        .job_type               = JOB_TYPE_BITMAP_POPULATE,
>>> +        .free                   = block_job_free,
>>> +        .user_resume            = block_job_user_resume,
>>> +        .run                    = bitpop_run,
>>> +        .commit                 = bitpop_commit,
>>> +        .clean                  = bitpop_clean,
>>> +    }
>>> +};
>>> +
>>> +
>>> +BlockJob *bitpop_job_create(
>>> +    const char *job_id,
>>> +    BlockDriverState *bs,
>>> +    BdrvDirtyBitmap *target_bitmap,
>>> +    BitmapPattern pattern,
>>> +    BlockdevOnError on_error,
>>> +    int creation_flags,
>>> +    BlockCompletionFunc *cb,
>>> +    void *opaque,
>>> +    JobTxn *txn,
>>> +    Error **errp)
>>> +{
>>> +    int64_t len;
>>> +    BitpopBlockJob *job = NULL;
>>> +    int64_t cluster_size;
>>> +    BdrvDirtyBitmap *new_bitmap = NULL;
>>> +
>>> +    assert(bs);
>>> +    assert(target_bitmap);
>>> +
>>> +    if (!bdrv_is_inserted(bs)) {
>>> +        error_setg(errp, "Device is not inserted: %s",
>>> +                   bdrv_get_device_name(bs));
>>> +        return NULL;
>>> +    }
>>
>> Why this?
>>
> 
> I assumed there was nothing to read the allocation map *of* if there
> wasn't a media present.
> 
> Am I mistaken?

is_inserted checks existing of bs->drv, but bitmap operations actually
doesn't need any drv.. Hmm. I'm not against this check anyway.

> 
>>> +
>>> +    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
>>> +        return NULL;
>>> +    }
>>
>> and this?
>>
> 
> Copy-paste: I don't understand if I want a new op blocker, to re-use an
> op-blocker, or to have no op blocker.
> 
> Genuinely I have no idea. I should left a review comment here, I forgot
> about this part, sorry.

I'm for no op blocker. As I understand, op-blockers are old and should be
replaced by permissions and frozen children. So, if we don't know, do we
need any blocking here, better go on without it. Also, op-blockers can't
block forbidden usage through filters anyway.

> 
>>> +
>>> +    if (bdrv_dirty_bitmap_check(target_bitmap, BDRV_BITMAP_DEFAULT,
>>> errp)) {
>>> +        return NULL;
>>> +    }
>>> +
>>> +    if (pattern != BITMAP_PATTERN_ALLOCATION_TOP) {
>>> +        error_setg(errp, "Unrecognized bitmap pattern");
>>> +        return NULL;
>>> +    }
>>> +
>>> +    len = bdrv_getlength(bs);
>>> +    if (len < 0) {
>>> +        error_setg_errno(errp, -len, "unable to get length for '%s'",
>>> +                         bdrv_get_device_name(bs));
>>> +        return NULL;
>>> +    }
>>> +
>>> +    /* NB: new bitmap is anonymous and enabled */
>>> +    cluster_size = bdrv_dirty_bitmap_granularity(target_bitmap);
>>> +    new_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL, errp);
>>> +    if (!new_bitmap) {
>>> +        return NULL;
>>> +    }
>>> +
>>> +    /* Take ownership; we reserve the right to write into this
>>> on-commit. */
>>> +    bdrv_dirty_bitmap_set_busy(target_bitmap, true);
>>
>> Honestly, I still have bad understanding about how should we use dirty
>> bitmap mutex,
>> but note that bdrv_dirty_bitmap_set_busy locks the mutex. And it is (may
>> be) possible,
>> that busy status of the bitmap is changed after bdrv_dirty_bitmap_check
>> but before
>> bdrv_dirty_bitmap_set_busy.  So, more correct would be do both operation
>> under one
>> critical section. Still, I don't know is the situation possible.
>>
> 
> Aren't we under the BQL here? Can we be pre-empted? :(

Seems we are. But, as it's said above dirty_bitmap_mutex declaration:

     /* Writing to the list requires the BQL _and_ the dirty_bitmap_mutex.
      * Reading from the list can be done with either the BQL or the
      * dirty_bitmap_mutex.  Modifying a bitmap only requires
      * dirty_bitmap_mutex.  */

It means, that another thread may modify bitmap (for example its 'busy' field)
taking only dirty_bitmap_mutex, which will lead to the case I described.

> 
>>> +
>>> +    job = block_job_create(job_id, &bitpop_job_driver, txn, bs,
>>> +                           BLK_PERM_CONSISTENT_READ,
>>
>> Do we need it? We are not going to read..
>>
> 
> Copy-paste / leftover from an earlier draft where I was trying to
> achieve atomicity. It can be removed if we don't want the stricter
> atomicity.
> 
>>> +                           BLK_PERM_ALL & ~BLK_PERM_RESIZE,
>>> +                           0, creation_flags,
>>> +                           cb, opaque, errp);
>>> +    if (!job) {
>>> +        bdrv_dirty_bitmap_set_busy(target_bitmap, false);
>>> +        bdrv_release_dirty_bitmap(new_bitmap);
>>> +        return NULL;
>>> +    }
>>> +
>>> +    job->bs = bs;
>>> +    job->on_error = on_error;
>>> +    job->target_bitmap = target_bitmap;
>>> +    job->new_bitmap = new_bitmap;
>>> +    job->len = len;
>>> +    job_progress_set_remaining(&job->common.job, job->len);
>>> +
>>> +    return &job->common;
>>> +}
>>> diff --git a/blockjob.c b/blockjob.c
>>> index 5d63b1e89d..7e450372bd 100644
>>> --- a/blockjob.c
>>> +++ b/blockjob.c
>>> @@ -56,7 +56,8 @@ static bool is_block_job(Job *job)
>>>        return job_type(job) == JOB_TYPE_BACKUP ||
>>>               job_type(job) == JOB_TYPE_COMMIT ||
>>>               job_type(job) == JOB_TYPE_MIRROR ||
>>> -           job_type(job) == JOB_TYPE_STREAM;
>>> +           job_type(job) == JOB_TYPE_STREAM ||
>>> +           job_type(job) == JOB_TYPE_BITMAP_POPULATE;
>>>    }
>>>      BlockJob *block_job_next(BlockJob *bjob)
>>> diff --git a/block/Makefile.objs b/block/Makefile.objs
>>> index 3bcb35c81d..f3cfc89d90 100644
>>> --- a/block/Makefile.objs
>>> +++ b/block/Makefile.objs
>>> @@ -36,6 +36,7 @@ block-obj-$(CONFIG_LIBSSH) += ssh.o
>>>    block-obj-y += accounting.o dirty-bitmap.o
>>>    block-obj-y += write-threshold.o
>>>    block-obj-y += backup.o
>>> +block-obj-y += bitmap-alloc.o
>>>    block-obj-$(CONFIG_REPLICATION) += replication.o
>>>    block-obj-y += throttle.o copy-on-read.o
>>>    block-obj-y += block-copy.o
>>>
>>
>>
> 
> Thanks for the review. I'll start making changes, but won't send V2 just
> yet.
> 
> --js
>
John Snow Feb. 26, 2020, 7:11 p.m. UTC | #4
On 2/26/20 12:07 AM, Vladimir Sementsov-Ogievskiy wrote:
> 25.02.2020 23:41, John Snow wrote:
>>
>>
>> On 2/25/20 11:04 AM, Vladimir Sementsov-Ogievskiy wrote:
>>> 25.02.2020 3:56, John Snow wrote:
>>>> This job copies the allocation map into a bitmap. It's a job because
>>>> there's no guarantee that allocation interrogation will be quick (or
>>>> won't hang), so it cannot be retrofit into block-dirty-bitmap-merge.
>>>>
>>>> It was designed with different possible population patterns in mind,
>>>> but only top layer allocation was implemented for now.
>>>>
>>>> Signed-off-by: John Snow <jsnow@redhat.com>
>>>> ---
>>>>    qapi/block-core.json      |  48 +++++++++
>>>>    qapi/job.json             |   2 +-
>>>>    include/block/block_int.h |  21 ++++
>>>>    block/bitmap-alloc.c      | 207
>>>> ++++++++++++++++++++++++++++++++++++++
>>>>    blockjob.c                |   3 +-
>>>>    block/Makefile.objs       |   1 +
>>>>    6 files changed, 280 insertions(+), 2 deletions(-)
>>>>    create mode 100644 block/bitmap-alloc.c
>>>>
>>>> diff --git a/qapi/block-core.json b/qapi/block-core.json
>>>> index 85e27bb61f..df1797681a 100644
>>>> --- a/qapi/block-core.json
>>>> +++ b/qapi/block-core.json
>>>> @@ -2245,6 +2245,54 @@
>>>>          { 'command': 'block-dirty-bitmap-merge',
>>>>            'data': 'BlockDirtyBitmapMerge' }
>>>>    +##
>>>> +# @BitmapPattern:
>>>> +#
>>>> +# An enumeration of possible patterns that can be written into a
>>>> bitmap.
>>>> +#
>>>> +# @allocation-top: The allocation status of the top layer
>>>> +#                  of the attached storage node.
>>>> +#
>>>> +# Since: 5.0
>>>> +##
>>>> +{ 'enum': 'BitmapPattern',
>>>> +  'data': ['allocation-top'] }
>>>> +
>>>> +##
>>>> +# @BlockDirtyBitmapPopulate:
>>>> +#
>>>> +# @job-id: identifier for the newly-created block job.
>>>> +#
>>>> +# @pattern: What pattern should be written into the bitmap?
>>>> +#
>>>> +# @on-error: the action to take if an error is encountered on a
>>>> bitmap's
>>>> +#            attached node, default 'report'.
>>>> +#            'stop' and 'enospc' can only be used if the block device
>>>> supports
>>>> +#            io-status (see BlockInfo).
>>>> +#
>>>> +# @auto-finalize: When false, this job will wait in a PENDING state
>>>> after it has
>>>> +#                 finished its work, waiting for @block-job-finalize
>>>> before
>>>> +#                 making any block graph changes.
>>>
>>> sounds a bit strange in context of bitmap-population job
>>>
>>
>> Yeah, you're right. Copy-pasted for "consistency".
>>
>>>> +#                 When true, this job will automatically
>>>> +#                 perform its abort or commit actions.
>>>> +#                 Defaults to true.
>>>> +#
>>>> +# @auto-dismiss: When false, this job will wait in a CONCLUDED state
>>>> after it
>>>> +#                has completely ceased all work, and awaits
>>>> @block-job-dismiss.
>>>> +#                When true, this job will automatically disappear
>>>> from the query
>>>> +#                list without user intervention.
>>>> +#                Defaults to true.
>>>> +#
>>>> +# Since: 5.0
>>>> +##
>>>> +{ 'struct': 'BlockDirtyBitmapPopulate',
>>>> +  'base': 'BlockDirtyBitmap',
>>>> +  'data': { 'job-id': 'str',
>>>> +            'pattern': 'BitmapPattern',
>>>> +            '*on-error': 'BlockdevOnError',
>>>> +            '*auto-finalize': 'bool',
>>>> +            '*auto-dismiss': 'bool' } }
>>>> +
>>>>    ##
>>>>    # @BlockDirtyBitmapSha256:
>>>>    #
>>>> diff --git a/qapi/job.json b/qapi/job.json
>>>> index 5e658281f5..5f496d4630 100644
>>>> --- a/qapi/job.json
>>>> +++ b/qapi/job.json
>>>> @@ -22,7 +22,7 @@
>>>>    # Since: 1.7
>>>>    ##
>>>>    { 'enum': 'JobType',
>>>> -  'data': ['commit', 'stream', 'mirror', 'backup', 'create'] }
>>>> +  'data': ['commit', 'stream', 'mirror', 'backup', 'create',
>>>> 'bitmap-populate'] }
>>>>      ##
>>>>    # @JobStatus:
>>>> diff --git a/include/block/block_int.h b/include/block/block_int.h
>>>> index 6f9fd5e20e..a5884b597e 100644
>>>> --- a/include/block/block_int.h
>>>> +++ b/include/block/block_int.h
>>>> @@ -1215,6 +1215,27 @@ BlockJob *backup_job_create(const char *job_id,
>>>> BlockDriverState *bs,
>>>>                                BlockCompletionFunc *cb, void *opaque,
>>>>                                JobTxn *txn, Error **errp);
>>>>    +/*
>>>> + * bitpop_job_create: Create a new bitmap population job.
>>>> + *
>>>> + * @job_id: The id of the newly-created job.
>>>> + * @bs: Block device associated with the @target_bitmap.
>>>> + * @target_bitmap: The bitmap to populate.
>>>> + * @on_error: What to do if an error on @bs is encountered.
>>>> + * @creation_flags: Flags that control the behavior of the Job
>>>> lifetime.
>>>> + *                  See @BlockJobCreateFlags
>>>> + * @cb: Completion function for the job.
>>>> + * @opaque: Opaque pointer value passed to @cb.
>>>> + * @txn: Transaction that this job is part of (may be NULL).
>>>> + */
>>>> +BlockJob *bitpop_job_create(const char *job_id, BlockDriverState *bs,
>>>> +                            BdrvDirtyBitmap *target_bitmap,
>>>> +                            BitmapPattern pattern,
>>>> +                            BlockdevOnError on_error,
>>>> +                            int creation_flags,
>>>> +                            BlockCompletionFunc *cb, void *opaque,
>>>> +                            JobTxn *txn, Error **errp);
>>>> +
>>>>    void hmp_drive_add_node(Monitor *mon, const char *optstr);
>>>>      BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
>>>> diff --git a/block/bitmap-alloc.c b/block/bitmap-alloc.c
>>>> new file mode 100644
>>>> index 0000000000..47d542dc12
>>>> --- /dev/null
>>>> +++ b/block/bitmap-alloc.c
>>>> @@ -0,0 +1,207 @@
>>>> +/*
>>>> + * Async Dirty Bitmap Populator
>>>> + *
>>>> + * Copyright (C) 2020 Red Hat, Inc.
>>>> + *
>>>> + * Authors:
>>>> + *  John Snow <jsnow@redhat.com>
>>>> + *
>>>> + * This work is licensed under the terms of the GNU GPL, version 2 or
>>>> later.
>>>> + * See the COPYING file in the top-level directory.
>>>> + *
>>>> + */
>>>> +
>>>> +#include "qemu/osdep.h"
>>>> +
>>>> +#include "trace.h"
>>>> +#include "block/block.h"
>>>> +#include "block/block_int.h"
>>>> +#include "block/blockjob_int.h"
>>>> +#include "block/block_backup.h"
>>>> +#include "block/block-copy.h"
>>>
>>> I hope, not all includes are needed :)
>>
>> Whoops, no, of course not. I copied the skeleton from backup, as you can
>> tell ;)
>>
>>>
>>>> +#include "qapi/error.h"
>>>> +#include "qapi/qmp/qerror.h"
>>>> +#include "qemu/ratelimit.h"
>>>> +#include "qemu/cutils.h"
>>>> +#include "sysemu/block-backend.h"
>>>> +#include "qemu/bitmap.h"
>>>> +#include "qemu/error-report.h"
>>>> +
>>>> +typedef struct BitpopBlockJob {
>>>> +    BlockJob common;
>>>> +    BlockDriverState *bs;
>>>> +    BdrvDirtyBitmap *target_bitmap;
>>>> +    BdrvDirtyBitmap *new_bitmap;
>>>> +    BlockdevOnError on_error;
>>>> +    uint64_t len;
>>>> +} BitpopBlockJob;
>>>> +
>>>> +static const BlockJobDriver bitpop_job_driver;
>>>> +
>>>> +static void bitpop_commit(Job *job)
>>>> +{
>>>> +    BitpopBlockJob *s = container_of(job, BitpopBlockJob, common.job);
>>>> +
>>>> +    bdrv_dirty_bitmap_merge_internal(s->target_bitmap, s->new_bitmap,
>>>> +                                     NULL, true);
>>>
>>> Hmm, so you populate new_bitmap, and then merge to target. Why can't we
>>> work
>>> directly with target bitmap? The most probable case is that libvirt will
>>> create bitmap specifically to use as target in this operation, or not?
>>>
>>
>> Most likely case, yes. Odds are very good it will be a brand new bitmap.
>>
>> However, we already have a creation command -- I didn't want to make a
>> second job-version of the command and then maintain two interfaces, so I
>> made it a "merge into existing" style command instead.
>>
>>> Hmm, just to make it possible to cancel the job and keep the target
>>> bitmap in
>>> original state? Is it really needed? I think on failure target bitmap
>>> will be
>>> removed anyway..
>>>
>>
>> You caught me being *lazy*. I copy the bitmap so I can unconditionally
>> enable it to catch in-flight writes without having to create block graph
>> modifications.
>>
>> But, yes, to undo changes if we cancel.
>>
>> I didn't want to make a job that was not able to be canceled. The
>> alternative is pursuing the design where we allow new bitmaps only --
>> because then on cancel we can just delete them.
> 
> On backup job (and any other) we can't rollback target changes on cancel.
> So, I think it would be OK to take same semantics for the new job,
> keeping in
> mind that it would be most probable usage case and no sense in creating
> additional bitmaps. And if caller needs to use existent non-empty bitmap as
> target and wants correct cancel, it always can create additional bitmap by
> itself and then merge it to actual target.
> 

While backup can't roll back data changes, it does avoid modifying
bitmaps if it didn't succeed. This is a bitmap-centric job and we have
the capability to do a complete rollback.

i.e. I am thinking more about consistency with bitmap behavior than I am
consistency with job behavior.

That this job becomes the only one to be able to be *fully* reversed on
cancel is unique to jobs -- but is shared by all transactionable bitmap
commands.

> And why new? It's up to user, what to use as target. And user knows,
> that on
> failure or cancel target becomes invalid and will workaround this if
> needed.
> 

I think you're advocating for writing directly into the bitmap and just
accepting that it's trashed afterwards, but I don't like this behavior
because it limits what we can do with this job in the future.

Is this just a matter of taste? Since every other bitmap command offers
some kind of rollback I felt more comfortable doing the same here.

I'm worried that if I decide to treat the result bitmap as disposable,
that I will need to provide greater atomicity for the allocation status
read. At the moment I rely on the temporary bitmap being *enabled* to
make sure that if any new writes happen in regions I have already read
that they are reflected in the bitmap anyway.

If the user passes a disabled bitmap, that guarantee is lost -- we lose
point in time semantics entirely. Writes that occur further in the file
are captured, but ones that occur earlier are missed.

If I make a temporary enabled bitmap, I catch everything.

I could prohibit *disabled* bitmaps from being passed in to this job,
but that again makes it of a more limited use for other patterns in the
future.

So I guess I would prefer to just leave this as-is for now if it's not
that harmful.

>>
>>>> +}
>>>> +
>>>> +/* no abort needed; just clean without committing. */
>>>> +
>>>> +static void bitpop_clean(Job *job)
>>>> +{
>>>> +    BitpopBlockJob *s = container_of(job, BitpopBlockJob, common.job);
>>>> +
>>>> +    bdrv_release_dirty_bitmap(s->new_bitmap);
>>>> +    bdrv_dirty_bitmap_set_busy(s->target_bitmap, false);
>>>> +}
>>>> +
>>>> +static BlockErrorAction bitpop_error_action(BitpopBlockJob *job, int
>>>> error)
>>>> +{
>>>> +    return block_job_error_action(&job->common, job->on_error, true,
>>>> error);
>>>> +}
>>>> +
>>>> +static bool coroutine_fn yield_and_check(Job *job)
>>>> +{
>>>> +    if (job_is_cancelled(job)) {
>>>> +        return true;
>>>> +    }
>>>> +
>>>> +    job_sleep_ns(job, 0);
>>>> +
>>>> +    if (job_is_cancelled(job)) {
>>>> +        return true;
>>>> +    }
>>>> +
>>>> +    return false;
>>>> +}
>>>> +
>>>> +static int coroutine_fn bitpop_run(Job *job, Error **errp)
>>>> +{
>>>> +    BitpopBlockJob *s = container_of(job, BitpopBlockJob, common.job);
>>>> +    int ret = 0;
>>>> +    int64_t offset;
>>>> +    int64_t count;
>>>> +    int64_t bytes;
>>>> +
>>>> +    for (offset = 0; offset < s->len; ) {
>>>> +        if (yield_and_check(job)) {
>>>> +            ret = -ECANCELED;
>>>> +            break;
>>>> +        }
>>>> +
>>>> +        bytes = s->len - offset;
>>>> +        ret = bdrv_is_allocated(s->bs, offset, bytes, &count);
>>>> +        if (ret < 0) {
>>>> +            if (bitpop_error_action(s, -ret) ==
>>>> BLOCK_ERROR_ACTION_REPORT) {
>>>> +                break;
>>>> +            }
>>>> +            continue;
>>>> +        }
>>>> +
>>>> +        if (!count) {
>>>> +            ret = 0;
>>>
>>> Hmm, I think it's impossible case.. If so, better to make an assertion
>>> or ignore..
>>>
>>
>> OK, agreed.
>>
>>>> +            break;
>>>> +        }
>>>> +
>>>> +        if (ret) {
>>>> +            bdrv_set_dirty_bitmap(s->new_bitmap, offset, count);
>>>> +            ret = 0;
>>>> +        }
>>>> +
>>>> +        job_progress_update(job, count);
>>>> +        offset += count;
>>>> +    }
>>>> +
>>>> +    return ret;
>>>> +}
>>>> +
>>>> +static const BlockJobDriver bitpop_job_driver = {
>>>> +    .job_driver = {
>>>> +        .instance_size          = sizeof(BitpopBlockJob),
>>>> +        .job_type               = JOB_TYPE_BITMAP_POPULATE,
>>>> +        .free                   = block_job_free,
>>>> +        .user_resume            = block_job_user_resume,
>>>> +        .run                    = bitpop_run,
>>>> +        .commit                 = bitpop_commit,
>>>> +        .clean                  = bitpop_clean,
>>>> +    }
>>>> +};
>>>> +
>>>> +
>>>> +BlockJob *bitpop_job_create(
>>>> +    const char *job_id,
>>>> +    BlockDriverState *bs,
>>>> +    BdrvDirtyBitmap *target_bitmap,
>>>> +    BitmapPattern pattern,
>>>> +    BlockdevOnError on_error,
>>>> +    int creation_flags,
>>>> +    BlockCompletionFunc *cb,
>>>> +    void *opaque,
>>>> +    JobTxn *txn,
>>>> +    Error **errp)
>>>> +{
>>>> +    int64_t len;
>>>> +    BitpopBlockJob *job = NULL;
>>>> +    int64_t cluster_size;
>>>> +    BdrvDirtyBitmap *new_bitmap = NULL;
>>>> +
>>>> +    assert(bs);
>>>> +    assert(target_bitmap);
>>>> +
>>>> +    if (!bdrv_is_inserted(bs)) {
>>>> +        error_setg(errp, "Device is not inserted: %s",
>>>> +                   bdrv_get_device_name(bs));
>>>> +        return NULL;
>>>> +    }
>>>
>>> Why this?
>>>
>>
>> I assumed there was nothing to read the allocation map *of* if there
>> wasn't a media present.
>>
>> Am I mistaken?
> 
> is_inserted checks existing of bs->drv, but bitmap operations actually
> doesn't need any drv.. Hmm. I'm not against this check anyway.
> 

Maybe it would be clearer to say this:

if (pattern = BITMAP_PATTERN_ALLOCATION_TOP) {
    if (!bdrv_is_inserted(bs)) {
        ...
    }
} else {
    ...
}

We need to make sure bs->drv exists for the allocation map reading --
not for every theoretical pattern, but for this one we do.

>>
>>>> +
>>>> +    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
>>>> +        return NULL;
>>>> +    }
>>>
>>> and this?
>>>
>>
>> Copy-paste: I don't understand if I want a new op blocker, to re-use an
>> op-blocker, or to have no op blocker.
>>
>> Genuinely I have no idea. I should left a review comment here, I forgot
>> about this part, sorry.
> 
> I'm for no op blocker. As I understand, op-blockers are old and should be
> replaced by permissions and frozen children. So, if we don't know, do we
> need any blocking here, better go on without it. Also, op-blockers can't
> block forbidden usage through filters anyway.
> 

Sounds good to me, honestly.

>>
>>>> +
>>>> +    if (bdrv_dirty_bitmap_check(target_bitmap, BDRV_BITMAP_DEFAULT,
>>>> errp)) {
>>>> +        return NULL;
>>>> +    }
>>>> +
>>>> +    if (pattern != BITMAP_PATTERN_ALLOCATION_TOP) {
>>>> +        error_setg(errp, "Unrecognized bitmap pattern");
>>>> +        return NULL;
>>>> +    }
>>>> +
>>>> +    len = bdrv_getlength(bs);
>>>> +    if (len < 0) {
>>>> +        error_setg_errno(errp, -len, "unable to get length for '%s'",
>>>> +                         bdrv_get_device_name(bs));
>>>> +        return NULL;
>>>> +    }
>>>> +
>>>> +    /* NB: new bitmap is anonymous and enabled */
>>>> +    cluster_size = bdrv_dirty_bitmap_granularity(target_bitmap);
>>>> +    new_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL,
>>>> errp);
>>>> +    if (!new_bitmap) {
>>>> +        return NULL;
>>>> +    }
>>>> +
>>>> +    /* Take ownership; we reserve the right to write into this
>>>> on-commit. */
>>>> +    bdrv_dirty_bitmap_set_busy(target_bitmap, true);
>>>
>>> Honestly, I still have bad understanding about how should we use dirty
>>> bitmap mutex,
>>> but note that bdrv_dirty_bitmap_set_busy locks the mutex. And it is (may
>>> be) possible,
>>> that busy status of the bitmap is changed after bdrv_dirty_bitmap_check
>>> but before
>>> bdrv_dirty_bitmap_set_busy.  So, more correct would be do both operation
>>> under one
>>> critical section. Still, I don't know is the situation possible.
>>>
>>
>> Aren't we under the BQL here? Can we be pre-empted? :(
> 
> Seems we are. But, as it's said above dirty_bitmap_mutex declaration:
> 
>     /* Writing to the list requires the BQL _and_ the dirty_bitmap_mutex.
>      * Reading from the list can be done with either the BQL or the
>      * dirty_bitmap_mutex.  Modifying a bitmap only requires
>      * dirty_bitmap_mutex.  */
> 
> It means, that another thread may modify bitmap (for example its 'busy'
> field)
> taking only dirty_bitmap_mutex, which will lead to the case I described.
> 

Alright, I'll add the locking change. Atomic check-and-lock for the busy
bit sounds like a good model anyway.

>>
>>>> +
>>>> +    job = block_job_create(job_id, &bitpop_job_driver, txn, bs,
>>>> +                           BLK_PERM_CONSISTENT_READ,
>>>
>>> Do we need it? We are not going to read..
>>>
>>
>> Copy-paste / leftover from an earlier draft where I was trying to
>> achieve atomicity. It can be removed if we don't want the stricter
>> atomicity.
>>

I guess I really don't need READ here. I guess ~RESIZE is the only
important one.

>>>> +                           BLK_PERM_ALL & ~BLK_PERM_RESIZE,
>>>> +                           0, creation_flags,
>>>> +                           cb, opaque, errp);
>>>> +    if (!job) {
>>>> +        bdrv_dirty_bitmap_set_busy(target_bitmap, false);
>>>> +        bdrv_release_dirty_bitmap(new_bitmap);
>>>> +        return NULL;
>>>> +    }
>>>> +
>>>> +    job->bs = bs;
>>>> +    job->on_error = on_error;
>>>> +    job->target_bitmap = target_bitmap;
>>>> +    job->new_bitmap = new_bitmap;
>>>> +    job->len = len;
>>>> +    job_progress_set_remaining(&job->common.job, job->len);
>>>> +
>>>> +    return &job->common;
>>>> +}
>>>> diff --git a/blockjob.c b/blockjob.c
>>>> index 5d63b1e89d..7e450372bd 100644
>>>> --- a/blockjob.c
>>>> +++ b/blockjob.c
>>>> @@ -56,7 +56,8 @@ static bool is_block_job(Job *job)
>>>>        return job_type(job) == JOB_TYPE_BACKUP ||
>>>>               job_type(job) == JOB_TYPE_COMMIT ||
>>>>               job_type(job) == JOB_TYPE_MIRROR ||
>>>> -           job_type(job) == JOB_TYPE_STREAM;
>>>> +           job_type(job) == JOB_TYPE_STREAM ||
>>>> +           job_type(job) == JOB_TYPE_BITMAP_POPULATE;
>>>>    }
>>>>      BlockJob *block_job_next(BlockJob *bjob)
>>>> diff --git a/block/Makefile.objs b/block/Makefile.objs
>>>> index 3bcb35c81d..f3cfc89d90 100644
>>>> --- a/block/Makefile.objs
>>>> +++ b/block/Makefile.objs
>>>> @@ -36,6 +36,7 @@ block-obj-$(CONFIG_LIBSSH) += ssh.o
>>>>    block-obj-y += accounting.o dirty-bitmap.o
>>>>    block-obj-y += write-threshold.o
>>>>    block-obj-y += backup.o
>>>> +block-obj-y += bitmap-alloc.o
>>>>    block-obj-$(CONFIG_REPLICATION) += replication.o
>>>>    block-obj-y += throttle.o copy-on-read.o
>>>>    block-obj-y += block-copy.o
>>>>
>>>
>>>
>>

Thanks!
Vladimir Sementsov-Ogievskiy Feb. 27, 2020, 6:11 a.m. UTC | #5
26.02.2020 22:11, John Snow wrote:
> 
> 
> On 2/26/20 12:07 AM, Vladimir Sementsov-Ogievskiy wrote:
>> 25.02.2020 23:41, John Snow wrote:
>>>
>>>
>>> On 2/25/20 11:04 AM, Vladimir Sementsov-Ogievskiy wrote:
>>>> 25.02.2020 3:56, John Snow wrote:
>>>>> This job copies the allocation map into a bitmap. It's a job because
>>>>> there's no guarantee that allocation interrogation will be quick (or
>>>>> won't hang), so it cannot be retrofit into block-dirty-bitmap-merge.
>>>>>
>>>>> It was designed with different possible population patterns in mind,
>>>>> but only top layer allocation was implemented for now.
>>>>>
>>>>> Signed-off-by: John Snow <jsnow@redhat.com>
>>>>> ---
>>>>>     qapi/block-core.json      |  48 +++++++++
>>>>>     qapi/job.json             |   2 +-
>>>>>     include/block/block_int.h |  21 ++++
>>>>>     block/bitmap-alloc.c      | 207
>>>>> ++++++++++++++++++++++++++++++++++++++
>>>>>     blockjob.c                |   3 +-
>>>>>     block/Makefile.objs       |   1 +
>>>>>     6 files changed, 280 insertions(+), 2 deletions(-)
>>>>>     create mode 100644 block/bitmap-alloc.c
>>>>>
>>>>> diff --git a/qapi/block-core.json b/qapi/block-core.json
>>>>> index 85e27bb61f..df1797681a 100644
>>>>> --- a/qapi/block-core.json
>>>>> +++ b/qapi/block-core.json
>>>>> @@ -2245,6 +2245,54 @@
>>>>>           { 'command': 'block-dirty-bitmap-merge',
>>>>>             'data': 'BlockDirtyBitmapMerge' }
>>>>>     +##
>>>>> +# @BitmapPattern:
>>>>> +#
>>>>> +# An enumeration of possible patterns that can be written into a
>>>>> bitmap.
>>>>> +#
>>>>> +# @allocation-top: The allocation status of the top layer
>>>>> +#                  of the attached storage node.
>>>>> +#
>>>>> +# Since: 5.0
>>>>> +##
>>>>> +{ 'enum': 'BitmapPattern',
>>>>> +  'data': ['allocation-top'] }
>>>>> +
>>>>> +##
>>>>> +# @BlockDirtyBitmapPopulate:
>>>>> +#
>>>>> +# @job-id: identifier for the newly-created block job.
>>>>> +#
>>>>> +# @pattern: What pattern should be written into the bitmap?
>>>>> +#
>>>>> +# @on-error: the action to take if an error is encountered on a
>>>>> bitmap's
>>>>> +#            attached node, default 'report'.
>>>>> +#            'stop' and 'enospc' can only be used if the block device
>>>>> supports
>>>>> +#            io-status (see BlockInfo).
>>>>> +#
>>>>> +# @auto-finalize: When false, this job will wait in a PENDING state
>>>>> after it has
>>>>> +#                 finished its work, waiting for @block-job-finalize
>>>>> before
>>>>> +#                 making any block graph changes.
>>>>
>>>> sounds a bit strange in context of bitmap-population job
>>>>
>>>
>>> Yeah, you're right. Copy-pasted for "consistency".
>>>
>>>>> +#                 When true, this job will automatically
>>>>> +#                 perform its abort or commit actions.
>>>>> +#                 Defaults to true.
>>>>> +#
>>>>> +# @auto-dismiss: When false, this job will wait in a CONCLUDED state
>>>>> after it
>>>>> +#                has completely ceased all work, and awaits
>>>>> @block-job-dismiss.
>>>>> +#                When true, this job will automatically disappear
>>>>> from the query
>>>>> +#                list without user intervention.
>>>>> +#                Defaults to true.
>>>>> +#
>>>>> +# Since: 5.0
>>>>> +##
>>>>> +{ 'struct': 'BlockDirtyBitmapPopulate',
>>>>> +  'base': 'BlockDirtyBitmap',
>>>>> +  'data': { 'job-id': 'str',
>>>>> +            'pattern': 'BitmapPattern',
>>>>> +            '*on-error': 'BlockdevOnError',
>>>>> +            '*auto-finalize': 'bool',
>>>>> +            '*auto-dismiss': 'bool' } }
>>>>> +
>>>>>     ##
>>>>>     # @BlockDirtyBitmapSha256:
>>>>>     #
>>>>> diff --git a/qapi/job.json b/qapi/job.json
>>>>> index 5e658281f5..5f496d4630 100644
>>>>> --- a/qapi/job.json
>>>>> +++ b/qapi/job.json
>>>>> @@ -22,7 +22,7 @@
>>>>>     # Since: 1.7
>>>>>     ##
>>>>>     { 'enum': 'JobType',
>>>>> -  'data': ['commit', 'stream', 'mirror', 'backup', 'create'] }
>>>>> +  'data': ['commit', 'stream', 'mirror', 'backup', 'create',
>>>>> 'bitmap-populate'] }
>>>>>       ##
>>>>>     # @JobStatus:
>>>>> diff --git a/include/block/block_int.h b/include/block/block_int.h
>>>>> index 6f9fd5e20e..a5884b597e 100644
>>>>> --- a/include/block/block_int.h
>>>>> +++ b/include/block/block_int.h
>>>>> @@ -1215,6 +1215,27 @@ BlockJob *backup_job_create(const char *job_id,
>>>>> BlockDriverState *bs,
>>>>>                                 BlockCompletionFunc *cb, void *opaque,
>>>>>                                 JobTxn *txn, Error **errp);
>>>>>     +/*
>>>>> + * bitpop_job_create: Create a new bitmap population job.
>>>>> + *
>>>>> + * @job_id: The id of the newly-created job.
>>>>> + * @bs: Block device associated with the @target_bitmap.
>>>>> + * @target_bitmap: The bitmap to populate.
>>>>> + * @on_error: What to do if an error on @bs is encountered.
>>>>> + * @creation_flags: Flags that control the behavior of the Job
>>>>> lifetime.
>>>>> + *                  See @BlockJobCreateFlags
>>>>> + * @cb: Completion function for the job.
>>>>> + * @opaque: Opaque pointer value passed to @cb.
>>>>> + * @txn: Transaction that this job is part of (may be NULL).
>>>>> + */
>>>>> +BlockJob *bitpop_job_create(const char *job_id, BlockDriverState *bs,
>>>>> +                            BdrvDirtyBitmap *target_bitmap,
>>>>> +                            BitmapPattern pattern,
>>>>> +                            BlockdevOnError on_error,
>>>>> +                            int creation_flags,
>>>>> +                            BlockCompletionFunc *cb, void *opaque,
>>>>> +                            JobTxn *txn, Error **errp);
>>>>> +
>>>>>     void hmp_drive_add_node(Monitor *mon, const char *optstr);
>>>>>       BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
>>>>> diff --git a/block/bitmap-alloc.c b/block/bitmap-alloc.c
>>>>> new file mode 100644
>>>>> index 0000000000..47d542dc12
>>>>> --- /dev/null
>>>>> +++ b/block/bitmap-alloc.c
>>>>> @@ -0,0 +1,207 @@
>>>>> +/*
>>>>> + * Async Dirty Bitmap Populator
>>>>> + *
>>>>> + * Copyright (C) 2020 Red Hat, Inc.
>>>>> + *
>>>>> + * Authors:
>>>>> + *  John Snow <jsnow@redhat.com>
>>>>> + *
>>>>> + * This work is licensed under the terms of the GNU GPL, version 2 or
>>>>> later.
>>>>> + * See the COPYING file in the top-level directory.
>>>>> + *
>>>>> + */
>>>>> +
>>>>> +#include "qemu/osdep.h"
>>>>> +
>>>>> +#include "trace.h"
>>>>> +#include "block/block.h"
>>>>> +#include "block/block_int.h"
>>>>> +#include "block/blockjob_int.h"
>>>>> +#include "block/block_backup.h"
>>>>> +#include "block/block-copy.h"
>>>>
>>>> I hope, not all includes are needed :)
>>>
>>> Whoops, no, of course not. I copied the skeleton from backup, as you can
>>> tell ;)
>>>
>>>>
>>>>> +#include "qapi/error.h"
>>>>> +#include "qapi/qmp/qerror.h"
>>>>> +#include "qemu/ratelimit.h"
>>>>> +#include "qemu/cutils.h"
>>>>> +#include "sysemu/block-backend.h"
>>>>> +#include "qemu/bitmap.h"
>>>>> +#include "qemu/error-report.h"
>>>>> +
>>>>> +typedef struct BitpopBlockJob {
>>>>> +    BlockJob common;
>>>>> +    BlockDriverState *bs;
>>>>> +    BdrvDirtyBitmap *target_bitmap;
>>>>> +    BdrvDirtyBitmap *new_bitmap;
>>>>> +    BlockdevOnError on_error;
>>>>> +    uint64_t len;
>>>>> +} BitpopBlockJob;
>>>>> +
>>>>> +static const BlockJobDriver bitpop_job_driver;
>>>>> +
>>>>> +static void bitpop_commit(Job *job)
>>>>> +{
>>>>> +    BitpopBlockJob *s = container_of(job, BitpopBlockJob, common.job);
>>>>> +
>>>>> +    bdrv_dirty_bitmap_merge_internal(s->target_bitmap, s->new_bitmap,
>>>>> +                                     NULL, true);
>>>>
>>>> Hmm, so you populate new_bitmap, and then merge to target. Why can't we
>>>> work
>>>> directly with target bitmap? The most probable case is that libvirt will
>>>> create bitmap specifically to use as target in this operation, or not?
>>>>
>>>
>>> Most likely case, yes. Odds are very good it will be a brand new bitmap.
>>>
>>> However, we already have a creation command -- I didn't want to make a
>>> second job-version of the command and then maintain two interfaces, so I
>>> made it a "merge into existing" style command instead.
>>>
>>>> Hmm, just to make it possible to cancel the job and keep the target
>>>> bitmap in
>>>> original state? Is it really needed? I think on failure target bitmap
>>>> will be
>>>> removed anyway..
>>>>
>>>
>>> You caught me being *lazy*. I copy the bitmap so I can unconditionally
>>> enable it to catch in-flight writes without having to create block graph
>>> modifications.
>>>
>>> But, yes, to undo changes if we cancel.
>>>
>>> I didn't want to make a job that was not able to be canceled. The
>>> alternative is pursuing the design where we allow new bitmaps only --
>>> because then on cancel we can just delete them.
>>
>> On backup job (and any other) we can't rollback target changes on cancel.
>> So, I think it would be OK to take same semantics for the new job,
>> keeping in
>> mind that it would be most probable usage case and no sense in creating
>> additional bitmaps. And if caller needs to use existent non-empty bitmap as
>> target and wants correct cancel, it always can create additional bitmap by
>> itself and then merge it to actual target.
>>
> 
> While backup can't roll back data changes, it does avoid modifying
> bitmaps if it didn't succeed. This is a bitmap-centric job and we have
> the capability to do a complete rollback.
> 
> i.e. I am thinking more about consistency with bitmap behavior than I am
> consistency with job behavior.
> 
> That this job becomes the only one to be able to be *fully* reversed on
> cancel is unique to jobs -- but is shared by all transactionable bitmap
> commands.

Hmm, yes.. But it will be useless without a possibility to put job-completion
into transaction. Still we'll need such thing anyway at some point.

> 
>> And why new? It's up to user, what to use as target. And user knows,
>> that on
>> failure or cancel target becomes invalid and will workaround this if
>> needed.
>>
> 
> I think you're advocating for writing directly into the bitmap and just
> accepting that it's trashed afterwards, but I don't like this behavior
> because it limits what we can do with this job in the future.
> 
> Is this just a matter of taste? Since every other bitmap command offers
> some kind of rollback I felt more comfortable doing the same here.

OK

> 
> I'm worried that if I decide to treat the result bitmap as disposable,
> that I will need to provide greater atomicity for the allocation status
> read. At the moment I rely on the temporary bitmap being *enabled* to
> make sure that if any new writes happen in regions I have already read
> that they are reflected in the bitmap anyway.
> 
> If the user passes a disabled bitmap, that guarantee is lost -- we lose
> point in time semantics entirely. Writes that occur further in the file
> are captured, but ones that occur earlier are missed.

Still, if user pass disabled bitmap, it will be invalid immediately after
job finish.

> 
> If I make a temporary enabled bitmap, I catch everything.
> 
> I could prohibit *disabled* bitmaps from being passed in to this job,
> but that again makes it of a more limited use for other patterns in the
> future.
> 
> So I guess I would prefer to just leave this as-is for now if it's not
> that harmful.

OK, I agreed, because of semantics of other bitmap-related commands.

> 
>>>
>>>>> +}
>>>>> +
>>>>> +/* no abort needed; just clean without committing. */
>>>>> +
>>>>> +static void bitpop_clean(Job *job)
>>>>> +{
>>>>> +    BitpopBlockJob *s = container_of(job, BitpopBlockJob, common.job);
>>>>> +
>>>>> +    bdrv_release_dirty_bitmap(s->new_bitmap);
>>>>> +    bdrv_dirty_bitmap_set_busy(s->target_bitmap, false);
>>>>> +}
>>>>> +
>>>>> +static BlockErrorAction bitpop_error_action(BitpopBlockJob *job, int
>>>>> error)
>>>>> +{
>>>>> +    return block_job_error_action(&job->common, job->on_error, true,
>>>>> error);
>>>>> +}
>>>>> +
>>>>> +static bool coroutine_fn yield_and_check(Job *job)
>>>>> +{
>>>>> +    if (job_is_cancelled(job)) {
>>>>> +        return true;
>>>>> +    }
>>>>> +
>>>>> +    job_sleep_ns(job, 0);
>>>>> +
>>>>> +    if (job_is_cancelled(job)) {
>>>>> +        return true;
>>>>> +    }
>>>>> +
>>>>> +    return false;
>>>>> +}
>>>>> +
>>>>> +static int coroutine_fn bitpop_run(Job *job, Error **errp)
>>>>> +{
>>>>> +    BitpopBlockJob *s = container_of(job, BitpopBlockJob, common.job);
>>>>> +    int ret = 0;
>>>>> +    int64_t offset;
>>>>> +    int64_t count;
>>>>> +    int64_t bytes;
>>>>> +
>>>>> +    for (offset = 0; offset < s->len; ) {
>>>>> +        if (yield_and_check(job)) {
>>>>> +            ret = -ECANCELED;
>>>>> +            break;
>>>>> +        }
>>>>> +
>>>>> +        bytes = s->len - offset;
>>>>> +        ret = bdrv_is_allocated(s->bs, offset, bytes, &count);
>>>>> +        if (ret < 0) {
>>>>> +            if (bitpop_error_action(s, -ret) ==
>>>>> BLOCK_ERROR_ACTION_REPORT) {
>>>>> +                break;
>>>>> +            }
>>>>> +            continue;
>>>>> +        }
>>>>> +
>>>>> +        if (!count) {
>>>>> +            ret = 0;
>>>>
>>>> Hmm, I think it's impossible case.. If so, better to make an assertion
>>>> or ignore..
>>>>
>>>
>>> OK, agreed.
>>>
>>>>> +            break;
>>>>> +        }
>>>>> +
>>>>> +        if (ret) {
>>>>> +            bdrv_set_dirty_bitmap(s->new_bitmap, offset, count);
>>>>> +            ret = 0;
>>>>> +        }
>>>>> +
>>>>> +        job_progress_update(job, count);
>>>>> +        offset += count;
>>>>> +    }
>>>>> +
>>>>> +    return ret;
>>>>> +}
>>>>> +
>>>>> +static const BlockJobDriver bitpop_job_driver = {
>>>>> +    .job_driver = {
>>>>> +        .instance_size          = sizeof(BitpopBlockJob),
>>>>> +        .job_type               = JOB_TYPE_BITMAP_POPULATE,
>>>>> +        .free                   = block_job_free,
>>>>> +        .user_resume            = block_job_user_resume,
>>>>> +        .run                    = bitpop_run,
>>>>> +        .commit                 = bitpop_commit,
>>>>> +        .clean                  = bitpop_clean,
>>>>> +    }
>>>>> +};
>>>>> +
>>>>> +
>>>>> +BlockJob *bitpop_job_create(
>>>>> +    const char *job_id,
>>>>> +    BlockDriverState *bs,
>>>>> +    BdrvDirtyBitmap *target_bitmap,
>>>>> +    BitmapPattern pattern,
>>>>> +    BlockdevOnError on_error,
>>>>> +    int creation_flags,
>>>>> +    BlockCompletionFunc *cb,
>>>>> +    void *opaque,
>>>>> +    JobTxn *txn,
>>>>> +    Error **errp)
>>>>> +{
>>>>> +    int64_t len;
>>>>> +    BitpopBlockJob *job = NULL;
>>>>> +    int64_t cluster_size;
>>>>> +    BdrvDirtyBitmap *new_bitmap = NULL;
>>>>> +
>>>>> +    assert(bs);
>>>>> +    assert(target_bitmap);
>>>>> +
>>>>> +    if (!bdrv_is_inserted(bs)) {
>>>>> +        error_setg(errp, "Device is not inserted: %s",
>>>>> +                   bdrv_get_device_name(bs));
>>>>> +        return NULL;
>>>>> +    }
>>>>
>>>> Why this?
>>>>
>>>
>>> I assumed there was nothing to read the allocation map *of* if there
>>> wasn't a media present.
>>>
>>> Am I mistaken?
>>
>> is_inserted checks existing of bs->drv, but bitmap operations actually
>> doesn't need any drv.. Hmm. I'm not against this check anyway.
>>
> 
> Maybe it would be clearer to say this:
> 
> if (pattern = BITMAP_PATTERN_ALLOCATION_TOP) {
>      if (!bdrv_is_inserted(bs)) {
>          ...
>      }
> } else {
>      ...
> }
> 
> We need to make sure bs->drv exists for the allocation map reading --
> not for every theoretical pattern, but for this one we do.

We can leave this as is now, I don't want to care much. Still I just doubt that
this check is needed in any job. What are the cases when we have bs without drv?
Corrupted qcow2? And where is guarantee that we will not lose drv during the job?

> 
>>>
>>>>> +
>>>>> +    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
>>>>> +        return NULL;
>>>>> +    }
>>>>
>>>> and this?
>>>>
>>>
>>> Copy-paste: I don't understand if I want a new op blocker, to re-use an
>>> op-blocker, or to have no op blocker.
>>>
>>> Genuinely I have no idea. I should left a review comment here, I forgot
>>> about this part, sorry.
>>
>> I'm for no op blocker. As I understand, op-blockers are old and should be
>> replaced by permissions and frozen children. So, if we don't know, do we
>> need any blocking here, better go on without it. Also, op-blockers can't
>> block forbidden usage through filters anyway.
>>
> 
> Sounds good to me, honestly.
> 
>>>
>>>>> +
>>>>> +    if (bdrv_dirty_bitmap_check(target_bitmap, BDRV_BITMAP_DEFAULT,
>>>>> errp)) {
>>>>> +        return NULL;
>>>>> +    }
>>>>> +
>>>>> +    if (pattern != BITMAP_PATTERN_ALLOCATION_TOP) {
>>>>> +        error_setg(errp, "Unrecognized bitmap pattern");
>>>>> +        return NULL;
>>>>> +    }
>>>>> +
>>>>> +    len = bdrv_getlength(bs);
>>>>> +    if (len < 0) {
>>>>> +        error_setg_errno(errp, -len, "unable to get length for '%s'",
>>>>> +                         bdrv_get_device_name(bs));
>>>>> +        return NULL;
>>>>> +    }
>>>>> +
>>>>> +    /* NB: new bitmap is anonymous and enabled */
>>>>> +    cluster_size = bdrv_dirty_bitmap_granularity(target_bitmap);
>>>>> +    new_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL,
>>>>> errp);
>>>>> +    if (!new_bitmap) {
>>>>> +        return NULL;
>>>>> +    }
>>>>> +
>>>>> +    /* Take ownership; we reserve the right to write into this
>>>>> on-commit. */
>>>>> +    bdrv_dirty_bitmap_set_busy(target_bitmap, true);
>>>>
>>>> Honestly, I still have bad understanding about how should we use dirty
>>>> bitmap mutex,
>>>> but note that bdrv_dirty_bitmap_set_busy locks the mutex. And it is (may
>>>> be) possible,
>>>> that busy status of the bitmap is changed after bdrv_dirty_bitmap_check
>>>> but before
>>>> bdrv_dirty_bitmap_set_busy.  So, more correct would be do both operation
>>>> under one
>>>> critical section. Still, I don't know is the situation possible.
>>>>
>>>
>>> Aren't we under the BQL here? Can we be pre-empted? :(
>>
>> Seems we are. But, as it's said above dirty_bitmap_mutex declaration:
>>
>>      /* Writing to the list requires the BQL _and_ the dirty_bitmap_mutex.
>>       * Reading from the list can be done with either the BQL or the
>>       * dirty_bitmap_mutex.  Modifying a bitmap only requires
>>       * dirty_bitmap_mutex.  */
>>
>> It means, that another thread may modify bitmap (for example its 'busy'
>> field)
>> taking only dirty_bitmap_mutex, which will lead to the case I described.
>>
> 
> Alright, I'll add the locking change. Atomic check-and-lock for the busy
> bit sounds like a good model anyway.
> 
>>>
>>>>> +
>>>>> +    job = block_job_create(job_id, &bitpop_job_driver, txn, bs,
>>>>> +                           BLK_PERM_CONSISTENT_READ,
>>>>
>>>> Do we need it? We are not going to read..
>>>>
>>>
>>> Copy-paste / leftover from an earlier draft where I was trying to
>>> achieve atomicity. It can be removed if we don't want the stricter
>>> atomicity.
>>>
> 
> I guess I really don't need READ here. I guess ~RESIZE is the only
> important one.

Agree

> 
>>>>> +                           BLK_PERM_ALL & ~BLK_PERM_RESIZE,
>>>>> +                           0, creation_flags,
>>>>> +                           cb, opaque, errp);
>>>>> +    if (!job) {
>>>>> +        bdrv_dirty_bitmap_set_busy(target_bitmap, false);
>>>>> +        bdrv_release_dirty_bitmap(new_bitmap);
>>>>> +        return NULL;
>>>>> +    }
>>>>> +
>>>>> +    job->bs = bs;
>>>>> +    job->on_error = on_error;
>>>>> +    job->target_bitmap = target_bitmap;
>>>>> +    job->new_bitmap = new_bitmap;
>>>>> +    job->len = len;
>>>>> +    job_progress_set_remaining(&job->common.job, job->len);
>>>>> +
>>>>> +    return &job->common;
>>>>> +}
>>>>> diff --git a/blockjob.c b/blockjob.c
>>>>> index 5d63b1e89d..7e450372bd 100644
>>>>> --- a/blockjob.c
>>>>> +++ b/blockjob.c
>>>>> @@ -56,7 +56,8 @@ static bool is_block_job(Job *job)
>>>>>         return job_type(job) == JOB_TYPE_BACKUP ||
>>>>>                job_type(job) == JOB_TYPE_COMMIT ||
>>>>>                job_type(job) == JOB_TYPE_MIRROR ||
>>>>> -           job_type(job) == JOB_TYPE_STREAM;
>>>>> +           job_type(job) == JOB_TYPE_STREAM ||
>>>>> +           job_type(job) == JOB_TYPE_BITMAP_POPULATE;
>>>>>     }
>>>>>       BlockJob *block_job_next(BlockJob *bjob)
>>>>> diff --git a/block/Makefile.objs b/block/Makefile.objs
>>>>> index 3bcb35c81d..f3cfc89d90 100644
>>>>> --- a/block/Makefile.objs
>>>>> +++ b/block/Makefile.objs
>>>>> @@ -36,6 +36,7 @@ block-obj-$(CONFIG_LIBSSH) += ssh.o
>>>>>     block-obj-y += accounting.o dirty-bitmap.o
>>>>>     block-obj-y += write-threshold.o
>>>>>     block-obj-y += backup.o
>>>>> +block-obj-y += bitmap-alloc.o
>>>>>     block-obj-$(CONFIG_REPLICATION) += replication.o
>>>>>     block-obj-y += throttle.o copy-on-read.o
>>>>>     block-obj-y += block-copy.o
>>>>>
>>>>
>>>>
>>>
> 
> Thanks!
>
John Snow March 3, 2020, 9:39 p.m. UTC | #6
On 2/27/20 1:11 AM, Vladimir Sementsov-Ogievskiy wrote:
> 
> Still, if user pass disabled bitmap, it will be invalid immediately after
> job finish.

Yes ... In truth, I really want to augment this job to provide a defined
point-in-time semantic so it can be fully useful in all circumstances.

At the moment, the point-in-time it provides is "at finalize", which may
or may not be the single most useful semantic. (It matches mirror --
because that was easier.)

The other option is "at start" which matches backup, but will require a
new filter and some changes to permissions. That was more invasive, so I
avoided it for now.

(In the interest of a speedy resolution for libvirt, can we add a
critical_moment=[START|FINALIZE] option to this job *later*?)
diff mbox series

Patch

diff --git a/qapi/block-core.json b/qapi/block-core.json
index 85e27bb61f..df1797681a 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2245,6 +2245,54 @@ 
       { 'command': 'block-dirty-bitmap-merge',
         'data': 'BlockDirtyBitmapMerge' }
 
+##
+# @BitmapPattern:
+#
+# An enumeration of possible patterns that can be written into a bitmap.
+#
+# @allocation-top: The allocation status of the top layer
+#                  of the attached storage node.
+#
+# Since: 5.0
+##
+{ 'enum': 'BitmapPattern',
+  'data': ['allocation-top'] }
+
+##
+# @BlockDirtyBitmapPopulate:
+#
+# @job-id: identifier for the newly-created block job.
+#
+# @pattern: What pattern should be written into the bitmap?
+#
+# @on-error: the action to take if an error is encountered on a bitmap's
+#            attached node, default 'report'.
+#            'stop' and 'enospc' can only be used if the block device supports
+#            io-status (see BlockInfo).
+#
+# @auto-finalize: When false, this job will wait in a PENDING state after it has
+#                 finished its work, waiting for @block-job-finalize before
+#                 making any block graph changes.
+#                 When true, this job will automatically
+#                 perform its abort or commit actions.
+#                 Defaults to true.
+#
+# @auto-dismiss: When false, this job will wait in a CONCLUDED state after it
+#                has completely ceased all work, and awaits @block-job-dismiss.
+#                When true, this job will automatically disappear from the query
+#                list without user intervention.
+#                Defaults to true.
+#
+# Since: 5.0
+##
+{ 'struct': 'BlockDirtyBitmapPopulate',
+  'base': 'BlockDirtyBitmap',
+  'data': { 'job-id': 'str',
+            'pattern': 'BitmapPattern',
+            '*on-error': 'BlockdevOnError',
+            '*auto-finalize': 'bool',
+            '*auto-dismiss': 'bool' } }
+
 ##
 # @BlockDirtyBitmapSha256:
 #
diff --git a/qapi/job.json b/qapi/job.json
index 5e658281f5..5f496d4630 100644
--- a/qapi/job.json
+++ b/qapi/job.json
@@ -22,7 +22,7 @@ 
 # Since: 1.7
 ##
 { 'enum': 'JobType',
-  'data': ['commit', 'stream', 'mirror', 'backup', 'create'] }
+  'data': ['commit', 'stream', 'mirror', 'backup', 'create', 'bitmap-populate'] }
 
 ##
 # @JobStatus:
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 6f9fd5e20e..a5884b597e 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -1215,6 +1215,27 @@  BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
                             BlockCompletionFunc *cb, void *opaque,
                             JobTxn *txn, Error **errp);
 
+/*
+ * bitpop_job_create: Create a new bitmap population job.
+ *
+ * @job_id: The id of the newly-created job.
+ * @bs: Block device associated with the @target_bitmap.
+ * @target_bitmap: The bitmap to populate.
+ * @on_error: What to do if an error on @bs is encountered.
+ * @creation_flags: Flags that control the behavior of the Job lifetime.
+ *                  See @BlockJobCreateFlags
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @txn: Transaction that this job is part of (may be NULL).
+ */
+BlockJob *bitpop_job_create(const char *job_id, BlockDriverState *bs,
+                            BdrvDirtyBitmap *target_bitmap,
+                            BitmapPattern pattern,
+                            BlockdevOnError on_error,
+                            int creation_flags,
+                            BlockCompletionFunc *cb, void *opaque,
+                            JobTxn *txn, Error **errp);
+
 void hmp_drive_add_node(Monitor *mon, const char *optstr);
 
 BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
diff --git a/block/bitmap-alloc.c b/block/bitmap-alloc.c
new file mode 100644
index 0000000000..47d542dc12
--- /dev/null
+++ b/block/bitmap-alloc.c
@@ -0,0 +1,207 @@ 
+/*
+ * Async Dirty Bitmap Populator
+ *
+ * Copyright (C) 2020 Red Hat, Inc.
+ *
+ * Authors:
+ *  John Snow <jsnow@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+
+#include "trace.h"
+#include "block/block.h"
+#include "block/block_int.h"
+#include "block/blockjob_int.h"
+#include "block/block_backup.h"
+#include "block/block-copy.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
+#include "qemu/ratelimit.h"
+#include "qemu/cutils.h"
+#include "sysemu/block-backend.h"
+#include "qemu/bitmap.h"
+#include "qemu/error-report.h"
+
+typedef struct BitpopBlockJob {
+    BlockJob common;
+    BlockDriverState *bs;
+    BdrvDirtyBitmap *target_bitmap;
+    BdrvDirtyBitmap *new_bitmap;
+    BlockdevOnError on_error;
+    uint64_t len;
+} BitpopBlockJob;
+
+static const BlockJobDriver bitpop_job_driver;
+
+static void bitpop_commit(Job *job)
+{
+    BitpopBlockJob *s = container_of(job, BitpopBlockJob, common.job);
+
+    bdrv_dirty_bitmap_merge_internal(s->target_bitmap, s->new_bitmap,
+                                     NULL, true);
+}
+
+/* no abort needed; just clean without committing. */
+
+static void bitpop_clean(Job *job)
+{
+    BitpopBlockJob *s = container_of(job, BitpopBlockJob, common.job);
+
+    bdrv_release_dirty_bitmap(s->new_bitmap);
+    bdrv_dirty_bitmap_set_busy(s->target_bitmap, false);
+}
+
+static BlockErrorAction bitpop_error_action(BitpopBlockJob *job, int error)
+{
+    return block_job_error_action(&job->common, job->on_error, true, error);
+}
+
+static bool coroutine_fn yield_and_check(Job *job)
+{
+    if (job_is_cancelled(job)) {
+        return true;
+    }
+
+    job_sleep_ns(job, 0);
+
+    if (job_is_cancelled(job)) {
+        return true;
+    }
+
+    return false;
+}
+
+static int coroutine_fn bitpop_run(Job *job, Error **errp)
+{
+    BitpopBlockJob *s = container_of(job, BitpopBlockJob, common.job);
+    int ret = 0;
+    int64_t offset;
+    int64_t count;
+    int64_t bytes;
+
+    for (offset = 0; offset < s->len; ) {
+        if (yield_and_check(job)) {
+            ret = -ECANCELED;
+            break;
+        }
+
+        bytes = s->len - offset;
+        ret = bdrv_is_allocated(s->bs, offset, bytes, &count);
+        if (ret < 0) {
+            if (bitpop_error_action(s, -ret) == BLOCK_ERROR_ACTION_REPORT) {
+                break;
+            }
+            continue;
+        }
+
+        if (!count) {
+            ret = 0;
+            break;
+        }
+
+        if (ret) {
+            bdrv_set_dirty_bitmap(s->new_bitmap, offset, count);
+            ret = 0;
+        }
+
+        job_progress_update(job, count);
+        offset += count;
+    }
+
+    return ret;
+}
+
+static const BlockJobDriver bitpop_job_driver = {
+    .job_driver = {
+        .instance_size          = sizeof(BitpopBlockJob),
+        .job_type               = JOB_TYPE_BITMAP_POPULATE,
+        .free                   = block_job_free,
+        .user_resume            = block_job_user_resume,
+        .run                    = bitpop_run,
+        .commit                 = bitpop_commit,
+        .clean                  = bitpop_clean,
+    }
+};
+
+
+BlockJob *bitpop_job_create(
+    const char *job_id,
+    BlockDriverState *bs,
+    BdrvDirtyBitmap *target_bitmap,
+    BitmapPattern pattern,
+    BlockdevOnError on_error,
+    int creation_flags,
+    BlockCompletionFunc *cb,
+    void *opaque,
+    JobTxn *txn,
+    Error **errp)
+{
+    int64_t len;
+    BitpopBlockJob *job = NULL;
+    int64_t cluster_size;
+    BdrvDirtyBitmap *new_bitmap = NULL;
+
+    assert(bs);
+    assert(target_bitmap);
+
+    if (!bdrv_is_inserted(bs)) {
+        error_setg(errp, "Device is not inserted: %s",
+                   bdrv_get_device_name(bs));
+        return NULL;
+    }
+
+    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
+        return NULL;
+    }
+
+    if (bdrv_dirty_bitmap_check(target_bitmap, BDRV_BITMAP_DEFAULT, errp)) {
+        return NULL;
+    }
+
+    if (pattern != BITMAP_PATTERN_ALLOCATION_TOP) {
+        error_setg(errp, "Unrecognized bitmap pattern");
+        return NULL;
+    }
+
+    len = bdrv_getlength(bs);
+    if (len < 0) {
+        error_setg_errno(errp, -len, "unable to get length for '%s'",
+                         bdrv_get_device_name(bs));
+        return NULL;
+    }
+
+    /* NB: new bitmap is anonymous and enabled */
+    cluster_size = bdrv_dirty_bitmap_granularity(target_bitmap);
+    new_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL, errp);
+    if (!new_bitmap) {
+        return NULL;
+    }
+
+    /* Take ownership; we reserve the right to write into this on-commit. */
+    bdrv_dirty_bitmap_set_busy(target_bitmap, true);
+
+    job = block_job_create(job_id, &bitpop_job_driver, txn, bs,
+                           BLK_PERM_CONSISTENT_READ,
+                           BLK_PERM_ALL & ~BLK_PERM_RESIZE,
+                           0, creation_flags,
+                           cb, opaque, errp);
+    if (!job) {
+        bdrv_dirty_bitmap_set_busy(target_bitmap, false);
+        bdrv_release_dirty_bitmap(new_bitmap);
+        return NULL;
+    }
+
+    job->bs = bs;
+    job->on_error = on_error;
+    job->target_bitmap = target_bitmap;
+    job->new_bitmap = new_bitmap;
+    job->len = len;
+    job_progress_set_remaining(&job->common.job, job->len);
+
+    return &job->common;
+}
diff --git a/blockjob.c b/blockjob.c
index 5d63b1e89d..7e450372bd 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -56,7 +56,8 @@  static bool is_block_job(Job *job)
     return job_type(job) == JOB_TYPE_BACKUP ||
            job_type(job) == JOB_TYPE_COMMIT ||
            job_type(job) == JOB_TYPE_MIRROR ||
-           job_type(job) == JOB_TYPE_STREAM;
+           job_type(job) == JOB_TYPE_STREAM ||
+           job_type(job) == JOB_TYPE_BITMAP_POPULATE;
 }
 
 BlockJob *block_job_next(BlockJob *bjob)
diff --git a/block/Makefile.objs b/block/Makefile.objs
index 3bcb35c81d..f3cfc89d90 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -36,6 +36,7 @@  block-obj-$(CONFIG_LIBSSH) += ssh.o
 block-obj-y += accounting.o dirty-bitmap.o
 block-obj-y += write-threshold.o
 block-obj-y += backup.o
+block-obj-y += bitmap-alloc.o
 block-obj-$(CONFIG_REPLICATION) += replication.o
 block-obj-y += throttle.o copy-on-read.o
 block-obj-y += block-copy.o