diff mbox series

[v2,1/7] block-copy: streamline choice of copy_range vs. read/write

Message ID 20210518100757.31243-2-eesposit@redhat.com (mailing list archive)
State New, archived
Series: block-copy: protect block-copy internal structures

Commit Message

Emanuele Giuseppe Esposito May 18, 2021, 10:07 a.m. UTC
From: Paolo Bonzini <pbonzini@redhat.com>

Put the logic to determine the copy size in a separate function, so
that there is a simple state machine for the possible methods of
copying data from one BlockDriverState to the other.

While at it, store the common computation of block_copy_max_transfer
into a new field of BlockCopyState, and make sure that we always
obey max_transfer; that's more efficient even for the
COPY_RANGE_READ_WRITE case.

Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 block/block-copy.c | 59 ++++++++++++++++++++++++++++++----------------
 1 file changed, 39 insertions(+), 20 deletions(-)
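
The resulting state machine is small enough to sketch standalone. The enum
below mirrors the patch; next_method() is an illustrative condensation of the
transitions performed in block_copy_do_copy(), not code taken from the patch:

typedef enum {
    COPY_READ_WRITE_CLUSTER,  /* read+write, one cluster per request */
    COPY_READ_WRITE,          /* read+write, buffered chunk size     */
    COPY_RANGE_SMALL,         /* copy_range, conservative chunk size */
    COPY_RANGE_FULL           /* copy_range, full chunk size         */
} BlockCopyMethod;

/* Transition applied after each copy attempt; ret is the return value of
 * bdrv_co_copy_range() when one of the copy_range methods was used. */
static BlockCopyMethod next_method(BlockCopyMethod m, int ret)
{
    if (m == COPY_READ_WRITE_CLUSTER || m == COPY_READ_WRITE) {
        return m;                  /* read+write states are final */
    }
    if (ret < 0) {
        return COPY_READ_WRITE;    /* copy_range failed: fall back */
    }
    return COPY_RANGE_FULL;        /* success grows SMALL to FULL */
}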

Comments

Vladimir Sementsov-Ogievskiy May 20, 2021, 2:42 p.m. UTC | #1
18.05.2021 13:07, Emanuele Giuseppe Esposito wrote:
> From: Paolo Bonzini <pbonzini@redhat.com>
> 
> Put the logic to determine the copy size in a separate function, so
> that there is a simple state machine for the possible methods of
> copying data from one BlockDriverState to the other.

Honestly, for me a 4-state state machine plus a function to determine the copy size doesn't seem better than two simple variables, copy_size and use_copy_range.

What's the benefit of it?

> 
> While at it, store the common computation of block_copy_max_transfer
> into a new field of BlockCopyState, and make sure that we always
> obey max_transfer; that's more efficient even for the
> COPY_RANGE_READ_WRITE case.

hmm, maybe. It could be a separate patch.

> 
> Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>   block/block-copy.c | 59 ++++++++++++++++++++++++++++++----------------

The stats agree with me that it's not a simplification.

>   1 file changed, 39 insertions(+), 20 deletions(-)
> 
> diff --git a/block/block-copy.c b/block/block-copy.c
> index 37c8e8504b..10ce51a244 100644
> --- a/block/block-copy.c
> +++ b/block/block-copy.c
> @@ -28,6 +28,13 @@
>   #define BLOCK_COPY_MAX_WORKERS 64
>   #define BLOCK_COPY_SLICE_TIME 100000000ULL /* ns */
>   
> +typedef enum {
> +    COPY_READ_WRITE_CLUSTER,
> +    COPY_READ_WRITE,
> +    COPY_RANGE_SMALL,
> +    COPY_RANGE_FULL
> +} BlockCopyMethod;
> +
>   static coroutine_fn int block_copy_task_entry(AioTask *task);
>   
>   typedef struct BlockCopyCallState {
> @@ -85,8 +92,8 @@ typedef struct BlockCopyState {
>       BdrvDirtyBitmap *copy_bitmap;
>       int64_t in_flight_bytes;
>       int64_t cluster_size;
> -    bool use_copy_range;
> -    int64_t copy_size;
> +    BlockCopyMethod method;
> +    int64_t max_transfer;
>       uint64_t len;
>       QLIST_HEAD(, BlockCopyTask) tasks; /* All tasks from all block-copy calls */
>       QLIST_HEAD(, BlockCopyCallState) calls;
> @@ -148,6 +155,23 @@ static bool coroutine_fn block_copy_wait_one(BlockCopyState *s, int64_t offset,
>       return true;
>   }
>   
> +static inline int64_t block_copy_chunk_size(BlockCopyState *s)

"inline" word does nothing in static definitions in c files. Compiler make a decision independently of it.

> +{
> +    switch (s->method) {
> +    case COPY_READ_WRITE_CLUSTER:
> +        return s->cluster_size;
> +    case COPY_READ_WRITE:
> +    case COPY_RANGE_SMALL:
> +        return MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER),
> +                   s->max_transfer);
> +    case COPY_RANGE_FULL:
> +        return MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_COPY_RANGE),
> +                   s->max_transfer);
> +    default:
> +        abort();
> +    }
> +}
> +
>   /*
>    * Search for the first dirty area in offset/bytes range and create task at
>    * the beginning of it.
> @@ -157,8 +181,9 @@ static BlockCopyTask *block_copy_task_create(BlockCopyState *s,
>                                                int64_t offset, int64_t bytes)
>   {
>       BlockCopyTask *task;
> -    int64_t max_chunk = MIN_NON_ZERO(s->copy_size, call_state->max_chunk);
> +    int64_t max_chunk = block_copy_chunk_size(s);
>   
> +    max_chunk = MIN_NON_ZERO(max_chunk, call_state->max_chunk);
>       if (!bdrv_dirty_bitmap_next_dirty_area(s->copy_bitmap,
>                                              offset, offset + bytes,
>                                              max_chunk, &offset, &bytes))
> @@ -265,28 +290,27 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
>           .len = bdrv_dirty_bitmap_size(copy_bitmap),
>           .write_flags = write_flags,
>           .mem = shres_create(BLOCK_COPY_MAX_MEM),
> +        .max_transfer = QEMU_ALIGN_DOWN(block_copy_max_transfer(source, target)
> +                                        , cluster_size),
>       };
>   
> -    if (block_copy_max_transfer(source, target) < cluster_size) {
> +    if (s->max_transfer < cluster_size) {
>           /*
>            * copy_range does not respect max_transfer. We don't want to bother
>            * with requests smaller than block-copy cluster size, so fallback to
>            * buffered copying (read and write respect max_transfer on their
>            * behalf).
>            */
> -        s->use_copy_range = false;
> -        s->copy_size = cluster_size;
> +        s->method = COPY_READ_WRITE_CLUSTER;
>       } else if (write_flags & BDRV_REQ_WRITE_COMPRESSED) {
>           /* Compression supports only cluster-size writes and no copy-range. */
> -        s->use_copy_range = false;
> -        s->copy_size = cluster_size;
> +        s->method = COPY_READ_WRITE_CLUSTER;
>       } else {
>           /*
>            * We enable copy-range, but keep small copy_size, until first
>            * successful copy_range (look at block_copy_do_copy).
>            */
> -        s->use_copy_range = use_copy_range;
> -        s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER);
> +        s->method = use_copy_range ? COPY_RANGE_SMALL : COPY_READ_WRITE;
>       }
>   
>       ratelimit_init(&s->rate_limit);
> @@ -369,30 +393,25 @@ static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
>           return ret;
>       }
>   
> -    if (s->use_copy_range) {
> +    if (s->method >= COPY_RANGE_SMALL) {

I don't like such a condition:
1. it forces me to go to the enum definition to understand the logic
2. it's error prone: it's very easy to forget to update it when updating the enum, and the logic will be broken.

No, I don't like moving to a state machine for this simple thing.

>           ret = bdrv_co_copy_range(s->source, offset, s->target, offset, nbytes,
>                                    0, s->write_flags);
>           if (ret < 0) {
>               trace_block_copy_copy_range_fail(s, offset, ret);
> -            s->use_copy_range = false;
> -            s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER);
> +            s->method = COPY_READ_WRITE;
>               /* Fallback to read+write with allocated buffer */
>           } else {
> -            if (s->use_copy_range) {
> +            if (s->method == COPY_RANGE_SMALL) {
>                   /*
>                    * Successful copy-range. Now increase copy_size.  copy_range
>                    * does not respect max_transfer (it's a TODO), so we factor
>                    * that in here.
>                    *
> -                 * Note: we double-check s->use_copy_range for the case when
> +                 * Note: we double-check s->method for the case when
>                    * parallel block-copy request unsets it during previous
>                    * bdrv_co_copy_range call.
>                    */
> -                s->copy_size =
> -                        MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_COPY_RANGE),
> -                            QEMU_ALIGN_DOWN(block_copy_max_transfer(s->source,
> -                                                                    s->target),
> -                                            s->cluster_size));
> +                s->method = COPY_RANGE_FULL;
>               }
>               goto out;
>           }
>
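
The ordering-sensitive check criticized above could, as a purely hypothetical
alternative (nothing of the sort is proposed in the thread), be hidden behind
an explicit predicate, so that extending the enum cannot silently break the
logic:

/* Sketch only: an exhaustive switch instead of "s->method >= COPY_RANGE_SMALL".
 * Dropping the default case and compiling with -Wswitch would turn any newly
 * added BlockCopyMethod value that is not handled here into a warning. */
static bool block_copy_use_copy_range(BlockCopyState *s)
{
    switch (s->method) {
    case COPY_RANGE_SMALL:
    case COPY_RANGE_FULL:
        return true;
    case COPY_READ_WRITE_CLUSTER:
    case COPY_READ_WRITE:
        return false;
    default:
        abort();
    }
}
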
Emanuele Giuseppe Esposito May 20, 2021, 3:06 p.m. UTC | #2
On 20/05/2021 16:42, Vladimir Sementsov-Ogievskiy wrote:
> 18.05.2021 13:07, Emanuele Giuseppe Esposito wrote:
>> From: Paolo Bonzini <pbonzini@redhat.com>
>>
>> Put the logic to determine the copy size in a separate function, so
>> that there is a simple state machine for the possible methods of
>> copying data from one BlockDriverState to the other.
> 
> Honestly, for me a 4-state state machine plus a function to determine the copy
> size doesn't seem better than two simple variables, copy_size and use_copy_range.
> 
> What's the benefit of it?

It helps to have a single field (method) instead of two (use_copy_range,
copy_size), especially if we need to protect it for concurrent access.
See patch 7.
[...]

>> +static inline int64_t block_copy_chunk_size(BlockCopyState *s)
> 
> "inline" word does nothing in static definitions in c files. Compiler 
> make a decision independently of it.

Typo
[...]
Vladimir Sementsov-Ogievskiy May 20, 2021, 3:24 p.m. UTC | #3
20.05.2021 18:06, Emanuele Giuseppe Esposito wrote:
> 
> 
> On 20/05/2021 16:42, Vladimir Sementsov-Ogievskiy wrote:
>> 18.05.2021 13:07, Emanuele Giuseppe Esposito wrote:
>>> From: Paolo Bonzini <pbonzini@redhat.com>
>>>
>>> Put the logic to determine the copy size in a separate function, so
>>> that there is a simple state machine for the possible methods of
>>> copying data from one BlockDriverState to the other.
>>
>> Honestly, for me a 4-state state machine plus a function to determine the copy size doesn't seem better than two simple variables, copy_size and use_copy_range.
>>
>> What's the benefit of it?
> 
> It helps to have a single field (method) instead of two (use_copy_range, copy_size), especially if we need to protect it for concurrent access. See patch 7.

What's the problem with protecting two fields?

(me looking at patch 7) What's the reason for introducing atomic operations? It makes things more complicated (we have two synchronization mechanisms, mutex + atomics, instead of one, the mutex), with no benefit.

[...]
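
The disagreement above is ultimately about the unit of synchronization. Below
is a minimal sketch of why a single field is the easier unit to protect,
written with C11 atomics rather than QEMU's qatomic wrappers and with names
invented for illustration (whether atomics are preferable to a plain mutex at
all is exactly what is being debated about patch 7):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

/* Two fields: without one lock held across both accesses, a reader can
 * observe a torn combination, e.g. the new use_copy_range paired with the
 * old copy_size. */
struct two_fields {
    bool use_copy_range;
    int64_t copy_size;
};

/* One field: a single atomic load always yields one consistent state, and
 * the chunk size can be derived from it on the reader's side. */
typedef enum { RW_CLUSTER, RW, RANGE_SMALL, RANGE_FULL } Method;
static _Atomic Method method;

static Method read_method(void)
{
    return atomic_load(&method);    /* consistent without a mutex */
}

static void copy_range_failed(void)
{
    atomic_store(&method, RW);      /* one store, no torn state */
}
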
Paolo Bonzini May 21, 2021, 3:10 p.m. UTC | #4
On 20/05/21 16:42, Vladimir Sementsov-Ogievskiy wrote:
> 18.05.2021 13:07, Emanuele Giuseppe Esposito wrote:
>> From: Paolo Bonzini <pbonzini@redhat.com>
>>
>> Put the logic to determine the copy size in a separate function, so
>> that there is a simple state machine for the possible methods of
>> copying data from one BlockDriverState to the other.
> 
> Honestly, for me a 4-state state machine plus a function to determine the copy
> size doesn't seem better than two simple variables, copy_size and use_copy_range.

There were six states before (two for s->use_copy_range, three for s->copy_size),
of which two were unused.  The heuristics for switching between copy_range and
read/write were quite illegible.

> What's the benefit of it?

Less duplication here, for example:

+    if (s->max_transfer < cluster_size) {
           /*
            * copy_range does not respect max_transfer. We don't want to bother
            * with requests smaller than block-copy cluster size, so fallback to
            * buffered copying (read and write respect max_transfer on their
            * behalf).
            */
-        s->use_copy_range = false;
-        s->copy_size = cluster_size;
+        s->method = COPY_READ_WRITE_CLUSTER;
       } else if (write_flags & BDRV_REQ_WRITE_COMPRESSED) {
           /* Compression supports only cluster-size writes and no copy-range. */
-        s->use_copy_range = false;
-        s->copy_size = cluster_size;
+        s->method = COPY_READ_WRITE_CLUSTER;

and here:

               trace_block_copy_copy_range_fail(s, offset, ret);
-            s->use_copy_range = false;
-            s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER);
+            s->method = COPY_READ_WRITE;
...
           /*
            * We enable copy-range, but keep small copy_size, until first
            * successful copy_range (look at block_copy_do_copy).
            */
-        s->use_copy_range = use_copy_range;
-        s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER);
+        s->method = use_copy_range ? COPY_RANGE_SMALL : COPY_READ_WRITE;

where it's not obvious that the two assignments to copy_size should be
the same (and they're suboptimal, too, since they don't obey max_transfer).

... plus...

>> While at it, store the common computation of block_copy_max_transfer
>> into a new field of BlockCopyState, and make sure that we always
>> obey max_transfer; that's more efficient even for the
>> COPY_RANGE_READ_WRITE case.

having a function makes it easier to spot slight differences that are
just bugs, such as this one.

>> Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
>> ---
>>   block/block-copy.c | 59 ++++++++++++++++++++++++++++++----------------
> 
> The stats agree with me that it's not a simplification.

Stats don't say everything.  Not having something like this:

                 s->copy_size =
                         MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_COPY_RANGE),
                             QEMU_ALIGN_DOWN(block_copy_max_transfer(s->source,
                                                                     s->target),
                                             s->cluster_size));

in the inner loop is already worth the extra lines for the
function declaration, for example.

Paolo

[...]
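
To make the chunk-size heuristics concrete, assume cluster_size is 64 KiB, the
device pair's max_transfer is 2 MiB (already aligned down to the cluster size
in block_copy_state_new()), and QEMU's defaults of BLOCK_COPY_MAX_BUFFER =
1 MiB and BLOCK_COPY_MAX_COPY_RANGE = 16 MiB; the numbers are only a worked
example, so verify the constants against block/block-copy.c. With those values
block_copy_chunk_size() returns:

    COPY_READ_WRITE_CLUSTER:  64 KiB                       (cluster_size)
    COPY_READ_WRITE:          MIN(MAX(64K, 1M),  2M)  =  1 MiB
    COPY_RANGE_SMALL:         MIN(MAX(64K, 1M),  2M)  =  1 MiB
    COPY_RANGE_FULL:          MIN(MAX(64K, 16M), 2M)  =  2 MiB

The first successful copy_range therefore lifts the cap from 1 MiB to 16 MiB,
but the result is always clamped to max_transfer, the bound that the old
copy_size assignments did not apply consistently.
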
Vladimir Sementsov-Ogievskiy May 21, 2021, 3:48 p.m. UTC | #5
21.05.2021 18:10, Paolo Bonzini wrote:
[...]


After my "[PATCH v2 00/33] block: publish backup-top filter" copy_range path becomes unused. I keep it thinking about further moving qemu-img convert to block-copy. But I don't even have a plan when to start this work. So, if we want to do something around copy_range here to prepare for thread-safety, let's just drop it for now as unused on top of "[PATCH v2 00/33] block: publish backup-top filter" (you can take one patch that drop copy_range support in backup to your series to not make a dependency). It's not difficult to reimplement it later.
Paolo Bonzini May 21, 2021, 4:43 p.m. UTC | #6
On 21/05/21 17:48, Vladimir Sementsov-Ogievskiy wrote:
> I keep it with a view to eventually moving qemu-img convert to block-copy,
> but I don't even have a plan for when to start that work. So, if we want to
> do something around copy_range here to prepare for thread safety, let's
> just drop it for now as unused, on top of "[PATCH v2 00/33] block:
> publish backup-top filter" (you can take the one patch that drops copy_range
> support in backup into your series to avoid a dependency). It's not
> difficult to reimplement later.

Then we'll have this same conversation later.  I would hope that this 
series goes in before yours.

Paolo
Vladimir Sementsov-Ogievskiy May 21, 2021, 5:51 p.m. UTC | #7
21.05.2021 18:10, Paolo Bonzini wrote:
> Stats don't say everything.  Not having something like this:
> 
>                  s->copy_size =
>                          MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_COPY_RANGE),
>                              QEMU_ALIGN_DOWN(block_copy_max_transfer(s->source,
>                                                                      s->target),
>                                              s->cluster_size));


Yes, this one always looked bad.
Stefan Hajnoczi May 27, 2021, 8:20 a.m. UTC | #8
On Tue, May 18, 2021 at 12:07:51PM +0200, Emanuele Giuseppe Esposito wrote:
>      } else {
>          /*
>           * We enable copy-range, but keep small copy_size, until first
>           * successful copy_range (look at block_copy_do_copy).
>           */

Is this comment still correct? It appears that this branch of the if
statement does not always enable copy-range (the !use_copy_range case).

> -        s->use_copy_range = use_copy_range;
> -        s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER);
> +        s->method = use_copy_range ? COPY_RANGE_SMALL : COPY_READ_WRITE;
Paolo Bonzini May 27, 2021, 7:04 p.m. UTC | #9
On Thu, May 27, 2021, 14:07 Stefan Hajnoczi <stefanha@redhat.com> wrote:

> On Tue, May 18, 2021 at 12:07:51PM +0200, Emanuele Giuseppe Esposito wrote:
> >      } else {
> >          /*
> >           * We enable copy-range, but keep small copy_size, until first
> >           * successful copy_range (look at block_copy_do_copy).
> >           */
>
> Is this comment still correct? It appears that this branch of the if
> statement does not always enable copy-range (the !use_copy_range case).
>

It is correct, but it applies only if use_copy_range is true (the patch
doesn't change this).

Paolo

> > -        s->use_copy_range = use_copy_range;
> > -        s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER);
> > +        s->method = use_copy_range ? COPY_RANGE_SMALL : COPY_READ_WRITE;
>
diff mbox series

Patch

diff --git a/block/block-copy.c b/block/block-copy.c
index 37c8e8504b..10ce51a244 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -28,6 +28,13 @@ 
 #define BLOCK_COPY_MAX_WORKERS 64
 #define BLOCK_COPY_SLICE_TIME 100000000ULL /* ns */
 
+typedef enum {
+    COPY_READ_WRITE_CLUSTER,
+    COPY_READ_WRITE,
+    COPY_RANGE_SMALL,
+    COPY_RANGE_FULL
+} BlockCopyMethod;
+
 static coroutine_fn int block_copy_task_entry(AioTask *task);
 
 typedef struct BlockCopyCallState {
@@ -85,8 +92,8 @@  typedef struct BlockCopyState {
     BdrvDirtyBitmap *copy_bitmap;
     int64_t in_flight_bytes;
     int64_t cluster_size;
-    bool use_copy_range;
-    int64_t copy_size;
+    BlockCopyMethod method;
+    int64_t max_transfer;
     uint64_t len;
     QLIST_HEAD(, BlockCopyTask) tasks; /* All tasks from all block-copy calls */
     QLIST_HEAD(, BlockCopyCallState) calls;
@@ -148,6 +155,23 @@  static bool coroutine_fn block_copy_wait_one(BlockCopyState *s, int64_t offset,
     return true;
 }
 
+static inline int64_t block_copy_chunk_size(BlockCopyState *s)
+{
+    switch (s->method) {
+    case COPY_READ_WRITE_CLUSTER:
+        return s->cluster_size;
+    case COPY_READ_WRITE:
+    case COPY_RANGE_SMALL:
+        return MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER),
+                   s->max_transfer);
+    case COPY_RANGE_FULL:
+        return MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_COPY_RANGE),
+                   s->max_transfer);
+    default:
+        abort();
+    }
+}
+
 /*
  * Search for the first dirty area in offset/bytes range and create task at
  * the beginning of it.
@@ -157,8 +181,9 @@  static BlockCopyTask *block_copy_task_create(BlockCopyState *s,
                                              int64_t offset, int64_t bytes)
 {
     BlockCopyTask *task;
-    int64_t max_chunk = MIN_NON_ZERO(s->copy_size, call_state->max_chunk);
+    int64_t max_chunk = block_copy_chunk_size(s);
 
+    max_chunk = MIN_NON_ZERO(max_chunk, call_state->max_chunk);
     if (!bdrv_dirty_bitmap_next_dirty_area(s->copy_bitmap,
                                            offset, offset + bytes,
                                            max_chunk, &offset, &bytes))
@@ -265,28 +290,27 @@  BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
         .len = bdrv_dirty_bitmap_size(copy_bitmap),
         .write_flags = write_flags,
         .mem = shres_create(BLOCK_COPY_MAX_MEM),
+        .max_transfer = QEMU_ALIGN_DOWN(block_copy_max_transfer(source, target)
+                                        , cluster_size),
     };
 
-    if (block_copy_max_transfer(source, target) < cluster_size) {
+    if (s->max_transfer < cluster_size) {
         /*
          * copy_range does not respect max_transfer. We don't want to bother
          * with requests smaller than block-copy cluster size, so fallback to
          * buffered copying (read and write respect max_transfer on their
          * behalf).
          */
-        s->use_copy_range = false;
-        s->copy_size = cluster_size;
+        s->method = COPY_READ_WRITE_CLUSTER;
     } else if (write_flags & BDRV_REQ_WRITE_COMPRESSED) {
         /* Compression supports only cluster-size writes and no copy-range. */
-        s->use_copy_range = false;
-        s->copy_size = cluster_size;
+        s->method = COPY_READ_WRITE_CLUSTER;
     } else {
         /*
          * We enable copy-range, but keep small copy_size, until first
          * successful copy_range (look at block_copy_do_copy).
          */
-        s->use_copy_range = use_copy_range;
-        s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER);
+        s->method = use_copy_range ? COPY_RANGE_SMALL : COPY_READ_WRITE;
     }
 
     ratelimit_init(&s->rate_limit);
@@ -369,30 +393,25 @@  static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
         return ret;
     }
 
-    if (s->use_copy_range) {
+    if (s->method >= COPY_RANGE_SMALL) {
         ret = bdrv_co_copy_range(s->source, offset, s->target, offset, nbytes,
                                  0, s->write_flags);
         if (ret < 0) {
             trace_block_copy_copy_range_fail(s, offset, ret);
-            s->use_copy_range = false;
-            s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER);
+            s->method = COPY_READ_WRITE;
             /* Fallback to read+write with allocated buffer */
         } else {
-            if (s->use_copy_range) {
+            if (s->method == COPY_RANGE_SMALL) {
                 /*
                  * Successful copy-range. Now increase copy_size.  copy_range
                  * does not respect max_transfer (it's a TODO), so we factor
                  * that in here.
                  *
-                 * Note: we double-check s->use_copy_range for the case when
+                 * Note: we double-check s->method for the case when
                  * parallel block-copy request unsets it during previous
                  * bdrv_co_copy_range call.
                  */
-                s->copy_size =
-                        MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_COPY_RANGE),
-                            QEMU_ALIGN_DOWN(block_copy_max_transfer(s->source,
-                                                                    s->target),
-                                            s->cluster_size));
+                s->method = COPY_RANGE_FULL;
             }
             goto out;
         }
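
For readers outside the QEMU tree, the patch leans on a few helper macros from
include/qemu/osdep.h. Simplified re-definitions are sketched below so the
arithmetic above can be followed; QEMU's real MIN/MAX add type checking, so
verify against the actual header:

/* Round n down to the nearest multiple of m. */
#define QEMU_ALIGN_DOWN(n, m)  ((n) / (m) * (m))

#define MIN(a, b)  ((a) < (b) ? (a) : (b))
#define MAX(a, b)  ((a) > (b) ? (a) : (b))

/* Minimum of two limits, where 0 means "no limit". */
#define MIN_NON_ZERO(a, b) \
    ((a) == 0 ? (b) : ((b) == 0 ? (a) : MIN(a, b)))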