diff mbox series

[1/2] copy-before-write: allow specifying minimum cluster size

Message ID 20240308155158.830258-2-f.ebner@proxmox.com (mailing list archive)
State New, archived
Headers show
Series backup: allow specifying minimum cluster size | expand

Commit Message

Fiona Ebner March 8, 2024, 3:51 p.m. UTC
Useful to make discard-source work in the context of backup fleecing
when the fleecing image has a larger granularity than the backup
target.

Copy-before-write operations will use at least this granularity and in
particular, discard requests to the source node will too. If the
granularity is too small, they will just be aligned down in
cbw_co_pdiscard_snapshot() and thus effectively ignored.

The QAPI uses uint32 so the value will be non-negative, but still fit
into a uint64_t.

Suggested-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
 block/block-copy.c         | 17 +++++++++++++----
 block/copy-before-write.c  |  3 ++-
 include/block/block-copy.h |  1 +
 qapi/block-core.json       |  8 +++++++-
 4 files changed, 23 insertions(+), 6 deletions(-)

Comments

Markus Armbruster March 26, 2024, 9:06 a.m. UTC | #1
Fiona Ebner <f.ebner@proxmox.com> writes:

> Useful to make discard-source work in the context of backup fleecing
> when the fleecing image has a larger granularity than the backup
> target.
>
> Copy-before-write operations will use at least this granularity and in
> particular, discard requests to the source node will too. If the
> granularity is too small, they will just be aligned down in
> cbw_co_pdiscard_snapshot() and thus effectively ignored.
>
> The QAPI uses uint32 so the value will be non-negative, but still fit
> into a uint64_t.
>
> Suggested-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
> Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
> ---
>  block/block-copy.c         | 17 +++++++++++++----
>  block/copy-before-write.c  |  3 ++-
>  include/block/block-copy.h |  1 +
>  qapi/block-core.json       |  8 +++++++-
>  4 files changed, 23 insertions(+), 6 deletions(-)
>
> diff --git a/block/block-copy.c b/block/block-copy.c
> index 7e3b378528..adb1cbb440 100644
> --- a/block/block-copy.c
> +++ b/block/block-copy.c
> @@ -310,6 +310,7 @@ void block_copy_set_copy_opts(BlockCopyState *s, bool use_copy_range,
>  }
>  
>  static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
> +                                                 int64_t min_cluster_size,
>                                                   Error **errp)
>  {
>      int ret;
> @@ -335,7 +336,7 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
>                      "used. If the actual block size of the target exceeds "
>                      "this default, the backup may be unusable",
>                      BLOCK_COPY_CLUSTER_SIZE_DEFAULT);
> -        return BLOCK_COPY_CLUSTER_SIZE_DEFAULT;
> +        return MAX(min_cluster_size, BLOCK_COPY_CLUSTER_SIZE_DEFAULT);

min_cluster_size is int64_t, BLOCK_COPY_CLUSTER_SIZE_DEFAULT is int, and
gets converted to int64_t.  The return type is int64_t.  Okay.

>      } else if (ret < 0 && !target_does_cow) {
>          error_setg_errno(errp, -ret,
>              "Couldn't determine the cluster size of the target image, "
> @@ -345,16 +346,18 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
>          return ret;
>      } else if (ret < 0 && target_does_cow) {
>          /* Not fatal; just trudge on ahead. */
> -        return BLOCK_COPY_CLUSTER_SIZE_DEFAULT;
> +        return MAX(min_cluster_size, BLOCK_COPY_CLUSTER_SIZE_DEFAULT);

Same.

>      }
>  
> -    return MAX(BLOCK_COPY_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
> +    return MAX(min_cluster_size,
> +               MAX(BLOCK_COPY_CLUSTER_SIZE_DEFAULT, bdi.cluster_size));

Similar: bdi.cluster_size is int.

>  }
>  
>  BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
>                                       BlockDriverState *copy_bitmap_bs,
>                                       const BdrvDirtyBitmap *bitmap,
>                                       bool discard_source,
> +                                     int64_t min_cluster_size,
>                                       Error **errp)
>  {
>      ERRP_GUARD();
> @@ -365,7 +368,13 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
>  
>      GLOBAL_STATE_CODE();
>  
> -    cluster_size = block_copy_calculate_cluster_size(target->bs, errp);
> +    if (min_cluster_size && !is_power_of_2(min_cluster_size)) {

min_cluster_size is int64_t, is_power_of_2() takes uint64_t.  Bad if
min_cluster_size is negative.  Could this happen?


> +        error_setg(errp, "min-cluster-size needs to be a power of 2");
> +        return NULL;
> +    }
> +
> +    cluster_size = block_copy_calculate_cluster_size(target->bs,
> +                                                     min_cluster_size, errp);

min_cluster_size is int64_t, block_copy_calculate_cluster_size() takes
int64_t, returns int64_t, and cluster_size is int64_t.  Good.

>      if (cluster_size < 0) {
>          return NULL;
>      }
> diff --git a/block/copy-before-write.c b/block/copy-before-write.c
> index dac57481c5..f9896c6c1e 100644
> --- a/block/copy-before-write.c
> +++ b/block/copy-before-write.c
> @@ -476,7 +476,8 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
>  
>      s->discard_source = flags & BDRV_O_CBW_DISCARD_SOURCE;
>      s->bcs = block_copy_state_new(bs->file, s->target, bs, bitmap,
> -                                  flags & BDRV_O_CBW_DISCARD_SOURCE, errp);
> +                                  flags & BDRV_O_CBW_DISCARD_SOURCE,
> +                                  opts->min_cluster_size, errp);

@opts is BlockdevOptionsCbw *, opts->min_cluster_size is uint32_t (see
last hunk), block_copy_state_new() takes int64_t: opts->min_cluster_size
gets zero-extended.  Okay.

>      if (!s->bcs) {
>          error_prepend(errp, "Cannot create block-copy-state: ");
>          return -EINVAL;
> diff --git a/include/block/block-copy.h b/include/block/block-copy.h
> index bdc703bacd..77857c6c68 100644
> --- a/include/block/block-copy.h
> +++ b/include/block/block-copy.h
> @@ -28,6 +28,7 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
>                                       BlockDriverState *copy_bitmap_bs,
>                                       const BdrvDirtyBitmap *bitmap,
>                                       bool discard_source,
> +                                     int64_t min_cluster_size,
>                                       Error **errp);
>  
>  /* Function should be called prior any actual copy request */
> diff --git a/qapi/block-core.json b/qapi/block-core.json
> index 0a72c590a8..85c8f88f6e 100644
> --- a/qapi/block-core.json
> +++ b/qapi/block-core.json
> @@ -4625,12 +4625,18 @@
>  #     @on-cbw-error parameter will decide how this failure is handled.
>  #     Default 0. (Since 7.1)
>  #
> +# @min-cluster-size: Minimum size of blocks used by copy-before-write
> +#     operations.  Has to be a power of 2.  No effect if smaller than
> +#     the maximum of the target's cluster size and 64 KiB.  Default 0.
> +#     (Since 9.0)
> +#
>  # Since: 6.2
>  ##
>  { 'struct': 'BlockdevOptionsCbw',
>    'base': 'BlockdevOptionsGenericFormat',
>    'data': { 'target': 'BlockdevRef', '*bitmap': 'BlockDirtyBitmap',
> -            '*on-cbw-error': 'OnCbwError', '*cbw-timeout': 'uint32' } }
> +            '*on-cbw-error': 'OnCbwError', '*cbw-timeout': 'uint32',
> +            '*min-cluster-size': 'uint32' } }

Elsewhere in the schema, we use either 'int' or 'size' for cluster-size.
Why the difference?

>  
>  ##
>  # @BlockdevOptions:
Vladimir Sementsov-Ogievskiy March 29, 2024, 10:10 a.m. UTC | #2
On 08.03.24 18:51, Fiona Ebner wrote:
> Useful to make discard-source work in the context of backup fleecing
> when the fleecing image has a larger granularity than the backup
> target.
> 
> Copy-before-write operations will use at least this granularity and in
> particular, discard requests to the source node will too. If the
> granularity is too small, they will just be aligned down in
> cbw_co_pdiscard_snapshot() and thus effectively ignored.
> 
> The QAPI uses uint32 so the value will be non-negative, but still fit
> into a uint64_t.
> 
> Suggested-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
> Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
> ---
>   block/block-copy.c         | 17 +++++++++++++----
>   block/copy-before-write.c  |  3 ++-
>   include/block/block-copy.h |  1 +
>   qapi/block-core.json       |  8 +++++++-
>   4 files changed, 23 insertions(+), 6 deletions(-)
> 
> diff --git a/block/block-copy.c b/block/block-copy.c
> index 7e3b378528..adb1cbb440 100644
> --- a/block/block-copy.c
> +++ b/block/block-copy.c
> @@ -310,6 +310,7 @@ void block_copy_set_copy_opts(BlockCopyState *s, bool use_copy_range,
>   }
>   
>   static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
> +                                                 int64_t min_cluster_size,

Maybe better use uint32_t here as well.

>                                                    Error **errp)
>   {
>       int ret;
> @@ -335,7 +336,7 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
>                       "used. If the actual block size of the target exceeds "
>                       "this default, the backup may be unusable",
>                       BLOCK_COPY_CLUSTER_SIZE_DEFAULT);
> -        return BLOCK_COPY_CLUSTER_SIZE_DEFAULT;
> +        return MAX(min_cluster_size, BLOCK_COPY_CLUSTER_SIZE_DEFAULT);
>       } else if (ret < 0 && !target_does_cow) {
>           error_setg_errno(errp, -ret,
>               "Couldn't determine the cluster size of the target image, "
> @@ -345,16 +346,18 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
>           return ret;
>       } else if (ret < 0 && target_does_cow) {
>           /* Not fatal; just trudge on ahead. */
> -        return BLOCK_COPY_CLUSTER_SIZE_DEFAULT;
> +        return MAX(min_cluster_size, BLOCK_COPY_CLUSTER_SIZE_DEFAULT);
>       }
>   
> -    return MAX(BLOCK_COPY_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
> +    return MAX(min_cluster_size,
> +               MAX(BLOCK_COPY_CLUSTER_SIZE_DEFAULT, bdi.cluster_size));
>   }
>   
>   BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
>                                        BlockDriverState *copy_bitmap_bs,
>                                        const BdrvDirtyBitmap *bitmap,
>                                        bool discard_source,
> +                                     int64_t min_cluster_size,

and here why not uint32_t

>                                        Error **errp)
>   {
>       ERRP_GUARD();
> @@ -365,7 +368,13 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
>   
>       GLOBAL_STATE_CODE();
>   
> -    cluster_size = block_copy_calculate_cluster_size(target->bs, errp);
> +    if (min_cluster_size && !is_power_of_2(min_cluster_size)) {
> +        error_setg(errp, "min-cluster-size needs to be a power of 2");
> +        return NULL;
> +    }
> +
> +    cluster_size = block_copy_calculate_cluster_size(target->bs,
> +                                                     min_cluster_size, errp);
>       if (cluster_size < 0) {
>           return NULL;
>       }
> diff --git a/block/copy-before-write.c b/block/copy-before-write.c
> index dac57481c5..f9896c6c1e 100644
> --- a/block/copy-before-write.c
> +++ b/block/copy-before-write.c
> @@ -476,7 +476,8 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
>   
>       s->discard_source = flags & BDRV_O_CBW_DISCARD_SOURCE;
>       s->bcs = block_copy_state_new(bs->file, s->target, bs, bitmap,
> -                                  flags & BDRV_O_CBW_DISCARD_SOURCE, errp);
> +                                  flags & BDRV_O_CBW_DISCARD_SOURCE,
> +                                  opts->min_cluster_size, errp);

I assume it is guaranteed to be 0 when not specified by user.

>       if (!s->bcs) {
>           error_prepend(errp, "Cannot create block-copy-state: ");
>           return -EINVAL;
> diff --git a/include/block/block-copy.h b/include/block/block-copy.h
> index bdc703bacd..77857c6c68 100644
> --- a/include/block/block-copy.h
> +++ b/include/block/block-copy.h
> @@ -28,6 +28,7 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
>                                        BlockDriverState *copy_bitmap_bs,
>                                        const BdrvDirtyBitmap *bitmap,
>                                        bool discard_source,
> +                                     int64_t min_cluster_size,
>                                        Error **errp);
>   
>   /* Function should be called prior any actual copy request */
> diff --git a/qapi/block-core.json b/qapi/block-core.json
> index 0a72c590a8..85c8f88f6e 100644
> --- a/qapi/block-core.json
> +++ b/qapi/block-core.json
> @@ -4625,12 +4625,18 @@
>   #     @on-cbw-error parameter will decide how this failure is handled.
>   #     Default 0. (Since 7.1)
>   #
> +# @min-cluster-size: Minimum size of blocks used by copy-before-write
> +#     operations.  Has to be a power of 2.  No effect if smaller than
> +#     the maximum of the target's cluster size and 64 KiB.  Default 0.
> +#     (Since 9.0)

will have to update to 9.1

> +#
>   # Since: 6.2
>   ##
>   { 'struct': 'BlockdevOptionsCbw',
>     'base': 'BlockdevOptionsGenericFormat',
>     'data': { 'target': 'BlockdevRef', '*bitmap': 'BlockDirtyBitmap',
> -            '*on-cbw-error': 'OnCbwError', '*cbw-timeout': 'uint32' } }
> +            '*on-cbw-error': 'OnCbwError', '*cbw-timeout': 'uint32',
> +            '*min-cluster-size': 'uint32' } }
>   
>   ##
>   # @BlockdevOptions:


Otherwise, looks good to me.
Fiona Ebner May 13, 2024, 1:24 p.m. UTC | #3
Am 26.03.24 um 10:06 schrieb Markus Armbruster:
>> @@ -365,7 +368,13 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
>>  
>>      GLOBAL_STATE_CODE();
>>  
>> -    cluster_size = block_copy_calculate_cluster_size(target->bs, errp);
>> +    if (min_cluster_size && !is_power_of_2(min_cluster_size)) {
> 
> min_cluster_size is int64_t, is_power_of_2() takes uint64_t.  Bad if
> min_cluster_size is negative.  Could this happen?
> 

No, because it comes in as a uint32_t via the QAPI (the internal caller
added by patch 2/2 from the backup code also gets the value via QAPI and
there uint32_t is used too).

---snip---

>> diff --git a/qapi/block-core.json b/qapi/block-core.json
>> index 0a72c590a8..85c8f88f6e 100644
>> --- a/qapi/block-core.json
>> +++ b/qapi/block-core.json
>> @@ -4625,12 +4625,18 @@
>>  #     @on-cbw-error parameter will decide how this failure is handled.
>>  #     Default 0. (Since 7.1)
>>  #
>> +# @min-cluster-size: Minimum size of blocks used by copy-before-write
>> +#     operations.  Has to be a power of 2.  No effect if smaller than
>> +#     the maximum of the target's cluster size and 64 KiB.  Default 0.
>> +#     (Since 9.0)
>> +#
>>  # Since: 6.2
>>  ##
>>  { 'struct': 'BlockdevOptionsCbw',
>>    'base': 'BlockdevOptionsGenericFormat',
>>    'data': { 'target': 'BlockdevRef', '*bitmap': 'BlockDirtyBitmap',
>> -            '*on-cbw-error': 'OnCbwError', '*cbw-timeout': 'uint32' } }
>> +            '*on-cbw-error': 'OnCbwError', '*cbw-timeout': 'uint32',
>> +            '*min-cluster-size': 'uint32' } }
> 
> Elsewhere in the schema, we use either 'int' or 'size' for cluster-size.
> Why the difference?
> 

The motivation was to disallow negative values up front and have it work
with block_copy_calculate_cluster_size(), whose result is an int64_t. If
I go with 'int', I'll have to add a check to disallow negative values.
If I go with 'size', I'll have to add a check for to disallow too large
values.

Which approach should I go with?

Best Regards,
Fiona
Markus Armbruster May 14, 2024, 12:09 p.m. UTC | #4
Fiona Ebner <f.ebner@proxmox.com> writes:

> Am 26.03.24 um 10:06 schrieb Markus Armbruster:
>>> @@ -365,7 +368,13 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
>>>  
>>>      GLOBAL_STATE_CODE();
>>>  
>>> -    cluster_size = block_copy_calculate_cluster_size(target->bs, errp);
>>> +    if (min_cluster_size && !is_power_of_2(min_cluster_size)) {
>> 
>> min_cluster_size is int64_t, is_power_of_2() takes uint64_t.  Bad if
>> min_cluster_size is negative.  Could this happen?
>
> No, because it comes in as a uint32_t via the QAPI (the internal caller
> added by patch 2/2 from the backup code also gets the value via QAPI and
> there uint32_t is used too).

Good.

Is there a practical way to tweak the types so it's more obvious?

>>> +        error_setg(errp, "min-cluster-size needs to be a power of 2");
>>> +        return NULL;
>>> +    }
>>> +
>>> +    cluster_size = block_copy_calculate_cluster_size(target->bs,
>>> +                                                     min_cluster_size, errp);
>>>      if (cluster_size < 0) {
>>>          return NULL;
>>>      }
>
> ---snip---
>
>>> diff --git a/qapi/block-core.json b/qapi/block-core.json
>>> index 0a72c590a8..85c8f88f6e 100644
>>> --- a/qapi/block-core.json
>>> +++ b/qapi/block-core.json
>>> @@ -4625,12 +4625,18 @@
>>>  #     @on-cbw-error parameter will decide how this failure is handled.
>>>  #     Default 0. (Since 7.1)
>>>  #
>>> +# @min-cluster-size: Minimum size of blocks used by copy-before-write
>>> +#     operations.  Has to be a power of 2.  No effect if smaller than
>>> +#     the maximum of the target's cluster size and 64 KiB.  Default 0.
>>> +#     (Since 9.0)
>>> +#
>>>  # Since: 6.2
>>>  ##
>>>  { 'struct': 'BlockdevOptionsCbw',
>>>    'base': 'BlockdevOptionsGenericFormat',
>>>    'data': { 'target': 'BlockdevRef', '*bitmap': 'BlockDirtyBitmap',
>>> -            '*on-cbw-error': 'OnCbwError', '*cbw-timeout': 'uint32' } }
>>> +            '*on-cbw-error': 'OnCbwError', '*cbw-timeout': 'uint32',
>>> +            '*min-cluster-size': 'uint32' } }
>> 
>> Elsewhere in the schema, we use either 'int' or 'size' for cluster-size.
>> Why the difference?
>
> The motivation was to disallow negative values up front and have it work
> with block_copy_calculate_cluster_size(), whose result is an int64_t.

Let's see whether I understand.

cbw_open() passes the new member @min-cluster-size to
block_copy_state_new().

block_copy_state_new() checks it, and passes it on to
block_copy_calculate_cluster_size().  This is the C code shown above.

block_copy_calculate_cluster_size() uses it like

    return MAX(min_cluster_size, BLOCK_COPY_CLUSTER_SIZE_DEFAULT);

and

    return MAX(min_cluster_size,
               MAX(BLOCK_COPY_CLUSTER_SIZE_DEFAULT, bdi.cluster_size));

BLOCK_COPY_CLUSTER_SIZE_DEFAULT and bdi.cluster_size are int.  The
return type is int64_t.

Correct?

I don't like mixing signed and unsigned in MAX() like this.  Figuring
out whether it's safe takes a C language lawyer.  I'd rather avoid such
subtle code.  Can we please compute these maxima entirely in the
destination type int64_t?

>                                                                       If
> I go with 'int', I'll have to add a check to disallow negative values.
> If I go with 'size', I'll have to add a check for to disallow too large
> values.
>
> Which approach should I go with?

For me, reducing the need for manual checking is important, but
cleanliness of the external interface trumps it.  I'd use 'size'.

Not the first time I see friction between QAPI 'size' or 'uint64' and
the block layer's use of int64_t.

The block layer likes to use int64_t even when the value must not be
negative.  There are good reasons for that.

Perhaps a QAPI type for "non-negative int64_t" could be useful.  Weird,
but useful.
diff mbox series

Patch

diff --git a/block/block-copy.c b/block/block-copy.c
index 7e3b378528..adb1cbb440 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -310,6 +310,7 @@  void block_copy_set_copy_opts(BlockCopyState *s, bool use_copy_range,
 }
 
 static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
+                                                 int64_t min_cluster_size,
                                                  Error **errp)
 {
     int ret;
@@ -335,7 +336,7 @@  static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
                     "used. If the actual block size of the target exceeds "
                     "this default, the backup may be unusable",
                     BLOCK_COPY_CLUSTER_SIZE_DEFAULT);
-        return BLOCK_COPY_CLUSTER_SIZE_DEFAULT;
+        return MAX(min_cluster_size, BLOCK_COPY_CLUSTER_SIZE_DEFAULT);
     } else if (ret < 0 && !target_does_cow) {
         error_setg_errno(errp, -ret,
             "Couldn't determine the cluster size of the target image, "
@@ -345,16 +346,18 @@  static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
         return ret;
     } else if (ret < 0 && target_does_cow) {
         /* Not fatal; just trudge on ahead. */
-        return BLOCK_COPY_CLUSTER_SIZE_DEFAULT;
+        return MAX(min_cluster_size, BLOCK_COPY_CLUSTER_SIZE_DEFAULT);
     }
 
-    return MAX(BLOCK_COPY_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
+    return MAX(min_cluster_size,
+               MAX(BLOCK_COPY_CLUSTER_SIZE_DEFAULT, bdi.cluster_size));
 }
 
 BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
                                      BlockDriverState *copy_bitmap_bs,
                                      const BdrvDirtyBitmap *bitmap,
                                      bool discard_source,
+                                     int64_t min_cluster_size,
                                      Error **errp)
 {
     ERRP_GUARD();
@@ -365,7 +368,13 @@  BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
 
     GLOBAL_STATE_CODE();
 
-    cluster_size = block_copy_calculate_cluster_size(target->bs, errp);
+    if (min_cluster_size && !is_power_of_2(min_cluster_size)) {
+        error_setg(errp, "min-cluster-size needs to be a power of 2");
+        return NULL;
+    }
+
+    cluster_size = block_copy_calculate_cluster_size(target->bs,
+                                                     min_cluster_size, errp);
     if (cluster_size < 0) {
         return NULL;
     }
diff --git a/block/copy-before-write.c b/block/copy-before-write.c
index dac57481c5..f9896c6c1e 100644
--- a/block/copy-before-write.c
+++ b/block/copy-before-write.c
@@ -476,7 +476,8 @@  static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
 
     s->discard_source = flags & BDRV_O_CBW_DISCARD_SOURCE;
     s->bcs = block_copy_state_new(bs->file, s->target, bs, bitmap,
-                                  flags & BDRV_O_CBW_DISCARD_SOURCE, errp);
+                                  flags & BDRV_O_CBW_DISCARD_SOURCE,
+                                  opts->min_cluster_size, errp);
     if (!s->bcs) {
         error_prepend(errp, "Cannot create block-copy-state: ");
         return -EINVAL;
diff --git a/include/block/block-copy.h b/include/block/block-copy.h
index bdc703bacd..77857c6c68 100644
--- a/include/block/block-copy.h
+++ b/include/block/block-copy.h
@@ -28,6 +28,7 @@  BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
                                      BlockDriverState *copy_bitmap_bs,
                                      const BdrvDirtyBitmap *bitmap,
                                      bool discard_source,
+                                     int64_t min_cluster_size,
                                      Error **errp);
 
 /* Function should be called prior any actual copy request */
diff --git a/qapi/block-core.json b/qapi/block-core.json
index 0a72c590a8..85c8f88f6e 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -4625,12 +4625,18 @@ 
 #     @on-cbw-error parameter will decide how this failure is handled.
 #     Default 0. (Since 7.1)
 #
+# @min-cluster-size: Minimum size of blocks used by copy-before-write
+#     operations.  Has to be a power of 2.  No effect if smaller than
+#     the maximum of the target's cluster size and 64 KiB.  Default 0.
+#     (Since 9.0)
+#
 # Since: 6.2
 ##
 { 'struct': 'BlockdevOptionsCbw',
   'base': 'BlockdevOptionsGenericFormat',
   'data': { 'target': 'BlockdevRef', '*bitmap': 'BlockDirtyBitmap',
-            '*on-cbw-error': 'OnCbwError', '*cbw-timeout': 'uint32' } }
+            '*on-cbw-error': 'OnCbwError', '*cbw-timeout': 'uint32',
+            '*min-cluster-size': 'uint32' } }
 
 ##
 # @BlockdevOptions: