diff mbox series

[v8,5/7] copy-on-read: limit guest writes to base in COR driver

Message ID 1598633579-221780-6-git-send-email-andrey.shinkevich@virtuozzo.com (mailing list archive)
State New, archived
Headers show
Series Apply COR-filter to the block-stream permanently | expand

Commit Message

Zhijian Li (Fujitsu)" via Aug. 28, 2020, 4:52 p.m. UTC
Limit the guest's COR operations by the base node in the backing chain
during a stream job.

Signed-off-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
---
 block/copy-on-read.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

Comments

Max Reitz Sept. 4, 2020, 12:50 p.m. UTC | #1
On 28.08.20 18:52, Andrey Shinkevich wrote:
> Limit the guest's COR operations by the base node in the backing chain
> during a stream job.

I don’t understand.   Shouldn’t we limit the areas where we set the COR
flag?

> Signed-off-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
> ---
>  block/copy-on-read.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 49 insertions(+)
> 
> diff --git a/block/copy-on-read.c b/block/copy-on-read.c
> index 1f858bb..ecbd1f8 100644
> --- a/block/copy-on-read.c
> +++ b/block/copy-on-read.c
> @@ -57,6 +57,37 @@ static BlockDriverState *get_base_by_name(BlockDriverState *bs,
>      return base_bs;
>  }
>  
> +/*
> + * Returns 1 if the block is allocated in a node between cor_filter_bs->file->bs
> + * and the base_bs in the backing chain. Otherwise, returns 0.
> + * The COR operation is allowed if the base_bs is not specified - return 1.
> + * Returns negative errno on failure.
> + */
> +static int node_above_base(BlockDriverState *cor_filter_bs, uint64_t offset,
> +                           uint64_t bytes)
> +{
> +    int ret;
> +    int64_t dummy;
> +    BlockDriverState *file = NULL;
> +    BDRVStateCOR *state = cor_filter_bs->opaque;
> +
> +    if (!state->base_bs) {
> +        return 1;
> +    }
> +
> +    ret = bdrv_block_status_above(cor_filter_bs->file->bs, state->base_bs,
> +                                  offset, bytes, &dummy, NULL, &file);
> +    if (ret < 0) {
> +        return ret;
> +    }
> +
> +    if (file) {

Why check file and not the return value?

> +        return 1;
> +    }
> +
> +    return 0;

“dummy” should really not be called that way, it should be evaluated
whether it’s smaller than bytes.

First, [offset, offset + dummy) may not be allocated above the base –
but [offset + dummy, offset + bytes) may be.  Then this function returns
0 here, even though there is something in that range that’s allocated.

Second, in that case we still shouldn’t return 1, but return the
shrunken offset instead.  Or, if there are multiple distinct allocated
areas, they should probably even all be copied separately.


(But all of that of course only if this function is intended to be used
to limit where we set the COR flag, because I don’t understand why we’d
want to limit where something can be written.)

Max

> +}
> +
>  static int cor_open(BlockDriverState *bs, QDict *options, int flags,
>                      Error **errp)
>  {
> @@ -153,6 +184,12 @@ static int coroutine_fn cor_co_pwritev_part(BlockDriverState *bs,
>                                              QEMUIOVector *qiov,
>                                              size_t qiov_offset, int flags)
>  {
> +    int ret = node_above_base(bs, offset, bytes);
> +
> +    if (!ret || ret < 0) {
> +        return ret;
> +    }
> +
>      return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset,
>                                  flags);
>  }
> @@ -162,6 +199,12 @@ static int coroutine_fn cor_co_pwrite_zeroes(BlockDriverState *bs,
>                                               int64_t offset, int bytes,
>                                               BdrvRequestFlags flags)
>  {
> +    int ret = node_above_base(bs, offset, bytes);
> +
> +    if (!ret || ret < 0) {
> +        return ret;
> +    }
> +
>      return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
>  }
>  
> @@ -178,6 +221,12 @@ static int coroutine_fn cor_co_pwritev_compressed(BlockDriverState *bs,
>                                                    uint64_t bytes,
>                                                    QEMUIOVector *qiov)
>  {
> +    int ret = node_above_base(bs, offset, bytes);
> +
> +    if (!ret || ret < 0) {
> +        return ret;
> +    }
> +
>      return bdrv_co_pwritev(bs->file, offset, bytes, qiov,
>                             BDRV_REQ_WRITE_COMPRESSED);
>  }
>
Vladimir Sementsov-Ogievskiy Sept. 4, 2020, 1:59 p.m. UTC | #2
04.09.2020 15:50, Max Reitz wrote:
> On 28.08.20 18:52, Andrey Shinkevich wrote:
>> Limit the guest's COR operations by the base node in the backing chain
>> during a stream job.
> 
> I don’t understand.   Shouldn’t we limit the areas where we set the COR
> flag?
> 
>> Signed-off-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
>> ---
>>   block/copy-on-read.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
>>   1 file changed, 49 insertions(+)
>>
>> diff --git a/block/copy-on-read.c b/block/copy-on-read.c
>> index 1f858bb..ecbd1f8 100644
>> --- a/block/copy-on-read.c
>> +++ b/block/copy-on-read.c
>> @@ -57,6 +57,37 @@ static BlockDriverState *get_base_by_name(BlockDriverState *bs,
>>       return base_bs;
>>   }
>>   
>> +/*
>> + * Returns 1 if the block is allocated in a node between cor_filter_bs->file->bs
>> + * and the base_bs in the backing chain. Otherwise, returns 0.
>> + * The COR operation is allowed if the base_bs is not specified - return 1.
>> + * Returns negative errno on failure.
>> + */
>> +static int node_above_base(BlockDriverState *cor_filter_bs, uint64_t offset,
>> +                           uint64_t bytes)
>> +{
>> +    int ret;
>> +    int64_t dummy;
>> +    BlockDriverState *file = NULL;
>> +    BDRVStateCOR *state = cor_filter_bs->opaque;
>> +
>> +    if (!state->base_bs) {
>> +        return 1;
>> +    }
>> +
>> +    ret = bdrv_block_status_above(cor_filter_bs->file->bs, state->base_bs,
>> +                                  offset, bytes, &dummy, NULL, &file);
>> +    if (ret < 0) {
>> +        return ret;
>> +    }
>> +
>> +    if (file) {
> 
> Why check file and not the return value?
> 
>> +        return 1;
>> +    }
>> +
>> +    return 0;
> 
> “dummy” should really not be called that way, it should be evaluated
> whether it’s smaller than bytes.
> 
> First, [offset, offset + dummy) may not be allocated above the base –
> but [offset + dummy, offset + bytes) may be.  Then this function returns
> 0 here, even though there is something in that range that’s allocated.
> 
> Second, in that case we still shouldn’t return 1, but return the
> shrunken offset instead.  Or, if there are multiple distinct allocated
> areas, they should probably even all be copied separately.
> 
> 
> (But all of that of course only if this function is intended to be used
> to limit where we set the COR flag, because I don’t understand why we’d
> want to limit where something can be written.)
> 

Agree to all.

1. Write path shouldn't be changed: it's a copy-on-read filter.

2. On read we need is_allocated_above-driven loop, to insert the flag only to regions allocated above base
  (and other regions we read just without the flag, don't skip them). qiov_offset will help very well.

3. Like in many other places, let's ignore block-status errors (and just add the flag if block_status fails)

>> +}
>> +
>>   static int cor_open(BlockDriverState *bs, QDict *options, int flags,
>>                       Error **errp)
>>   {
>> @@ -153,6 +184,12 @@ static int coroutine_fn cor_co_pwritev_part(BlockDriverState *bs,
>>                                               QEMUIOVector *qiov,
>>                                               size_t qiov_offset, int flags)
>>   {
>> +    int ret = node_above_base(bs, offset, bytes);
>> +
>> +    if (!ret || ret < 0) {
>> +        return ret;
>> +    }
>> +
>>       return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset,
>>                                   flags);
>>   }
>> @@ -162,6 +199,12 @@ static int coroutine_fn cor_co_pwrite_zeroes(BlockDriverState *bs,
>>                                                int64_t offset, int bytes,
>>                                                BdrvRequestFlags flags)
>>   {
>> +    int ret = node_above_base(bs, offset, bytes);
>> +
>> +    if (!ret || ret < 0) {
>> +        return ret;
>> +    }
>> +
>>       return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
>>   }
>>   
>> @@ -178,6 +221,12 @@ static int coroutine_fn cor_co_pwritev_compressed(BlockDriverState *bs,
>>                                                     uint64_t bytes,
>>                                                     QEMUIOVector *qiov)
>>   {
>> +    int ret = node_above_base(bs, offset, bytes);
>> +
>> +    if (!ret || ret < 0) {
>> +        return ret;
>> +    }
>> +
>>       return bdrv_co_pwritev(bs->file, offset, bytes, qiov,
>>                              BDRV_REQ_WRITE_COMPRESSED);
>>   }
>>
> 
>
Andrey Shinkevich Sept. 22, 2020, 1:13 p.m. UTC | #3
On 04.09.2020 16:59, Vladimir Sementsov-Ogievskiy wrote:
> 04.09.2020 15:50, Max Reitz wrote:
>> On 28.08.20 18:52, Andrey Shinkevich wrote:
>>> Limit the guest's COR operations by the base node in the backing chain
>>> during a stream job.
>>
>> I don’t understand.   Shouldn’t we limit the areas where we set the COR
>> flag?
>>
>>> Signed-off-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
>>> ---
>>>   block/copy-on-read.c | 49 
>>> +++++++++++++++++++++++++++++++++++++++++++++++++
>>>   1 file changed, 49 insertions(+)
>>>
>>> diff --git a/block/copy-on-read.c b/block/copy-on-read.c
>>> index 1f858bb..ecbd1f8 100644
>>> --- a/block/copy-on-read.c
>>> +++ b/block/copy-on-read.c
>>> @@ -57,6 +57,37 @@ static BlockDriverState 
>>> *get_base_by_name(BlockDriverState *bs,
>>>       return base_bs;
>>>   }
>>>   +/*
>>> + * Returns 1 if the block is allocated in a node between 
>>> cor_filter_bs->file->bs
>>> + * and the base_bs in the backing chain. Otherwise, returns 0.
>>> + * The COR operation is allowed if the base_bs is not specified - 
>>> return 1.
>>> + * Returns negative errno on failure.
>>> + */
>>> +static int node_above_base(BlockDriverState *cor_filter_bs, 
>>> uint64_t offset,
>>> +                           uint64_t bytes)
>>> +{
>>> +    int ret;
>>> +    int64_t dummy;
>>> +    BlockDriverState *file = NULL;
>>> +    BDRVStateCOR *state = cor_filter_bs->opaque;
>>> +
>>> +    if (!state->base_bs) {
>>> +        return 1;
>>> +    }
>>> +
>>> +    ret = bdrv_block_status_above(cor_filter_bs->file->bs, 
>>> state->base_bs,
>>> +                                  offset, bytes, &dummy, NULL, &file);
>>> +    if (ret < 0) {
>>> +        return ret;
>>> +    }
>>> +
>>> +    if (file) {
>>
>> Why check file and not the return value?
>>
>>> +        return 1;
>>> +    }
>>> +
>>> +    return 0;
>>
>> “dummy” should really not be called that way, it should be evaluated
>> whether it’s smaller than bytes.
>>
>> First, [offset, offset + dummy) may not be allocated above the base –
>> but [offset + dummy, offset + bytes) may be.  Then this function returns
>> 0 here, even though there is something in that range that’s allocated.
>>
>> Second, in that case we still shouldn’t return 1, but return the
>> shrunken offset instead.  Or, if there are multiple distinct allocated
>> areas, they should probably even all be copied separately.
>>
>>
>> (But all of that of course only if this function is intended to be used
>> to limit where we set the COR flag, because I don’t understand why we’d
>> want to limit where something can be written.)
>>
>
> Agree to all.
>
> 1. Write path shouldn't be changed: it's a copy-on-read filter.
>
> 2. On read we need is_allocated_above-driven loop, to insert the flag 
> only to regions allocated above base
>  (and other regions we read just without the flag, don't skip them). 
> qiov_offset will help very well.
>
> 3. Like in many other places, let's ignore  errors (and just add the 
> flag if block_status fails)


If "block_status" fails, the stream job does not copy. Shall we keep the 
same behavior in the cor_co_preadv_part()?


Andrey

>
>>> +}
>>> +
>>>   static int cor_open(BlockDriverState *bs, QDict *options, int flags,
>>>                       Error **errp)
>>>   {
>>> @@ -153,6 +184,12 @@ static int coroutine_fn 
>>> cor_co_pwritev_part(BlockDriverState *bs,
>>>                                               QEMUIOVector *qiov,
>>>                                               size_t qiov_offset, 
>>> int flags)
>>>   {
>>> +    int ret = node_above_base(bs, offset, bytes);
>>> +
>>> +    if (!ret || ret < 0) {
>>> +        return ret;
>>> +    }
>>> +
>>>       return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, 
>>> qiov_offset,
>>>                                   flags);
>>>   }
>>> @@ -162,6 +199,12 @@ static int coroutine_fn 
>>> cor_co_pwrite_zeroes(BlockDriverState *bs,
>>>                                                int64_t offset, int 
>>> bytes,
>>> BdrvRequestFlags flags)
>>>   {
>>> +    int ret = node_above_base(bs, offset, bytes);
>>> +
>>> +    if (!ret || ret < 0) {
>>> +        return ret;
>>> +    }
>>> +
>>>       return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
>>>   }
>>>   @@ -178,6 +221,12 @@ static int coroutine_fn 
>>> cor_co_pwritev_compressed(BlockDriverState *bs,
>>>                                                     uint64_t bytes,
>>> QEMUIOVector *qiov)
>>>   {
>>> +    int ret = node_above_base(bs, offset, bytes);
>>> +
>>> +    if (!ret || ret < 0) {
>>> +        return ret;
>>> +    }
>>> +
>>>       return bdrv_co_pwritev(bs->file, offset, bytes, qiov,
>>>                              BDRV_REQ_WRITE_COMPRESSED);
>>>   }
>>>
>>
>>
>
>
Max Reitz Sept. 24, 2020, 11:18 a.m. UTC | #4
On 22.09.20 15:13, Andrey Shinkevich wrote:
> On 04.09.2020 16:59, Vladimir Sementsov-Ogievskiy wrote:
>> 04.09.2020 15:50, Max Reitz wrote:
>>> On 28.08.20 18:52, Andrey Shinkevich wrote:
>>>> Limit the guest's COR operations by the base node in the backing chain
>>>> during a stream job.
>>>
>>> I don’t understand.   Shouldn’t we limit the areas where we set the COR
>>> flag?
>>>
>>>> Signed-off-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
>>>> ---
>>>>   block/copy-on-read.c | 49
>>>> +++++++++++++++++++++++++++++++++++++++++++++++++
>>>>   1 file changed, 49 insertions(+)
>>>>
>>>> diff --git a/block/copy-on-read.c b/block/copy-on-read.c
>>>> index 1f858bb..ecbd1f8 100644
>>>> --- a/block/copy-on-read.c
>>>> +++ b/block/copy-on-read.c
>>>> @@ -57,6 +57,37 @@ static BlockDriverState
>>>> *get_base_by_name(BlockDriverState *bs,
>>>>       return base_bs;
>>>>   }
>>>>   +/*
>>>> + * Returns 1 if the block is allocated in a node between
>>>> cor_filter_bs->file->bs
>>>> + * and the base_bs in the backing chain. Otherwise, returns 0.
>>>> + * The COR operation is allowed if the base_bs is not specified -
>>>> return 1.
>>>> + * Returns negative errno on failure.
>>>> + */
>>>> +static int node_above_base(BlockDriverState *cor_filter_bs,
>>>> uint64_t offset,
>>>> +                           uint64_t bytes)
>>>> +{
>>>> +    int ret;
>>>> +    int64_t dummy;
>>>> +    BlockDriverState *file = NULL;
>>>> +    BDRVStateCOR *state = cor_filter_bs->opaque;
>>>> +
>>>> +    if (!state->base_bs) {
>>>> +        return 1;
>>>> +    }
>>>> +
>>>> +    ret = bdrv_block_status_above(cor_filter_bs->file->bs,
>>>> state->base_bs,
>>>> +                                  offset, bytes, &dummy, NULL, &file);
>>>> +    if (ret < 0) {
>>>> +        return ret;
>>>> +    }
>>>> +
>>>> +    if (file) {
>>>
>>> Why check file and not the return value?
>>>
>>>> +        return 1;
>>>> +    }
>>>> +
>>>> +    return 0;
>>>
>>> “dummy” should really not be called that way, it should be evaluated
>>> whether it’s smaller than bytes.
>>>
>>> First, [offset, offset + dummy) may not be allocated above the base –
>>> but [offset + dummy, offset + bytes) may be.  Then this function returns
>>> 0 here, even though there is something in that range that’s allocated.
>>>
>>> Second, in that case we still shouldn’t return 1, but return the
>>> shrunken offset instead.  Or, if there are multiple distinct allocated
>>> areas, they should probably even all be copied separately.
>>>
>>>
>>> (But all of that of course only if this function is intended to be used
>>> to limit where we set the COR flag, because I don’t understand why we’d
>>> want to limit where something can be written.)
>>>
>>
>> Agree to all.
>>
>> 1. Write path shouldn't be changed: it's a copy-on-read filter.
>>
>> 2. On read we need is_allocated_above-driven loop, to insert the flag
>> only to regions allocated above base
>>  (and other regions we read just without the flag, don't skip them).
>> qiov_offset will help very well.
>>
>> 3. Like in many other places, let's ignore  errors (and just add the
>> flag if block_status fails)
> 
> 
> If "block_status" fails, the stream job does not copy. Shall we keep the
> same behavior in the cor_co_preadv_part()?

I think copying can’t really hurt, so I think it would be better to copy
if we aren’t sure (because block_status failed).  The difference to
streaming could well be considered a bug fix.

Max
diff mbox series

Patch

diff --git a/block/copy-on-read.c b/block/copy-on-read.c
index 1f858bb..ecbd1f8 100644
--- a/block/copy-on-read.c
+++ b/block/copy-on-read.c
@@ -57,6 +57,37 @@  static BlockDriverState *get_base_by_name(BlockDriverState *bs,
     return base_bs;
 }
 
+/*
+ * Returns 1 if the block is allocated in a node between cor_filter_bs->file->bs
+ * and the base_bs in the backing chain. Otherwise, returns 0.
+ * The COR operation is allowed if the base_bs is not specified - return 1.
+ * Returns negative errno on failure.
+ */
+static int node_above_base(BlockDriverState *cor_filter_bs, uint64_t offset,
+                           uint64_t bytes)
+{
+    int ret;
+    int64_t dummy;
+    BlockDriverState *file = NULL;
+    BDRVStateCOR *state = cor_filter_bs->opaque;
+
+    if (!state->base_bs) {
+        return 1;
+    }
+
+    ret = bdrv_block_status_above(cor_filter_bs->file->bs, state->base_bs,
+                                  offset, bytes, &dummy, NULL, &file);
+    if (ret < 0) {
+        return ret;
+    }
+
+    if (file) {
+        return 1;
+    }
+
+    return 0;
+}
+
 static int cor_open(BlockDriverState *bs, QDict *options, int flags,
                     Error **errp)
 {
@@ -153,6 +184,12 @@  static int coroutine_fn cor_co_pwritev_part(BlockDriverState *bs,
                                             QEMUIOVector *qiov,
                                             size_t qiov_offset, int flags)
 {
+    int ret = node_above_base(bs, offset, bytes);
+
+    if (!ret || ret < 0) {
+        return ret;
+    }
+
     return bdrv_co_pwritev_part(bs->file, offset, bytes, qiov, qiov_offset,
                                 flags);
 }
@@ -162,6 +199,12 @@  static int coroutine_fn cor_co_pwrite_zeroes(BlockDriverState *bs,
                                              int64_t offset, int bytes,
                                              BdrvRequestFlags flags)
 {
+    int ret = node_above_base(bs, offset, bytes);
+
+    if (!ret || ret < 0) {
+        return ret;
+    }
+
     return bdrv_co_pwrite_zeroes(bs->file, offset, bytes, flags);
 }
 
@@ -178,6 +221,12 @@  static int coroutine_fn cor_co_pwritev_compressed(BlockDriverState *bs,
                                                   uint64_t bytes,
                                                   QEMUIOVector *qiov)
 {
+    int ret = node_above_base(bs, offset, bytes);
+
+    if (!ret || ret < 0) {
+        return ret;
+    }
+
     return bdrv_co_pwritev(bs->file, offset, bytes, qiov,
                            BDRV_REQ_WRITE_COMPRESSED);
 }