diff mbox

[v13,2/3] quorum: implement bdrv_add_child() and bdrv_del_child()

Message ID 1460536389-9161-3-git-send-email-xiecl.fnst@cn.fujitsu.com (mailing list archive)
State New, archived
Headers show

Commit Message

Changlong Xie April 13, 2016, 8:33 a.m. UTC
From: Wen Congyang <wency@cn.fujitsu.com>

Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Gonglei <arei.gonglei@huawei.com>
Signed-off-by: Changlong Xie <xiecl.fnst@cn.fujitsu.com>
---
 block.c               |  8 +++---
 block/quorum.c        | 78 +++++++++++++++++++++++++++++++++++++++++++++++++--
 include/block/block.h |  4 +++
 3 files changed, 84 insertions(+), 6 deletions(-)

Comments

Changlong Xie April 20, 2016, 3:36 a.m. UTC | #1
ping...

On 04/13/2016 04:33 PM, Changlong Xie wrote:
> From: Wen Congyang <wency@cn.fujitsu.com>
>
> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
> Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
> Signed-off-by: Gonglei <arei.gonglei@huawei.com>
> Signed-off-by: Changlong Xie <xiecl.fnst@cn.fujitsu.com>
> ---
>   block.c               |  8 +++---
>   block/quorum.c        | 78 +++++++++++++++++++++++++++++++++++++++++++++++++--
>   include/block/block.h |  4 +++
>   3 files changed, 84 insertions(+), 6 deletions(-)
>
> diff --git a/block.c b/block.c
> index 68cd3b2..4bdc6b3 100644
> --- a/block.c
> +++ b/block.c
> @@ -1176,10 +1176,10 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
>       return child;
>   }
>
> -static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
> -                                    BlockDriverState *child_bs,
> -                                    const char *child_name,
> -                                    const BdrvChildRole *child_role)
> +BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
> +                             BlockDriverState *child_bs,
> +                             const char *child_name,
> +                             const BdrvChildRole *child_role)
>   {
>       BdrvChild *child = bdrv_root_attach_child(child_bs, child_name, child_role);
>       QLIST_INSERT_HEAD(&parent_bs->children, child, next);
> diff --git a/block/quorum.c b/block/quorum.c
> index da15465..2553f82 100644
> --- a/block/quorum.c
> +++ b/block/quorum.c
> @@ -14,6 +14,7 @@
>    */
>
>   #include "qemu/osdep.h"
> +#include "qemu/cutils.h"
>   #include "block/block_int.h"
>   #include "qapi/qmp/qbool.h"
>   #include "qapi/qmp/qdict.h"
> @@ -67,6 +68,9 @@ typedef struct QuorumVotes {
>   typedef struct BDRVQuorumState {
>       BdrvChild **children;  /* children BlockDriverStates */
>       int num_children;      /* children count */
> +    uint64_t last_index;   /* indicate the child role name of the last
> +                            * element of children array
> +                            */
>       int threshold;         /* if less than threshold children reads gave the
>                               * same result a quorum error occurs.
>                               */
> @@ -898,9 +902,9 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
>           ret = -EINVAL;
>           goto exit;
>       }
> -    if (s->num_children < 2) {
> +    if (s->num_children < 1) {
>           error_setg(&local_err,
> -                   "Number of provided children must be greater than 1");
> +                   "Number of provided children must be 1 or more");
>           ret = -EINVAL;
>           goto exit;
>       }
> @@ -964,6 +968,7 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
>
>           opened[i] = true;
>       }
> +    s->last_index = s->num_children;
>
>       g_free(opened);
>       goto exit;
> @@ -1020,6 +1025,72 @@ static void quorum_attach_aio_context(BlockDriverState *bs,
>       }
>   }
>
> +static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs,
> +                             Error **errp)
> +{
> +    BDRVQuorumState *s = bs->opaque;
> +    BdrvChild *child;
> +    char indexstr[32];
> +    int ret;
> +
> +    assert(s->num_children <= INT_MAX / sizeof(BdrvChild *) &&
> +           s->last_index <= UINT64_MAX);
> +    if (s->num_children == INT_MAX / sizeof(BdrvChild *) ||
> +        s->last_index == UINT64_MAX) {
> +        error_setg(errp, "Too many children");
> +        return;
> +    }
> +
> +    ret = snprintf(indexstr, 32, "children.%" PRIu64, s->last_index);
> +    if (ret < 0 || ret >= 32) {
> +        error_setg(errp, "cannot generate child name");
> +        return;
> +    }
> +    s->last_index++;
> +
> +    bdrv_drain(bs);
> +
> +    /* We can safely add the child now */
> +    bdrv_ref(child_bs);
> +    child = bdrv_attach_child(bs, child_bs, indexstr, &child_format);
> +    s->children = g_renew(BdrvChild *, s->children, s->num_children + 1);
> +    s->children[s->num_children++] = child;
> +}
> +
> +static void quorum_del_child(BlockDriverState *bs, BdrvChild *child,
> +                             Error **errp)
> +{
> +    BDRVQuorumState *s = bs->opaque;
> +    int i;
> +
> +    for (i = 0; i < s->num_children; i++) {
> +        if (s->children[i] == child) {
> +            break;
> +        }
> +    }
> +
> +    /* we have checked it in bdrv_del_child() */
> +    assert(i < s->num_children);
> +
> +    if (s->num_children <= s->threshold) {
> +        error_setg(errp,
> +            "The number of children cannot be lower than the vote threshold %d",
> +            s->threshold);
> +        return;
> +    }
> +
> +    /* child->name is "children.%d" */
> +    assert(!strncmp(child->name, "children.", 9));
> +
> +    bdrv_drain(bs);
> +
> +    /* We can safely remove this child now */
> +    memmove(&s->children[i], &s->children[i + 1],
> +            (s->num_children - i - 1) * sizeof(BdrvChild *));
> +    s->children = g_renew(BdrvChild *, s->children, --s->num_children);
> +    bdrv_unref_child(bs, child);
> +}
> +
>   static void quorum_refresh_filename(BlockDriverState *bs, QDict *options)
>   {
>       BDRVQuorumState *s = bs->opaque;
> @@ -1075,6 +1146,9 @@ static BlockDriver bdrv_quorum = {
>       .bdrv_detach_aio_context            = quorum_detach_aio_context,
>       .bdrv_attach_aio_context            = quorum_attach_aio_context,
>
> +    .bdrv_add_child                     = quorum_add_child,
> +    .bdrv_del_child                     = quorum_del_child,
> +
>       .is_filter                          = true,
>       .bdrv_recurse_is_first_non_filter   = quorum_recurse_is_first_non_filter,
>   };
> diff --git a/include/block/block.h b/include/block/block.h
> index 694ca76..52902cd 100644
> --- a/include/block/block.h
> +++ b/include/block/block.h
> @@ -476,6 +476,10 @@ void bdrv_disable_copy_on_read(BlockDriverState *bs);
>   void bdrv_ref(BlockDriverState *bs);
>   void bdrv_unref(BlockDriverState *bs);
>   void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child);
> +BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
> +                             BlockDriverState *child_bs,
> +                             const char *child_name,
> +                             const BdrvChildRole *child_role);
>
>   bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp);
>   void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason);
>
Max Reitz May 6, 2016, 3:20 p.m. UTC | #2
On 13.04.2016 10:33, Changlong Xie wrote:
> From: Wen Congyang <wency@cn.fujitsu.com>
> 
> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
> Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
> Signed-off-by: Gonglei <arei.gonglei@huawei.com>
> Signed-off-by: Changlong Xie <xiecl.fnst@cn.fujitsu.com>
> ---
>  block.c               |  8 +++---
>  block/quorum.c        | 78 +++++++++++++++++++++++++++++++++++++++++++++++++--
>  include/block/block.h |  4 +++
>  3 files changed, 84 insertions(+), 6 deletions(-)

Design-wise: Nice change. It's a bit strange now to have gaps in the
child naming, but this strategy is very simple to implement and the
order of the children used for FIFO is the same as the numerical order
of the indices in the child names, so I'm happy.

> diff --git a/block.c b/block.c
> index 68cd3b2..4bdc6b3 100644
> --- a/block.c
> +++ b/block.c
> @@ -1176,10 +1176,10 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
>      return child;
>  }
>  
> -static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
> -                                    BlockDriverState *child_bs,
> -                                    const char *child_name,
> -                                    const BdrvChildRole *child_role)
> +BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
> +                             BlockDriverState *child_bs,
> +                             const char *child_name,
> +                             const BdrvChildRole *child_role)
>  {
>      BdrvChild *child = bdrv_root_attach_child(child_bs, child_name, child_role);
>      QLIST_INSERT_HEAD(&parent_bs->children, child, next);
> diff --git a/block/quorum.c b/block/quorum.c
> index da15465..2553f82 100644
> --- a/block/quorum.c
> +++ b/block/quorum.c
> @@ -14,6 +14,7 @@
>   */
>  
>  #include "qemu/osdep.h"
> +#include "qemu/cutils.h"
>  #include "block/block_int.h"
>  #include "qapi/qmp/qbool.h"
>  #include "qapi/qmp/qdict.h"
> @@ -67,6 +68,9 @@ typedef struct QuorumVotes {
>  typedef struct BDRVQuorumState {
>      BdrvChild **children;  /* children BlockDriverStates */
>      int num_children;      /* children count */
> +    uint64_t last_index;   /* indicate the child role name of the last
> +                            * element of children array

The name (and the comment) is a bit misleading, as it's not the index of
the last child but one after that, i.e. the index of the next child
should it be added.

So maybe this variable should be called "next_child_index" or something
similar, and the comment should reflect that.

> +                            */
>      int threshold;         /* if less than threshold children reads gave the
>                              * same result a quorum error occurs.
>                              */
> @@ -898,9 +902,9 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
>          ret = -EINVAL;
>          goto exit;
>      }
> -    if (s->num_children < 2) {
> +    if (s->num_children < 1) {
>          error_setg(&local_err,
> -                   "Number of provided children must be greater than 1");
> +                   "Number of provided children must be 1 or more");
>          ret = -EINVAL;
>          goto exit;
>      }
> @@ -964,6 +968,7 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
>  
>          opened[i] = true;
>      }
> +    s->last_index = s->num_children;
>  
>      g_free(opened);
>      goto exit;
> @@ -1020,6 +1025,72 @@ static void quorum_attach_aio_context(BlockDriverState *bs,
>      }
>  }
>  
> +static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs,
> +                             Error **errp)
> +{
> +    BDRVQuorumState *s = bs->opaque;
> +    BdrvChild *child;
> +    char indexstr[32];
> +    int ret;
> +
> +    assert(s->num_children <= INT_MAX / sizeof(BdrvChild *) &&
> +           s->last_index <= UINT64_MAX);
> +    if (s->num_children == INT_MAX / sizeof(BdrvChild *) ||
> +        s->last_index == UINT64_MAX) {
> +        error_setg(errp, "Too many children");
> +        return;
> +    }
> +
> +    ret = snprintf(indexstr, 32, "children.%" PRIu64, s->last_index);
> +    if (ret < 0 || ret >= 32) {
> +        error_setg(errp, "cannot generate child name");
> +        return;
> +    }
> +    s->last_index++;
> +
> +    bdrv_drain(bs);

We have bdrv_drained_begin() and bdrv_drained_end() now. Perhaps we
should make use of them and call bdrv_drained_begin() here...

> +
> +    /* We can safely add the child now */
> +    bdrv_ref(child_bs);
> +    child = bdrv_attach_child(bs, child_bs, indexstr, &child_format);
> +    s->children = g_renew(BdrvChild *, s->children, s->num_children + 1);
> +    s->children[s->num_children++] = child;

...and bdrv_drained_end() here.

> +}
> +
> +static void quorum_del_child(BlockDriverState *bs, BdrvChild *child,
> +                             Error **errp)
> +{
> +    BDRVQuorumState *s = bs->opaque;
> +    int i;
> +
> +    for (i = 0; i < s->num_children; i++) {
> +        if (s->children[i] == child) {
> +            break;
> +        }
> +    }
> +
> +    /* we have checked it in bdrv_del_child() */
> +    assert(i < s->num_children);
> +
> +    if (s->num_children <= s->threshold) {
> +        error_setg(errp,
> +            "The number of children cannot be lower than the vote threshold %d",
> +            s->threshold);
> +        return;
> +    }
> +
> +    /* child->name is "children.%d" */
> +    assert(!strncmp(child->name, "children.", 9));

This could be removed now, too. I was asking for these assertions (of
which in this version only this one is left) because we were using the
child name to infer the index in the child bitmap before.

Now we're not using the child name here at all, so we can just drop this
assertion.

> +
> +    bdrv_drain(bs);

As above, bdrv_drained_begin() here...

> +
> +    /* We can safely remove this child now */
> +    memmove(&s->children[i], &s->children[i + 1],
> +            (s->num_children - i - 1) * sizeof(BdrvChild *));
> +    s->children = g_renew(BdrvChild *, s->children, --s->num_children);
> +    bdrv_unref_child(bs, child);

...and bdrv_drained_end() here may be better than a plain bdrv_drain().

> +}
> +
>  static void quorum_refresh_filename(BlockDriverState *bs, QDict *options)
>  {
>      BDRVQuorumState *s = bs->opaque;
> @@ -1075,6 +1146,9 @@ static BlockDriver bdrv_quorum = {
>      .bdrv_detach_aio_context            = quorum_detach_aio_context,
>      .bdrv_attach_aio_context            = quorum_attach_aio_context,
>  
> +    .bdrv_add_child                     = quorum_add_child,
> +    .bdrv_del_child                     = quorum_del_child,
> +
>      .is_filter                          = true,
>      .bdrv_recurse_is_first_non_filter   = quorum_recurse_is_first_non_filter,
>  };
> diff --git a/include/block/block.h b/include/block/block.h
> index 694ca76..52902cd 100644
> --- a/include/block/block.h
> +++ b/include/block/block.h
> @@ -476,6 +476,10 @@ void bdrv_disable_copy_on_read(BlockDriverState *bs);
>  void bdrv_ref(BlockDriverState *bs);
>  void bdrv_unref(BlockDriverState *bs);
>  void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child);
> +BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
> +                             BlockDriverState *child_bs,
> +                             const char *child_name,
> +                             const BdrvChildRole *child_role);
>  
>  bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp);
>  void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason);
> 

None of the things I pointed out above is critical, but I'd still rather
see them fixed before giving my R-b.

Max
Changlong Xie May 9, 2016, 9:26 a.m. UTC | #3
On 05/06/2016 11:20 PM, Max Reitz wrote:
> On 13.04.2016 10:33, Changlong Xie wrote:
>> From: Wen Congyang <wency@cn.fujitsu.com>
>>
>> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com>
>> Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
>> Signed-off-by: Gonglei <arei.gonglei@huawei.com>
>> Signed-off-by: Changlong Xie <xiecl.fnst@cn.fujitsu.com>
>> ---
>>   block.c               |  8 +++---
>>   block/quorum.c        | 78 +++++++++++++++++++++++++++++++++++++++++++++++++--
>>   include/block/block.h |  4 +++
>>   3 files changed, 84 insertions(+), 6 deletions(-)
>
> Design-wise: Nice change. It's a bit strange now to have gaps in the
> child naming, but this strategy is very simple to implement and the
> order of the children used for FIFO is the same as the numerical order
> of the indices in the child names, so I'm happy.

I think this is the simplest approach too : )

>
>> diff --git a/block.c b/block.c
>> index 68cd3b2..4bdc6b3 100644
>> --- a/block.c
>> +++ b/block.c
>> @@ -1176,10 +1176,10 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
>>       return child;
>>   }
>>
>> -static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
>> -                                    BlockDriverState *child_bs,
>> -                                    const char *child_name,
>> -                                    const BdrvChildRole *child_role)
>> +BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
>> +                             BlockDriverState *child_bs,
>> +                             const char *child_name,
>> +                             const BdrvChildRole *child_role)
>>   {
>>       BdrvChild *child = bdrv_root_attach_child(child_bs, child_name, child_role);
>>       QLIST_INSERT_HEAD(&parent_bs->children, child, next);
>> diff --git a/block/quorum.c b/block/quorum.c
>> index da15465..2553f82 100644
>> --- a/block/quorum.c
>> +++ b/block/quorum.c
>> @@ -14,6 +14,7 @@
>>    */
>>
>>   #include "qemu/osdep.h"
>> +#include "qemu/cutils.h"
>>   #include "block/block_int.h"
>>   #include "qapi/qmp/qbool.h"
>>   #include "qapi/qmp/qdict.h"
>> @@ -67,6 +68,9 @@ typedef struct QuorumVotes {
>>   typedef struct BDRVQuorumState {
>>       BdrvChild **children;  /* children BlockDriverStates */
>>       int num_children;      /* children count */
>> +    uint64_t last_index;   /* indicate the child role name of the last
>> +                            * element of children array
>
> The name (and the comment) is a bit misleading, as it's not the index of
> the last child but one after that, i.e. the index of the next child
> should it be added.
>
> So maybe this variable should be called "next_child_index" or something
> similar, and the comment should reflect that.
>

Will fix.

>> +                            */
>>       int threshold;         /* if less than threshold children reads gave the
>>                               * same result a quorum error occurs.
>>                               */
>> @@ -898,9 +902,9 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
>>           ret = -EINVAL;
>>           goto exit;
>>       }
>> -    if (s->num_children < 2) {
>> +    if (s->num_children < 1) {
>>           error_setg(&local_err,
>> -                   "Number of provided children must be greater than 1");
>> +                   "Number of provided children must be 1 or more");
>>           ret = -EINVAL;
>>           goto exit;
>>       }
>> @@ -964,6 +968,7 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
>>
>>           opened[i] = true;
>>       }
>> +    s->last_index = s->num_children;
>>
>>       g_free(opened);
>>       goto exit;
>> @@ -1020,6 +1025,72 @@ static void quorum_attach_aio_context(BlockDriverState *bs,
>>       }
>>   }
>>
>> +static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs,
>> +                             Error **errp)
>> +{
>> +    BDRVQuorumState *s = bs->opaque;
>> +    BdrvChild *child;
>> +    char indexstr[32];
>> +    int ret;
>> +
>> +    assert(s->num_children <= INT_MAX / sizeof(BdrvChild *) &&
>> +           s->last_index <= UINT64_MAX);
>> +    if (s->num_children == INT_MAX / sizeof(BdrvChild *) ||
>> +        s->last_index == UINT64_MAX) {
>> +        error_setg(errp, "Too many children");
>> +        return;
>> +    }
>> +
>> +    ret = snprintf(indexstr, 32, "children.%" PRIu64, s->last_index);
>> +    if (ret < 0 || ret >= 32) {
>> +        error_setg(errp, "cannot generate child name");
>> +        return;
>> +    }
>> +    s->last_index++;
>> +
>> +    bdrv_drain(bs);
>
> We have bdrv_drained_begin() and bdrv_drained_end() now. Perhaps we
> should make use of them and call bdrv_drained_begin() here...

Ditto

>
>> +
>> +    /* We can safely add the child now */
>> +    bdrv_ref(child_bs);
>> +    child = bdrv_attach_child(bs, child_bs, indexstr, &child_format);
>> +    s->children = g_renew(BdrvChild *, s->children, s->num_children + 1);
>> +    s->children[s->num_children++] = child;
>
> ...and bdrv_drained_end() here.

Ditto

>
>> +}
>> +
>> +static void quorum_del_child(BlockDriverState *bs, BdrvChild *child,
>> +                             Error **errp)
>> +{
>> +    BDRVQuorumState *s = bs->opaque;
>> +    int i;
>> +
>> +    for (i = 0; i < s->num_children; i++) {
>> +        if (s->children[i] == child) {
>> +            break;
>> +        }
>> +    }
>> +
>> +    /* we have checked it in bdrv_del_child() */
>> +    assert(i < s->num_children);
>> +
>> +    if (s->num_children <= s->threshold) {
>> +        error_setg(errp,
>> +            "The number of children cannot be lower than the vote threshold %d",
>> +            s->threshold);
>> +        return;
>> +    }
>> +
>> +    /* child->name is "children.%d" */
>> +    assert(!strncmp(child->name, "children.", 9));
>
> This could be removed now, too. I was asking for these assertions (of
> which in this version only this one is left) because we were using the
> child name to infer the index in the child bitmap before.
>
> Now we're not using the child name here at all, so we can just drop this
> assertion.

Will remove it in next version.

>
>> +
>> +    bdrv_drain(bs);
>
> As above, bdrv_drained_begin() here...

Will fix it.

>
>> +
>> +    /* We can safely remove this child now */
>> +    memmove(&s->children[i], &s->children[i + 1],
>> +            (s->num_children - i - 1) * sizeof(BdrvChild *));
>> +    s->children = g_renew(BdrvChild *, s->children, --s->num_children);
>> +    bdrv_unref_child(bs, child);
>
> ...and bdrv_drained_end() here may be better than a plain bdrv_drain().
>

Ditto.

>> +}
>> +
>>   static void quorum_refresh_filename(BlockDriverState *bs, QDict *options)
>>   {
>>       BDRVQuorumState *s = bs->opaque;
>> @@ -1075,6 +1146,9 @@ static BlockDriver bdrv_quorum = {
>>       .bdrv_detach_aio_context            = quorum_detach_aio_context,
>>       .bdrv_attach_aio_context            = quorum_attach_aio_context,
>>
>> +    .bdrv_add_child                     = quorum_add_child,
>> +    .bdrv_del_child                     = quorum_del_child,
>> +
>>       .is_filter                          = true,
>>       .bdrv_recurse_is_first_non_filter   = quorum_recurse_is_first_non_filter,
>>   };
>> diff --git a/include/block/block.h b/include/block/block.h
>> index 694ca76..52902cd 100644
>> --- a/include/block/block.h
>> +++ b/include/block/block.h
>> @@ -476,6 +476,10 @@ void bdrv_disable_copy_on_read(BlockDriverState *bs);
>>   void bdrv_ref(BlockDriverState *bs);
>>   void bdrv_unref(BlockDriverState *bs);
>>   void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child);
>> +BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
>> +                             BlockDriverState *child_bs,
>> +                             const char *child_name,
>> +                             const BdrvChildRole *child_role);
>>
>>   bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp);
>>   void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason);
>>
>
> None of the things I pointed out above is critical, but I'd still rather
> see them fixed before giving my R-b.

I'm glad to hear that; I will send out the next version.

Thanks
	-Xie
>
> Max
>
Alberto Garcia May 9, 2016, 3:52 p.m. UTC | #4
On Wed 13 Apr 2016 10:33:08 AM CEST, Changlong Xie wrote:

Sorry for the late reply!

The patch looks good. I have some additional comments on top of what Max
wrote, nothing serious though :)

> @@ -67,6 +68,9 @@ typedef struct QuorumVotes {
>  typedef struct BDRVQuorumState {
>      BdrvChild **children;  /* children BlockDriverStates */
>      int num_children;      /* children count */
> +    uint64_t last_index;   /* indicate the child role name of the last
> +                            * element of children array
> +                            */

I think you can use a regular 'unsigned' here, it's simpler and more
efficient. We're not going to have 2^32 Quorum children, are we? :)

> +static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs,
> +                             Error **errp)
> +{
> +    BDRVQuorumState *s = bs->opaque;
> +    BdrvChild *child;
> +    char indexstr[32];
> +    int ret;
> +
> +    assert(s->num_children <= INT_MAX / sizeof(BdrvChild *) &&
> +           s->last_index <= UINT64_MAX);

That last condition has no practical effect. last_index is a uint64_t so
s->last_index <= UINT64_MAX is always going to be true.

> +    s->children = g_renew(BdrvChild *, s->children, s->num_children + 1);
> +    s->children[s->num_children++] = child;

Slightly simpler way:

       s->children = g_renew(BdrvChild *, s->children, ++s->num_children);
       s->children[s->num_children] = child;

But this is not very important, so you can leave it as it is now if you
prefer.

> +    /* child->name is "children.%d" */
> +    assert(!strncmp(child->name, "children.", 9));

I actually don't think there's anything wrong with this assertion, but
if you decide to keep it you can use g_str_has_prefix() instead, which
is a bit easier and more readable.

> +    /* We can safely remove this child now */
> +    memmove(&s->children[i], &s->children[i + 1],
> +            (s->num_children - i - 1) * sizeof(BdrvChild *));
> +    s->children = g_renew(BdrvChild *, s->children, --s->num_children);
> +    bdrv_unref_child(bs, child);

Question: do we want to decrement last_index if 'i' is the last
child? Something like:

if (i == s->num_children - 1) {
   s->last_index--;
} else {
   memmove(...)
}
s->children = g_renew(...)

Berto
Max Reitz May 9, 2016, 4:50 p.m. UTC | #5
On 09.05.2016 17:52, Alberto Garcia wrote:
> On Wed 13 Apr 2016 10:33:08 AM CEST, Changlong Xie wrote:

[...]

>> +    /* We can safely remove this child now */
>> +    memmove(&s->children[i], &s->children[i + 1],
>> +            (s->num_children - i - 1) * sizeof(BdrvChild *));
>> +    s->children = g_renew(BdrvChild *, s->children, --s->num_children);
>> +    bdrv_unref_child(bs, child);
> 
> Question: do we want to decrement last_index if 'i' is the last
> children? Something like:

I think it's better to keep it consistent. It probably wouldn't hurt to
do this, but I don't see any real benefit and on the other hand it looks
a bit strange (to the user) to special-case replacement of the last child.

Max

> if (i == s->num_children - 1) {
>    s->last_index--;
> } else {
>    memmove(...)
> }
> s->children = g_renew(...)
> 
> Berto
>
Changlong Xie May 10, 2016, 6:59 a.m. UTC | #6
On 05/09/2016 11:52 PM, Alberto Garcia wrote:
> On Wed 13 Apr 2016 10:33:08 AM CEST, Changlong Xie wrote:
>
> Sorry for the late reply!
>

Never mind : )

> The patch looks good, I have some additional comments on top of what Max
> Wrote, nothing serious though :)
>
>> @@ -67,6 +68,9 @@ typedef struct QuorumVotes {
>>   typedef struct BDRVQuorumState {
>>       BdrvChild **children;  /* children BlockDriverStates */
>>       int num_children;      /* children count */
>> +    uint64_t last_index;   /* indicate the child role name of the last
>> +                            * element of children array
>> +                            */
>
> I think you can use a regular 'unsigned' here, it's simpler and more
> efficient. We're not going to have 2^32 Quorum children, are we? :)
>

Actually, I tried to use 'unsigned' here in my first version. But then I
thought that if someone ran a crazy child add/delete test (adding 10 children
per second), it would overflow in about 2^32/(60*60*24*365*10) = 13
years, so I chose uint64_t (2^64 is big enough) here.
Now I agree with you, it's overwrought. I will use 'unsigned' in the next
version.

>> +static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs,
>> +                             Error **errp)
>> +{
>> +    BDRVQuorumState *s = bs->opaque;
>> +    BdrvChild *child;
>> +    char indexstr[32];
>> +    int ret;
>> +
>> +    assert(s->num_children <= INT_MAX / sizeof(BdrvChild *) &&
>> +           s->last_index <= UINT64_MAX);
>
> That last condition has no practical effect. last_index is a uint64_t so
> s->last_index <= UINT64_MAX is always going to be true.

Yes, it's redundant code.

>
>> +    s->children = g_renew(BdrvChild *, s->children, s->num_children + 1);
>> +    s->children[s->num_children++] = child;
>
> Slightly simpler way:
>
>         s->children = g_renew(BdrvChild *, s->children, ++s->num_children);
>         s->children[s->num_children] = child;

That overflows the array; the index should be (s->num_children - 1) here.
I'll keep my original version.

>
> But this is not very important, so you can leave it as it is now if you
> prefer.
>
>> +    /* child->name is "children.%d" */
>> +    assert(!strncmp(child->name, "children.", 9));
>
> I actually don't think there's anything wrong with this assertion, but
> if you decide to keep it you can use g_str_has_prefix() instead, which
> is a bit easier and more readable.
>

Just as Max said, it's an extra check, so I will remove it.

>> +    /* We can safely remove this child now */
>> +    memmove(&s->children[i], &s->children[i + 1],
>> +            (s->num_children - i - 1) * sizeof(BdrvChild *));
>> +    s->children = g_renew(BdrvChild *, s->children, --s->num_children);
>> +    bdrv_unref_child(bs, child);
>
> Question: do we want to decrement last_index if 'i' is the last
> children? Something like:
>

I agree with Max; there seems to be no benefit here (although it would save
index numbers when i == s->num_children - 1).

Thanks
	-Xie

> if (i == s->num_children - 1) {
>     s->last_index--;
> } else {
>     memmove(...)
> }
> s->children = g_renew(...)
>
> Berto
>
>
> .
>
Kevin Wolf May 10, 2016, 8:39 a.m. UTC | #7
Am 09.05.2016 um 17:52 hat Alberto Garcia geschrieben:
> On Wed 13 Apr 2016 10:33:08 AM CEST, Changlong Xie wrote:
> 
> Sorry for the late reply!
> 
> The patch looks good, I have some additional comments on top of what Max
> Wrote, nothing serious though :)
> 
> > @@ -67,6 +68,9 @@ typedef struct QuorumVotes {
> >  typedef struct BDRVQuorumState {
> >      BdrvChild **children;  /* children BlockDriverStates */
> >      int num_children;      /* children count */
> > +    uint64_t last_index;   /* indicate the child role name of the last
> > +                            * element of children array
> > +                            */
> 
> I think you can use a regular 'unsigned' here, it's simpler and more
> efficient. We're not going to have 2^32 Quorum children, are we? :)
> 
> > +static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs,
> > +                             Error **errp)
> > +{
> > +    BDRVQuorumState *s = bs->opaque;
> > +    BdrvChild *child;
> > +    char indexstr[32];
> > +    int ret;
> > +
> > +    assert(s->num_children <= INT_MAX / sizeof(BdrvChild *) &&
> > +           s->last_index <= UINT64_MAX);
> 
> That last condition has no practical effect. last_index is a uint64_t so
> s->last_index <= UINT64_MAX is always going to be true.
> 
> > +    s->children = g_renew(BdrvChild *, s->children, s->num_children + 1);
> > +    s->children[s->num_children++] = child;
> 
> Slightly simpler way:
> 
>        s->children = g_renew(BdrvChild *, s->children, ++s->num_children);
>        s->children[s->num_children] = child;

Without having checked the context, this code is not equivalent. You
need to access s->children[s->num_children - 1] now in the second line.

> But this is not very important, so you can leave it as it is now if you
> prefer.
> 
> > +    /* child->name is "children.%d" */
> > +    assert(!strncmp(child->name, "children.", 9));
> 
> I actually don't think there's anything wrong with this assertion, but
> if you decide to keep it you can use g_str_has_prefix() instead, which
> is a bit easier and more readable.

There's also good old strstart() from cutils.c.

Kevin
Alberto Garcia May 10, 2016, 8:45 a.m. UTC | #8
On Tue 10 May 2016 10:39:21 AM CEST, Kevin Wolf wrote:
>>        s->children = g_renew(BdrvChild *, s->children, ++s->num_children);
>>        s->children[s->num_children] = child;
>
> Without having checked the context, this code is not equivalent. You
> need to access s->children[s->num_children - 1] now in the second
> line.

Yeah, you are both right, sorry for the mistake!

Berto
diff mbox

Patch

diff --git a/block.c b/block.c
index 68cd3b2..4bdc6b3 100644
--- a/block.c
+++ b/block.c
@@ -1176,10 +1176,10 @@  BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
     return child;
 }
 
-static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
-                                    BlockDriverState *child_bs,
-                                    const char *child_name,
-                                    const BdrvChildRole *child_role)
+BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
+                             BlockDriverState *child_bs,
+                             const char *child_name,
+                             const BdrvChildRole *child_role)
 {
     BdrvChild *child = bdrv_root_attach_child(child_bs, child_name, child_role);
     QLIST_INSERT_HEAD(&parent_bs->children, child, next);
diff --git a/block/quorum.c b/block/quorum.c
index da15465..2553f82 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -14,6 +14,7 @@ 
  */
 
 #include "qemu/osdep.h"
+#include "qemu/cutils.h"
 #include "block/block_int.h"
 #include "qapi/qmp/qbool.h"
 #include "qapi/qmp/qdict.h"
@@ -67,6 +68,9 @@  typedef struct QuorumVotes {
 typedef struct BDRVQuorumState {
     BdrvChild **children;  /* children BlockDriverStates */
     int num_children;      /* children count */
+    uint64_t last_index;   /* index used in the role name ("children.N")
+                            * of the next child to be added
+                            */
     int threshold;         /* if less than threshold children reads gave the
                             * same result a quorum error occurs.
                             */
@@ -898,9 +902,9 @@  static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
         ret = -EINVAL;
         goto exit;
     }
-    if (s->num_children < 2) {
+    if (s->num_children < 1) {
         error_setg(&local_err,
-                   "Number of provided children must be greater than 1");
+                   "Number of provided children must be 1 or more");
         ret = -EINVAL;
         goto exit;
     }
@@ -964,6 +968,7 @@  static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
 
         opened[i] = true;
     }
+    s->last_index = s->num_children;
 
     g_free(opened);
     goto exit;
@@ -1020,6 +1025,72 @@  static void quorum_attach_aio_context(BlockDriverState *bs,
     }
 }
 
+static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs,
+                             Error **errp)
+{
+    BDRVQuorumState *s = bs->opaque;
+    BdrvChild *child;
+    char indexstr[32];
+    int ret;
+
+    assert(s->num_children <= INT_MAX / sizeof(BdrvChild *) &&
+           s->last_index <= UINT64_MAX); /* NOTE: uint64_t, always true */
+    if (s->num_children == INT_MAX / sizeof(BdrvChild *) ||
+        s->last_index == UINT64_MAX) {
+        error_setg(errp, "Too many children");
+        return;
+    }
+
+    ret = snprintf(indexstr, 32, "children.%" PRIu64, s->last_index);
+    if (ret < 0 || ret >= 32) { /* encoding error or truncated output */
+        error_setg(errp, "cannot generate child name");
+        return;
+    }
+    s->last_index++;
+
+    bdrv_drain(bs);
+
+    /* bs has been drained; take a reference and append the new child */
+    bdrv_ref(child_bs);
+    child = bdrv_attach_child(bs, child_bs, indexstr, &child_format);
+    s->children = g_renew(BdrvChild *, s->children, s->num_children + 1);
+    s->children[s->num_children++] = child;
+}
+
+static void quorum_del_child(BlockDriverState *bs, BdrvChild *child,
+                             Error **errp)
+{
+    BDRVQuorumState *s = bs->opaque;
+    int i;
+
+    for (i = 0; i < s->num_children; i++) {
+        if (s->children[i] == child) {
+            break;
+        }
+    }
+
+    /* child must be in s->children; bdrv_del_child() checks this first */
+    assert(i < s->num_children);
+
+    if (s->num_children <= s->threshold) { /* would drop below threshold */
+        error_setg(errp,
+            "The number of children cannot be lower than the vote threshold %d",
+            s->threshold);
+        return;
+    }
+
+    /* child->name has the form "children.<index>" */
+    assert(!strncmp(child->name, "children.", 9));
+
+    bdrv_drain(bs);
+
+    /* bs has been drained; close the gap at i and shrink the array */
+    memmove(&s->children[i], &s->children[i + 1],
+            (s->num_children - i - 1) * sizeof(BdrvChild *));
+    s->children = g_renew(BdrvChild *, s->children, --s->num_children);
+    bdrv_unref_child(bs, child);
+}
+
 static void quorum_refresh_filename(BlockDriverState *bs, QDict *options)
 {
     BDRVQuorumState *s = bs->opaque;
@@ -1075,6 +1146,9 @@  static BlockDriver bdrv_quorum = {
     .bdrv_detach_aio_context            = quorum_detach_aio_context,
     .bdrv_attach_aio_context            = quorum_attach_aio_context,
 
+    .bdrv_add_child                     = quorum_add_child,
+    .bdrv_del_child                     = quorum_del_child,
+
     .is_filter                          = true,
     .bdrv_recurse_is_first_non_filter   = quorum_recurse_is_first_non_filter,
 };
diff --git a/include/block/block.h b/include/block/block.h
index 694ca76..52902cd 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -476,6 +476,10 @@  void bdrv_disable_copy_on_read(BlockDriverState *bs);
 void bdrv_ref(BlockDriverState *bs);
 void bdrv_unref(BlockDriverState *bs);
 void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child);
+BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
+                             BlockDriverState *child_bs,
+                             const char *child_name,
+                             const BdrvChildRole *child_role);
 
 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp);
 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason);