diff mbox series

[v2] block/rbd: implement .bdrv_get_allocated_file_size callback

Message ID 20190510153346.238366-1-sgarzare@redhat.com (mailing list archive)
State New, archived
Headers show
Series [v2] block/rbd: implement .bdrv_get_allocated_file_size callback | expand

Commit Message

Stefano Garzarella May 10, 2019, 3:33 p.m. UTC
This patch allows 'qemu-img info' to show the 'disk size' for
the RBD images that have the fast-diff feature enabled.

If this feature is enabled, we use the rbd_diff_iterate2() API
to calculate the allocated size for the image.

Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
v2:
  - calculate the actual usage only if the fast-diff feature is
    enabled [Jason]
---
 block/rbd.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

Comments

John Snow June 26, 2019, 9:04 p.m. UTC | #1
It looks like this has hit a 30 day expiration without any reviews or
being merged; do we still want this? If so, can you please resend?

On 5/10/19 11:33 AM, Stefano Garzarella wrote:
> This patch allows 'qemu-img info' to show the 'disk size' for
> the RBD images that have the fast-diff feature enabled.
> 
> If this feature is enabled, we use the rbd_diff_iterate2() API
> to calculate the allocated size for the image.
> 
> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
> ---
> v2:
>   - calculate the actual usage only if the fast-diff feature is
>     enabled [Jason]
> ---
>  block/rbd.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 54 insertions(+)
> 
> diff --git a/block/rbd.c b/block/rbd.c
> index 0c549c9935..f1bc76ab80 100644
> --- a/block/rbd.c
> +++ b/block/rbd.c
> @@ -1046,6 +1046,59 @@ static int64_t qemu_rbd_getlength(BlockDriverState *bs)
>      return info.size;
>  }
>  
> +static int rbd_allocated_size_cb(uint64_t offset, size_t len, int exists,
> +                                 void *arg)
> +{
> +    int64_t *alloc_size = (int64_t *) arg;
> +
> +    if (exists) {
> +        (*alloc_size) += len;
> +    }
> +
> +    return 0;
> +}
> +
> +static int64_t qemu_rbd_get_allocated_file_size(BlockDriverState *bs)
> +{
> +    BDRVRBDState *s = bs->opaque;
> +    uint64_t flags, features;
> +    int64_t alloc_size = 0;
> +    int r;
> +
> +    r = rbd_get_flags(s->image, &flags);
> +    if (r < 0) {
> +        return r;
> +    }
> +

Do you know where rbd_get_flags is documented? I can't seem to quickly
find a reference that tells me what to expect from calling it. It
returns an int, I guess an error code, but how can I confirm this?

*clones the ceph repository*

src/librbd/internal.cc get_flags convinces me it probably works like I
think, but is there not a reference here?

> +    r = rbd_get_features(s->image, &features);
> +    if (r < 0) {
> +        return r;
> +    }
> +
> +    /*
> +     * We use rbd_diff_iterate2() only if the RBD image have fast-diff
> +     * feature enabled. If it is disabled, rbd_diff_iterate2() could be
> +     * very slow on a big image.
> +     */
> +    if (!(features & RBD_FEATURE_FAST_DIFF) ||
> +        (flags & RBD_FLAG_FAST_DIFF_INVALID)) {
> +        return -1;
> +    }
> +

(Is there a reference for the list of flags to make sure there aren't
other cases we might want to skip this?)

It looks reasonable at a glance, but maybe let's return -ENOTSUP instead
of -1, based on the idea that bdrv_get_allocated_file_size returns
-ENOMEDIUM in a prominent error case -- let's match that error convention.

(Well, I wonder what the librbd calls are returning and if THOSE mean
anything.)

> +    /*
> +     * rbd_diff_iterate2(), if the source snapshot name is NULL, invokes
> +     * the callback on all allocated regions of the image.
> +     */
> +    r = rbd_diff_iterate2(s->image, NULL, 0,
> +                          bs->total_sectors * BDRV_SECTOR_SIZE, 0, 1,
> +                          &rbd_allocated_size_cb, &alloc_size);
> +    if (r < 0) {
> +        return r;
> +    }
> +

I guess I'll take your word for it. ¯\_(ツ)_/¯

> +    return alloc_size;
> +}
> +
>  static int coroutine_fn qemu_rbd_co_truncate(BlockDriverState *bs,
>                                               int64_t offset,
>                                               PreallocMode prealloc,
> @@ -1254,6 +1307,7 @@ static BlockDriver bdrv_rbd = {
>      .bdrv_get_info          = qemu_rbd_getinfo,
>      .create_opts            = &qemu_rbd_create_opts,
>      .bdrv_getlength         = qemu_rbd_getlength,
> +    .bdrv_get_allocated_file_size = qemu_rbd_get_allocated_file_size,
>      .bdrv_co_truncate       = qemu_rbd_co_truncate,
>      .protocol_name          = "rbd",
>  
>
Stefano Garzarella June 27, 2019, 8:48 a.m. UTC | #2
On Wed, Jun 26, 2019 at 05:04:25PM -0400, John Snow wrote:
> It looks like this has hit a 30 day expiration without any reviews or
> being merged; do we still want this? If so, can you please resend?

Yes, I think we still want :)

Is it okay if I send a v3 following your comments?

> 
> On 5/10/19 11:33 AM, Stefano Garzarella wrote:
> > This patch allows 'qemu-img info' to show the 'disk size' for
> > the RBD images that have the fast-diff feature enabled.
> > 
> > If this feature is enabled, we use the rbd_diff_iterate2() API
> > to calculate the allocated size for the image.
> > 
> > Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
> > ---
> > v2:
> >   - calculate the actual usage only if the fast-diff feature is
> >     enabled [Jason]
> > ---
> >  block/rbd.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 54 insertions(+)
> > 
> > diff --git a/block/rbd.c b/block/rbd.c
> > index 0c549c9935..f1bc76ab80 100644
> > --- a/block/rbd.c
> > +++ b/block/rbd.c
> > @@ -1046,6 +1046,59 @@ static int64_t qemu_rbd_getlength(BlockDriverState *bs)
> >      return info.size;
> >  }
> >  
> > +static int rbd_allocated_size_cb(uint64_t offset, size_t len, int exists,
> > +                                 void *arg)
> > +{
> > +    int64_t *alloc_size = (int64_t *) arg;
> > +
> > +    if (exists) {
> > +        (*alloc_size) += len;
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +static int64_t qemu_rbd_get_allocated_file_size(BlockDriverState *bs)
> > +{
> > +    BDRVRBDState *s = bs->opaque;
> > +    uint64_t flags, features;
> > +    int64_t alloc_size = 0;
> > +    int r;
> > +
> > +    r = rbd_get_flags(s->image, &flags);
> > +    if (r < 0) {
> > +        return r;
> > +    }
> > +
> 
> Do you know where rbd_get_flags is documented? I can't seem to quickly
> find a reference that tells me what to expect from calling it. It
> returns an int, I guess an error code, but how can I confirm this?
> 
> *clones the ceph repository*
> 
> src/librbd/internal.cc get_flags convinces me it probably works like I
> think, but is there not a reference here?
> 

Good question!
I didn't find any docs, but looking in the ceph tests test/librbd/fsx.cc,
they print an error message if the return value is less than 0.

A 'get_flags' implemented in cls/rbd/cls_rbd.cc for example returns 0 at the
end and -EINVAL in a try/catch. It also uses 'read_key()' that in some cases
returns -EIO, so I hope that the error returned by rbd_get_flags() is one of
the errors defined in errno.h

> > +    r = rbd_get_features(s->image, &features);
> > +    if (r < 0) {
> > +        return r;
> > +    }
> > +
> > +    /*
> > +     * We use rbd_diff_iterate2() only if the RBD image have fast-diff
> > +     * feature enabled. If it is disabled, rbd_diff_iterate2() could be
> > +     * very slow on a big image.
> > +     */
> > +    if (!(features & RBD_FEATURE_FAST_DIFF) ||
> > +        (flags & RBD_FLAG_FAST_DIFF_INVALID)) {
> > +        return -1;
> > +    }
> > +
> 
> (Is there a reference for the list of flags to make sure there aren't
> other cases we might want to skip this?)

Unfortunately no :(
As Jason suggested, I followed what libvirt did in the
volStorageBackendRBDUseFastDiff() [src/storage/storage_backend_rbd.c]

> 
> It looks reasonable at a glance, but maybe let's return -ENOTSUP instead
> of -1, based on the idea that bdrv_get_allocated_file_size returns
> -ENOMEDIUM in a prominent error case -- let's match that error convention.

Sure, -ENOTSUP is absolutely better!

> 
> (Well, I wonder what the librbd calls are returning and if THOSE mean
> anything.)

I hope they return an errno.h errors, but I'm not sure if the meaning
make sense for us.

Do you think is better to return -ENOTSUP or -EIO when librbd calls
fail?


Thanks for your comments,
Stefano
John Snow June 27, 2019, 5:24 p.m. UTC | #3
On 6/27/19 4:48 AM, Stefano Garzarella wrote:
> On Wed, Jun 26, 2019 at 05:04:25PM -0400, John Snow wrote:
>> It looks like this has hit a 30 day expiration without any reviews or
>> being merged; do we still want this? If so, can you please resend?
> 
> Yes, I think we still want :)
> 
> Is it okay if I send a v3 following your comments?
> 

Yes, but I don't know who is responsible for final approval; I guess
that's Josh Durgin?

>>
>> On 5/10/19 11:33 AM, Stefano Garzarella wrote:
>>> This patch allows 'qemu-img info' to show the 'disk size' for
>>> the RBD images that have the fast-diff feature enabled.
>>>
>>> If this feature is enabled, we use the rbd_diff_iterate2() API
>>> to calculate the allocated size for the image.
>>>
>>> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
>>> ---
>>> v2:
>>>   - calculate the actual usage only if the fast-diff feature is
>>>     enabled [Jason]
>>> ---
>>>  block/rbd.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>  1 file changed, 54 insertions(+)
>>>
>>> diff --git a/block/rbd.c b/block/rbd.c
>>> index 0c549c9935..f1bc76ab80 100644
>>> --- a/block/rbd.c
>>> +++ b/block/rbd.c
>>> @@ -1046,6 +1046,59 @@ static int64_t qemu_rbd_getlength(BlockDriverState *bs)
>>>      return info.size;
>>>  }
>>>  
>>> +static int rbd_allocated_size_cb(uint64_t offset, size_t len, int exists,
>>> +                                 void *arg)
>>> +{
>>> +    int64_t *alloc_size = (int64_t *) arg;
>>> +
>>> +    if (exists) {
>>> +        (*alloc_size) += len;
>>> +    }
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +static int64_t qemu_rbd_get_allocated_file_size(BlockDriverState *bs)
>>> +{
>>> +    BDRVRBDState *s = bs->opaque;
>>> +    uint64_t flags, features;
>>> +    int64_t alloc_size = 0;
>>> +    int r;
>>> +
>>> +    r = rbd_get_flags(s->image, &flags);
>>> +    if (r < 0) {
>>> +        return r;
>>> +    }
>>> +
>>
>> Do you know where rbd_get_flags is documented? I can't seem to quickly
>> find a reference that tells me what to expect from calling it. It
>> returns an int, I guess an error code, but how can I confirm this?
>>
>> *clones the ceph repository*
>>
>> src/librbd/internal.cc get_flags convinces me it probably works like I
>> think, but is there not a reference here?
>>
> 
> Good question!
> I didn't find any docs, but looking in the ceph tests test/librbd/fsx.cc,
> they print an error message if the return value is less than 0.
> 
> A 'get_flags' implemented in cls/rbd/cls_rbd.cc for example returns 0 at the
> end and -EINVAL in a try/catch. It also uses 'read_key()' that in some cases
> returns -EIO, so I hope that the error returned by rbd_get_flags() is one of
> the errors defined in errno.h
> 
>>> +    r = rbd_get_features(s->image, &features);
>>> +    if (r < 0) {
>>> +        return r;
>>> +    }
>>> +
>>> +    /*
>>> +     * We use rbd_diff_iterate2() only if the RBD image have fast-diff
>>> +     * feature enabled. If it is disabled, rbd_diff_iterate2() could be
>>> +     * very slow on a big image.
>>> +     */
>>> +    if (!(features & RBD_FEATURE_FAST_DIFF) ||
>>> +        (flags & RBD_FLAG_FAST_DIFF_INVALID)) {
>>> +        return -1;
>>> +    }
>>> +
>>
>> (Is there a reference for the list of flags to make sure there aren't
>> other cases we might want to skip this?)
> 
> Unfortunately no :(
> As Jason suggested, I followed what libvirt did in the
> volStorageBackendRBDUseFastDiff() [src/storage/storage_backend_rbd.c]
> 
>>
>> It looks reasonable at a glance, but maybe let's return -ENOTSUP instead
>> of -1, based on the idea that bdrv_get_allocated_file_size returns
>> -ENOMEDIUM in a prominent error case -- let's match that error convention.
> 
> Sure, -ENOTSUP is absolutely better!
> 
>>
>> (Well, I wonder what the librbd calls are returning and if THOSE mean
>> anything.)
> 
> I hope they return an errno.h errors, but I'm not sure if the meaning
> make sense for us.
> 
> Do you think is better to return -ENOTSUP or -EIO when librbd calls
> fail?
> 

I'll be honest, I have no idea because I don't know what failure of
these calls means _at all_, so I don't know if it should be something
severe like EIO or something more mundane.

I guess just leave them alone in absence of better information, honestly.

> 
> Thanks for your comments,
> Stefano
> 

Thank you for trying to patch rbd :)
Jason Dillaman June 27, 2019, 7:43 p.m. UTC | #4
On Thu, Jun 27, 2019 at 1:24 PM John Snow <jsnow@redhat.com> wrote:
>
>
>
> On 6/27/19 4:48 AM, Stefano Garzarella wrote:
> > On Wed, Jun 26, 2019 at 05:04:25PM -0400, John Snow wrote:
> >> It looks like this has hit a 30 day expiration without any reviews or
> >> being merged; do we still want this? If so, can you please resend?
> >
> > Yes, I think we still want :)
> >
> > Is it okay if I send a v3 following your comments?
> >
>
> Yes, but I don't know who is responsible for final approval; I guess
> that's Josh Durgin?

I'm the new (for the past several years) upstream PTL for RBD, so feel
free to tag me.

> >>
> >> On 5/10/19 11:33 AM, Stefano Garzarella wrote:
> >>> This patch allows 'qemu-img info' to show the 'disk size' for
> >>> the RBD images that have the fast-diff feature enabled.
> >>>
> >>> If this feature is enabled, we use the rbd_diff_iterate2() API
> >>> to calculate the allocated size for the image.
> >>>
> >>> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
> >>> ---
> >>> v2:
> >>>   - calculate the actual usage only if the fast-diff feature is
> >>>     enabled [Jason]
> >>> ---
> >>>  block/rbd.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> >>>  1 file changed, 54 insertions(+)
> >>>
> >>> diff --git a/block/rbd.c b/block/rbd.c
> >>> index 0c549c9935..f1bc76ab80 100644
> >>> --- a/block/rbd.c
> >>> +++ b/block/rbd.c
> >>> @@ -1046,6 +1046,59 @@ static int64_t qemu_rbd_getlength(BlockDriverState *bs)
> >>>      return info.size;
> >>>  }
> >>>
> >>> +static int rbd_allocated_size_cb(uint64_t offset, size_t len, int exists,
> >>> +                                 void *arg)
> >>> +{
> >>> +    int64_t *alloc_size = (int64_t *) arg;
> >>> +
> >>> +    if (exists) {
> >>> +        (*alloc_size) += len;
> >>> +    }
> >>> +
> >>> +    return 0;
> >>> +}
> >>> +
> >>> +static int64_t qemu_rbd_get_allocated_file_size(BlockDriverState *bs)
> >>> +{
> >>> +    BDRVRBDState *s = bs->opaque;
> >>> +    uint64_t flags, features;
> >>> +    int64_t alloc_size = 0;
> >>> +    int r;
> >>> +
> >>> +    r = rbd_get_flags(s->image, &flags);
> >>> +    if (r < 0) {
> >>> +        return r;
> >>> +    }
> >>> +
> >>
> >> Do you know where rbd_get_flags is documented? I can't seem to quickly
> >> find a reference that tells me what to expect from calling it. It
> >> returns an int, I guess an error code, but how can I confirm this?
> >>
> >> *clones the ceph repository*
> >>
> >> src/librbd/internal.cc get_flags convinces me it probably works like I
> >> think, but is there not a reference here?
> >>
> >
> > Good question!
> > I didn't find any docs, but looking in the ceph tests test/librbd/fsx.cc,
> > they print an error message if the return value is less than 0.
> >
> > A 'get_flags' implemented in cls/rbd/cls_rbd.cc for example returns 0 at the
> > end and -EINVAL in a try/catch. It also uses 'read_key()' that in some cases
> > returns -EIO, so I hope that the error returned by rbd_get_flags() is one of
> > the errors defined in errno.h
> >
> >>> +    r = rbd_get_features(s->image, &features);
> >>> +    if (r < 0) {
> >>> +        return r;
> >>> +    }
> >>> +
> >>> +    /*
> >>> +     * We use rbd_diff_iterate2() only if the RBD image have fast-diff
> >>> +     * feature enabled. If it is disabled, rbd_diff_iterate2() could be
> >>> +     * very slow on a big image.
> >>> +     */
> >>> +    if (!(features & RBD_FEATURE_FAST_DIFF) ||
> >>> +        (flags & RBD_FLAG_FAST_DIFF_INVALID)) {
> >>> +        return -1;
> >>> +    }
> >>> +
> >>
> >> (Is there a reference for the list of flags to make sure there aren't
> >> other cases we might want to skip this?)
> >
> > Unfortunately no :(
> > As Jason suggested, I followed what libvirt did in the
> > volStorageBackendRBDUseFastDiff() [src/storage/storage_backend_rbd.c]

These are the only ones.

> >>
> >> It looks reasonable at a glance, but maybe let's return -ENOTSUP instead
> >> of -1, based on the idea that bdrv_get_allocated_file_size returns
> >> -ENOMEDIUM in a prominent error case -- let's match that error convention.
> >
> > Sure, -ENOTSUP is absolutely better!
> >
> >>
> >> (Well, I wonder what the librbd calls are returning and if THOSE mean
> >> anything.)
> >
> > I hope they return an errno.h errors, but I'm not sure if the meaning
> > make sense for us.
> >
> > Do you think is better to return -ENOTSUP or -EIO when librbd calls
> > fail?
> >
>
> I'll be honest, I have no idea because I don't know what failure of
> these calls means _at all_, so I don't know if it should be something
> severe like EIO or something more mundane.
>
> I guess just leave them alone in absence of better information, honestly.

It looks like QEMU treats any negative error code like the actual size
isn't available. However, for clarity I would vote for -ENOTSUPP since
that's what would be returned if the driver doesn't support it.

> >
> > Thanks for your comments,
> > Stefano
> >
>
> Thank you for trying to patch rbd :)
John Snow June 27, 2019, 7:45 p.m. UTC | #5
On 6/27/19 3:43 PM, Jason Dillaman wrote:
> On Thu, Jun 27, 2019 at 1:24 PM John Snow <jsnow@redhat.com> wrote:
>>
>>
>>
>> On 6/27/19 4:48 AM, Stefano Garzarella wrote:
>>> On Wed, Jun 26, 2019 at 05:04:25PM -0400, John Snow wrote:
>>>> It looks like this has hit a 30 day expiration without any reviews or
>>>> being merged; do we still want this? If so, can you please resend?
>>>
>>> Yes, I think we still want :)
>>>
>>> Is it okay if I send a v3 following your comments?
>>>
>>
>> Yes, but I don't know who is responsible for final approval; I guess
>> that's Josh Durgin?
> 
> I'm the new (for the past several years) upstream PTL for RBD, so feel
> free to tag me.
> 

I got Josh's name out of MAINTAINERS, does it need an update?

> RBD
> M: Josh Durgin <jdurgin@redhat.com>
> L: qemu-block@nongnu.org
> S: Supported
> F: block/rbd.c
Jason Dillaman June 28, 2019, 12:23 a.m. UTC | #6
On Thu, Jun 27, 2019 at 3:45 PM John Snow <jsnow@redhat.com> wrote:
>
>
>
> On 6/27/19 3:43 PM, Jason Dillaman wrote:
> > On Thu, Jun 27, 2019 at 1:24 PM John Snow <jsnow@redhat.com> wrote:
> >>
> >>
> >>
> >> On 6/27/19 4:48 AM, Stefano Garzarella wrote:
> >>> On Wed, Jun 26, 2019 at 05:04:25PM -0400, John Snow wrote:
> >>>> It looks like this has hit a 30 day expiration without any reviews or
> >>>> being merged; do we still want this? If so, can you please resend?
> >>>
> >>> Yes, I think we still want :)
> >>>
> >>> Is it okay if I send a v3 following your comments?
> >>>
> >>
> >> Yes, but I don't know who is responsible for final approval; I guess
> >> that's Josh Durgin?
> >
> > I'm the new (for the past several years) upstream PTL for RBD, so feel
> > free to tag me.
> >
>
> I got Josh's name out of MAINTAINERS, does it need an update?
>
> > RBD
> > M: Josh Durgin <jdurgin@redhat.com>
> > L: qemu-block@nongnu.org
> > S: Supported
> > F: block/rbd.c

Yes, I'll submit a patch to update that tomorrow.
Stefano Garzarella June 28, 2019, 8:59 a.m. UTC | #7
On Thu, Jun 27, 2019 at 03:43:04PM -0400, Jason Dillaman wrote:
> On Thu, Jun 27, 2019 at 1:24 PM John Snow <jsnow@redhat.com> wrote:
> > On 6/27/19 4:48 AM, Stefano Garzarella wrote:
> > > On Wed, Jun 26, 2019 at 05:04:25PM -0400, John Snow wrote:
> > >> It looks like this has hit a 30 day expiration without any reviews or
> > >> being merged; do we still want this? If so, can you please resend?
> > >
> > > Yes, I think we still want :)
> > >
> > > Is it okay if I send a v3 following your comments?
> > >
> >
> > Yes, but I don't know who is responsible for final approval; I guess
> > that's Josh Durgin?
> 
> I'm the new (for the past several years) upstream PTL for RBD, so feel
> free to tag me.
> 
> > >>
> > >> On 5/10/19 11:33 AM, Stefano Garzarella wrote:
> > >>> This patch allows 'qemu-img info' to show the 'disk size' for
> > >>> the RBD images that have the fast-diff feature enabled.
> > >>>
> > >>> If this feature is enabled, we use the rbd_diff_iterate2() API
> > >>> to calculate the allocated size for the image.
> > >>>
> > >>> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
> > >>> ---
> > >>> v2:
> > >>>   - calculate the actual usage only if the fast-diff feature is
> > >>>     enabled [Jason]
> > >>> ---
> > >>>  block/rbd.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> > >>>  1 file changed, 54 insertions(+)
> > >>>
> > >>> diff --git a/block/rbd.c b/block/rbd.c
> > >>> index 0c549c9935..f1bc76ab80 100644
> > >>> --- a/block/rbd.c
> > >>> +++ b/block/rbd.c
> > >>> @@ -1046,6 +1046,59 @@ static int64_t qemu_rbd_getlength(BlockDriverState *bs)
> > >>>      return info.size;
> > >>>  }
> > >>>
> > >>> +static int rbd_allocated_size_cb(uint64_t offset, size_t len, int exists,
> > >>> +                                 void *arg)
> > >>> +{
> > >>> +    int64_t *alloc_size = (int64_t *) arg;
> > >>> +
> > >>> +    if (exists) {
> > >>> +        (*alloc_size) += len;
> > >>> +    }
> > >>> +
> > >>> +    return 0;
> > >>> +}
> > >>> +
> > >>> +static int64_t qemu_rbd_get_allocated_file_size(BlockDriverState *bs)
> > >>> +{
> > >>> +    BDRVRBDState *s = bs->opaque;
> > >>> +    uint64_t flags, features;
> > >>> +    int64_t alloc_size = 0;
> > >>> +    int r;
> > >>> +
> > >>> +    r = rbd_get_flags(s->image, &flags);
> > >>> +    if (r < 0) {
> > >>> +        return r;
> > >>> +    }
> > >>> +
> > >>
> > >> Do you know where rbd_get_flags is documented? I can't seem to quickly
> > >> find a reference that tells me what to expect from calling it. It
> > >> returns an int, I guess an error code, but how can I confirm this?
> > >>
> > >> *clones the ceph repository*
> > >>
> > >> src/librbd/internal.cc get_flags convinces me it probably works like I
> > >> think, but is there not a reference here?
> > >>
> > >
> > > Good question!
> > > I didn't find any docs, but looking in the ceph tests test/librbd/fsx.cc,
> > > they print an error message if the return value is less than 0.
> > >
> > > A 'get_flags' implemented in cls/rbd/cls_rbd.cc for example returns 0 at the
> > > end and -EINVAL in a try/catch. It also uses 'read_key()' that in some cases
> > > returns -EIO, so I hope that the error returned by rbd_get_flags() is one of
> > > the errors defined in errno.h
> > >
> > >>> +    r = rbd_get_features(s->image, &features);
> > >>> +    if (r < 0) {
> > >>> +        return r;
> > >>> +    }
> > >>> +
> > >>> +    /*
> > >>> +     * We use rbd_diff_iterate2() only if the RBD image have fast-diff
> > >>> +     * feature enabled. If it is disabled, rbd_diff_iterate2() could be
> > >>> +     * very slow on a big image.
> > >>> +     */
> > >>> +    if (!(features & RBD_FEATURE_FAST_DIFF) ||
> > >>> +        (flags & RBD_FLAG_FAST_DIFF_INVALID)) {
> > >>> +        return -1;
> > >>> +    }
> > >>> +
> > >>
> > >> (Is there a reference for the list of flags to make sure there aren't
> > >> other cases we might want to skip this?)
> > >
> > > Unfortunately no :(
> > > As Jason suggested, I followed what libvirt did in the
> > > volStorageBackendRBDUseFastDiff() [src/storage/storage_backend_rbd.c]
> 
> These are the only ones.

Thanks for the clarification!

> 
> > >>
> > >> It looks reasonable at a glance, but maybe let's return -ENOTSUP instead
> > >> of -1, based on the idea that bdrv_get_allocated_file_size returns
> > >> -ENOMEDIUM in a prominent error case -- let's match that error convention.
> > >
> > > Sure, -ENOTSUP is absolutely better!
> > >
> > >>
> > >> (Well, I wonder what the librbd calls are returning and if THOSE mean
> > >> anything.)
> > >
> > > I hope they return an errno.h errors, but I'm not sure if the meaning
> > > make sense for us.
> > >
> > > Do you think is better to return -ENOTSUP or -EIO when librbd calls
> > > fail?
> > >
> >
> > I'll be honest, I have no idea because I don't know what failure of
> > these calls means _at all_, so I don't know if it should be something
> > severe like EIO or something more mundane.
> >
> > I guess just leave them alone in absence of better information, honestly.
> 
> It looks like QEMU treats any negative error code like the actual size
> isn't available. However, for clarity I would vote for -ENOTSUPP since
> that's what would be returned if the driver doesn't support it.
> 

Do you mean to return -ENOTSUP even when there's a runtime error in
rbd_get_features() or rbd_get_flags() or rbd_diff_iterate2?

Thanks,
Stefano
Jason Dillaman July 2, 2019, 3:09 p.m. UTC | #8
On Fri, Jun 28, 2019 at 4:59 AM Stefano Garzarella <sgarzare@redhat.com> wrote:
>
> On Thu, Jun 27, 2019 at 03:43:04PM -0400, Jason Dillaman wrote:
> > On Thu, Jun 27, 2019 at 1:24 PM John Snow <jsnow@redhat.com> wrote:
> > > On 6/27/19 4:48 AM, Stefano Garzarella wrote:
> > > > On Wed, Jun 26, 2019 at 05:04:25PM -0400, John Snow wrote:
> > > >> It looks like this has hit a 30 day expiration without any reviews or
> > > >> being merged; do we still want this? If so, can you please resend?
> > > >
> > > > Yes, I think we still want :)
> > > >
> > > > Is it okay if I send a v3 following your comments?
> > > >
> > >
> > > Yes, but I don't know who is responsible for final approval; I guess
> > > that's Josh Durgin?
> >
> > I'm the new (for the past several years) upstream PTL for RBD, so feel
> > free to tag me.
> >
> > > >>
> > > >> On 5/10/19 11:33 AM, Stefano Garzarella wrote:
> > > >>> This patch allows 'qemu-img info' to show the 'disk size' for
> > > >>> the RBD images that have the fast-diff feature enabled.
> > > >>>
> > > >>> If this feature is enabled, we use the rbd_diff_iterate2() API
> > > >>> to calculate the allocated size for the image.
> > > >>>
> > > >>> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
> > > >>> ---
> > > >>> v2:
> > > >>>   - calculate the actual usage only if the fast-diff feature is
> > > >>>     enabled [Jason]
> > > >>> ---
> > > >>>  block/rbd.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> > > >>>  1 file changed, 54 insertions(+)
> > > >>>
> > > >>> diff --git a/block/rbd.c b/block/rbd.c
> > > >>> index 0c549c9935..f1bc76ab80 100644
> > > >>> --- a/block/rbd.c
> > > >>> +++ b/block/rbd.c
> > > >>> @@ -1046,6 +1046,59 @@ static int64_t qemu_rbd_getlength(BlockDriverState *bs)
> > > >>>      return info.size;
> > > >>>  }
> > > >>>
> > > >>> +static int rbd_allocated_size_cb(uint64_t offset, size_t len, int exists,
> > > >>> +                                 void *arg)
> > > >>> +{
> > > >>> +    int64_t *alloc_size = (int64_t *) arg;
> > > >>> +
> > > >>> +    if (exists) {
> > > >>> +        (*alloc_size) += len;
> > > >>> +    }
> > > >>> +
> > > >>> +    return 0;
> > > >>> +}
> > > >>> +
> > > >>> +static int64_t qemu_rbd_get_allocated_file_size(BlockDriverState *bs)
> > > >>> +{
> > > >>> +    BDRVRBDState *s = bs->opaque;
> > > >>> +    uint64_t flags, features;
> > > >>> +    int64_t alloc_size = 0;
> > > >>> +    int r;
> > > >>> +
> > > >>> +    r = rbd_get_flags(s->image, &flags);
> > > >>> +    if (r < 0) {
> > > >>> +        return r;
> > > >>> +    }
> > > >>> +
> > > >>
> > > >> Do you know where rbd_get_flags is documented? I can't seem to quickly
> > > >> find a reference that tells me what to expect from calling it. It
> > > >> returns an int, I guess an error code, but how can I confirm this?
> > > >>
> > > >> *clones the ceph repository*
> > > >>
> > > >> src/librbd/internal.cc get_flags convinces me it probably works like I
> > > >> think, but is there not a reference here?
> > > >>
> > > >
> > > > Good question!
> > > > I didn't find any docs, but looking in the ceph tests test/librbd/fsx.cc,
> > > > they print an error message if the return value is less than 0.
> > > >
> > > > A 'get_flags' implemented in cls/rbd/cls_rbd.cc for example returns 0 at the
> > > > end and -EINVAL in a try/catch. It also uses 'read_key()' that in some cases
> > > > returns -EIO, so I hope that the error returned by rbd_get_flags() is one of
> > > > the errors defined in errno.h
> > > >
> > > >>> +    r = rbd_get_features(s->image, &features);
> > > >>> +    if (r < 0) {
> > > >>> +        return r;
> > > >>> +    }
> > > >>> +
> > > >>> +    /*
> > > >>> +     * We use rbd_diff_iterate2() only if the RBD image have fast-diff
> > > >>> +     * feature enabled. If it is disabled, rbd_diff_iterate2() could be
> > > >>> +     * very slow on a big image.
> > > >>> +     */
> > > >>> +    if (!(features & RBD_FEATURE_FAST_DIFF) ||
> > > >>> +        (flags & RBD_FLAG_FAST_DIFF_INVALID)) {
> > > >>> +        return -1;
> > > >>> +    }
> > > >>> +
> > > >>
> > > >> (Is there a reference for the list of flags to make sure there aren't
> > > >> other cases we might want to skip this?)
> > > >
> > > > Unfortunately no :(
> > > > As Jason suggested, I followed what libvirt did in the
> > > > volStorageBackendRBDUseFastDiff() [src/storage/storage_backend_rbd.c]
> >
> > These are the only ones.
>
> Thanks for the clarification!
>
> >
> > > >>
> > > >> It looks reasonable at a glance, but maybe let's return -ENOTSUP instead
> > > >> of -1, based on the idea that bdrv_get_allocated_file_size returns
> > > >> -ENOMEDIUM in a prominent error case -- let's match that error convention.
> > > >
> > > > Sure, -ENOTSUP is absolutely better!
> > > >
> > > >>
> > > >> (Well, I wonder what the librbd calls are returning and if THOSE mean
> > > >> anything.)
> > > >
> > > > I hope they return an errno.h errors, but I'm not sure if the meaning
> > > > make sense for us.
> > > >
> > > > Do you think is better to return -ENOTSUP or -EIO when librbd calls
> > > > fail?
> > > >
> > >
> > > I'll be honest, I have no idea because I don't know what failure of
> > > these calls means _at all_, so I don't know if it should be something
> > > severe like EIO or something more mundane.
> > >
> > > I guess just leave them alone in absence of better information, honestly.
> >
> > It looks like QEMU treats any negative error code like the actual size
> > isn't available. However, for clarity I would vote for -ENOTSUPP since
> > that's what would be returned if the driver doesn't support it.
> >
>
> Do you mean to return -ENOTSUP even when there's a runtime error in
> rbd_get_features() or rbd_get_flags() or rbd_diff_iterate2?

I was advocating for only returning -ENOTSUP in the case where
fast-diff isn't enabled or is flagged as invalid (instead of the
hard-coded -1). If one of the librbd APIs returns an error for some
reason, it seems like you can just pass it along like the current
patch does since any negative result is handled.

> Thanks,
> Stefano

--
Jason
Stefano Garzarella July 3, 2019, 7:31 a.m. UTC | #9
On Tue, Jul 02, 2019 at 11:09:14AM -0400, Jason Dillaman wrote:
> On Fri, Jun 28, 2019 at 4:59 AM Stefano Garzarella <sgarzare@redhat.com> wrote:
> >
> > On Thu, Jun 27, 2019 at 03:43:04PM -0400, Jason Dillaman wrote:
> > > On Thu, Jun 27, 2019 at 1:24 PM John Snow <jsnow@redhat.com> wrote:
> > > > On 6/27/19 4:48 AM, Stefano Garzarella wrote:
> > > > > On Wed, Jun 26, 2019 at 05:04:25PM -0400, John Snow wrote:
> > > > >> It looks like this has hit a 30 day expiration without any reviews or
> > > > >> being merged; do we still want this? If so, can you please resend?
> > > > >
> > > > > Yes, I think we still want :)
> > > > >
> > > > > Is it okay if I send a v3 following your comments?
> > > > >
> > > >
> > > > Yes, but I don't know who is responsible for final approval; I guess
> > > > that's Josh Durgin?
> > >
> > > I'm the new (for the past several years) upstream PTL for RBD, so feel
> > > free to tag me.
> > >
> > > > >>
> > > > >> On 5/10/19 11:33 AM, Stefano Garzarella wrote:
> > > > >>> This patch allows 'qemu-img info' to show the 'disk size' for
> > > > >>> the RBD images that have the fast-diff feature enabled.
> > > > >>>
> > > > >>> If this feature is enabled, we use the rbd_diff_iterate2() API
> > > > >>> to calculate the allocated size for the image.
> > > > >>>
> > > > >>> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
> > > > >>> ---
> > > > >>> v2:
> > > > >>>   - calculate the actual usage only if the fast-diff feature is
> > > > >>>     enabled [Jason]
> > > > >>> ---
> > > > >>>  block/rbd.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++++
> > > > >>>  1 file changed, 54 insertions(+)
> > > > >>>
> > > > >>> diff --git a/block/rbd.c b/block/rbd.c
> > > > >>> index 0c549c9935..f1bc76ab80 100644
> > > > >>> --- a/block/rbd.c
> > > > >>> +++ b/block/rbd.c
> > > > >>> @@ -1046,6 +1046,59 @@ static int64_t qemu_rbd_getlength(BlockDriverState *bs)
> > > > >>>      return info.size;
> > > > >>>  }
> > > > >>>
> > > > >>> +static int rbd_allocated_size_cb(uint64_t offset, size_t len, int exists,
> > > > >>> +                                 void *arg)
> > > > >>> +{
> > > > >>> +    int64_t *alloc_size = (int64_t *) arg;
> > > > >>> +
> > > > >>> +    if (exists) {
> > > > >>> +        (*alloc_size) += len;
> > > > >>> +    }
> > > > >>> +
> > > > >>> +    return 0;
> > > > >>> +}
> > > > >>> +
> > > > >>> +static int64_t qemu_rbd_get_allocated_file_size(BlockDriverState *bs)
> > > > >>> +{
> > > > >>> +    BDRVRBDState *s = bs->opaque;
> > > > >>> +    uint64_t flags, features;
> > > > >>> +    int64_t alloc_size = 0;
> > > > >>> +    int r;
> > > > >>> +
> > > > >>> +    r = rbd_get_flags(s->image, &flags);
> > > > >>> +    if (r < 0) {
> > > > >>> +        return r;
> > > > >>> +    }
> > > > >>> +
> > > > >>
> > > > >> Do you know where rbd_get_flags is documented? I can't seem to quickly
> > > > >> find a reference that tells me what to expect from calling it. It
> > > > >> returns an int, I guess an error code, but how can I confirm this?
> > > > >>
> > > > >> *clones the ceph repository*
> > > > >>
> > > > >> src/librbd/internal.cc get_flags convinces me it probably works like I
> > > > >> think, but is there not a reference here?
> > > > >>
> > > > >
> > > > > Good question!
> > > > > I didn't find any docs, but looking in the ceph tests test/librbd/fsx.cc,
> > > > > they print an error message if the return value is less than 0.
> > > > >
> > > > > A 'get_flags' implemented in cls/rbd/cls_rbd.cc for example returns 0 at the
> > > > > end and -EINVAL in a try/catch. It also uses 'read_key()' that in some cases
> > > > > returns -EIO, so I hope that the error returned by rbd_get_flags() is one of
> > > > > the errors defined in errno.h
> > > > >
> > > > >>> +    r = rbd_get_features(s->image, &features);
> > > > >>> +    if (r < 0) {
> > > > >>> +        return r;
> > > > >>> +    }
> > > > >>> +
> > > > >>> +    /*
> > > > >>> +     * We use rbd_diff_iterate2() only if the RBD image have fast-diff
> > > > >>> +     * feature enabled. If it is disabled, rbd_diff_iterate2() could be
> > > > >>> +     * very slow on a big image.
> > > > >>> +     */
> > > > >>> +    if (!(features & RBD_FEATURE_FAST_DIFF) ||
> > > > >>> +        (flags & RBD_FLAG_FAST_DIFF_INVALID)) {
> > > > >>> +        return -1;
> > > > >>> +    }
> > > > >>> +
> > > > >>
> > > > >> (Is there a reference for the list of flags to make sure there aren't
> > > > >> other cases we might want to skip this?)
> > > > >
> > > > > Unfortunately no :(
> > > > > As Jason suggested, I followed what libvirt did in the
> > > > > volStorageBackendRBDUseFastDiff() [src/storage/storage_backend_rbd.c]
> > >
> > > These are the only ones.
> >
> > Thanks for the clarification!
> >
> > >
> > > > >>
> > > > >> It looks reasonable at a glance, but maybe let's return -ENOTSUP instead
> > > > >> of -1, based on the idea that bdrv_get_allocated_file_size returns
> > > > >> -ENOMEDIUM in a prominent error case -- let's match that error convention.
> > > > >
> > > > > Sure, -ENOTSUP is absolutely better!
> > > > >
> > > > >>
> > > > >> (Well, I wonder what the librbd calls are returning and if THOSE mean
> > > > >> anything.)
> > > > >
> > > > > I hope they return an errno.h errors, but I'm not sure if the meaning
> > > > > make sense for us.
> > > > >
> > > > > Do you think is better to return -ENOTSUP or -EIO when librbd calls
> > > > > fail?
> > > > >
> > > >
> > > > I'll be honest, I have no idea because I don't know what failure of
> > > > these calls means _at all_, so I don't know if it should be something
> > > > severe like EIO or something more mundane.
> > > >
> > > > I guess just leave them alone in absence of better information, honestly.
> > >
> > > It looks like QEMU treats any negative error code like the actual size
> > > isn't available. However, for clarity I would vote for -ENOTSUPP since
> > > that's what would be returned if the driver doesn't support it.
> > >
> >
> > Do you mean to return -ENOTSUP even when there's a runtime error in
> > rbd_get_features() or rbd_get_flags() or rbd_diff_iterate2?
> 
> I was advocating for only returning -ENOTSUP in the case where
> fast-diff isn't enabled or is flagged as invalid (instead of the
> hard-coded -1). If one of the librbd APIs returns an error for some
> reason, it seems like you can just pass it along like the current
> patch does since any negative result is handled.

Sure, that's what I wanted to do in v3, but I had a doubt.

Thanks for the clarification, I'll send a v3 following your and John
reviews.

Stefano
diff mbox series

Patch

diff --git a/block/rbd.c b/block/rbd.c
index 0c549c9935..f1bc76ab80 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -1046,6 +1046,59 @@  static int64_t qemu_rbd_getlength(BlockDriverState *bs)
     return info.size;
 }
 
+static int rbd_allocated_size_cb(uint64_t offset, size_t len, int exists,
+                                 void *arg)
+{
+    int64_t *alloc_size = (int64_t *) arg;
+
+    if (exists) {
+        (*alloc_size) += len;
+    }
+
+    return 0;
+}
+
+static int64_t qemu_rbd_get_allocated_file_size(BlockDriverState *bs)
+{
+    BDRVRBDState *s = bs->opaque;
+    uint64_t flags, features;
+    int64_t alloc_size = 0;
+    int r;
+
+    r = rbd_get_flags(s->image, &flags);
+    if (r < 0) {
+        return r;
+    }
+
+    r = rbd_get_features(s->image, &features);
+    if (r < 0) {
+        return r;
+    }
+
+    /*
+     * We use rbd_diff_iterate2() only if the RBD image have fast-diff
+     * feature enabled. If it is disabled, rbd_diff_iterate2() could be
+     * very slow on a big image.
+     */
+    if (!(features & RBD_FEATURE_FAST_DIFF) ||
+        (flags & RBD_FLAG_FAST_DIFF_INVALID)) {
+        return -1;
+    }
+
+    /*
+     * rbd_diff_iterate2(), if the source snapshot name is NULL, invokes
+     * the callback on all allocated regions of the image.
+     */
+    r = rbd_diff_iterate2(s->image, NULL, 0,
+                          bs->total_sectors * BDRV_SECTOR_SIZE, 0, 1,
+                          &rbd_allocated_size_cb, &alloc_size);
+    if (r < 0) {
+        return r;
+    }
+
+    return alloc_size;
+}
+
 static int coroutine_fn qemu_rbd_co_truncate(BlockDriverState *bs,
                                              int64_t offset,
                                              PreallocMode prealloc,
@@ -1254,6 +1307,7 @@  static BlockDriver bdrv_rbd = {
     .bdrv_get_info          = qemu_rbd_getinfo,
     .create_opts            = &qemu_rbd_create_opts,
     .bdrv_getlength         = qemu_rbd_getlength,
+    .bdrv_get_allocated_file_size = qemu_rbd_get_allocated_file_size,
     .bdrv_co_truncate       = qemu_rbd_co_truncate,
     .protocol_name          = "rbd",