diff mbox

[RFC,1/3] vmstate: error hint for failed equal checks

Message ID 20170606165510.33057-2-pasic@linux.vnet.ibm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Halil Pasic June 6, 2017, 4:55 p.m. UTC
In some cases a failing VMSTATE_*_EQUAL does not mean we detected a bug
(it's actually the best we can do). Especially in these cases a verbose
error message is required.

Let's introduce infrastructure for specifying an error hint to be used if
an equal check fails.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
---
Macros come in part 2. Once we are happy with the macros
these two patches should be squashed into one.
---
 include/migration/vmstate.h |  1 +
 migration/vmstate-types.c   | 36 +++++++++++++++++++++++++++++++-----
 2 files changed, 32 insertions(+), 5 deletions(-)

Comments

Dr. David Alan Gilbert June 7, 2017, 9:51 a.m. UTC | #1
* Halil Pasic (pasic@linux.vnet.ibm.com) wrote:
> In some cases a failing VMSTATE_*_EQUAL does not mean we detected a bug
> (it's actually the best we can do). Especially in these cases a verbose
> error message is required.
> 
> Let's introduce infrastructure for specifying a error hint to be used if
> equal check fails.
> 
> Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
> ---
> Macros come in part 2. Once we are happy with the macros
> this two patches should be squashed into one. 
> ---
>  include/migration/vmstate.h |  1 +
>  migration/vmstate-types.c   | 36 +++++++++++++++++++++++++++++++-----
>  2 files changed, 32 insertions(+), 5 deletions(-)
> 
> diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
> index 66895623da..d90d9b12ca 100644
> --- a/include/migration/vmstate.h
> +++ b/include/migration/vmstate.h
> @@ -200,6 +200,7 @@ typedef enum {
>  
>  struct VMStateField {
>      const char *name;
> +    const char *err_hint;
>      size_t offset;
>      size_t size;
>      size_t start;
> diff --git a/migration/vmstate-types.c b/migration/vmstate-types.c
> index 7287c6baa6..84d0545a38 100644
> --- a/migration/vmstate-types.c
> +++ b/migration/vmstate-types.c
> @@ -19,6 +19,7 @@
>  #include "qemu/error-report.h"
>  #include "qemu/queue.h"
>  #include "trace.h"
> +#include "qapi/error.h"
>  
>  /* bool */
>  
> @@ -118,6 +119,7 @@ const VMStateInfo vmstate_info_int32 = {
>  static int get_int32_equal(QEMUFile *f, void *pv, size_t size,
>                             VMStateField *field)
>  {
> +    Error *err = NULL;
>      int32_t *v = pv;
>      int32_t v2;
>      qemu_get_sbe32s(f, &v2);
> @@ -125,7 +127,11 @@ static int get_int32_equal(QEMUFile *f, void *pv, size_t size,
>      if (*v == v2) {
>          return 0;
>      }
> -    error_report("%" PRIx32 " != %" PRIx32, *v, v2);
> +    error_setg(&err, "%" PRIx32 " != %" PRIx32, *v, v2);
> +    if (field->err_hint) {
> +        error_append_hint(&err, "%s\n", field->err_hint);
> +    }
> +    error_report_err(err);

I'm a bit worried as to whether the error_append_hint data gets
printed out by error_report_err if we're being driven by a QMP
monitor.
error_report_err uses error_printf_unless_qmp

Since this code doesn't really pass Error *'s back up,
and always prints its errors to stderr, I'd prefer if you just
used error_report again for the hint, something like:

if (field->err_hint) {
  error_report("%" PRIx32 " != %" PRIx32 "(%s)",
               *v, v2, field->err_hint);
} else {
  error_report("%" PRIx32 " != %" PRIx32, *v, v2);
}

Dave

>      return -EINVAL;
>  }
>  
> @@ -259,6 +265,7 @@ const VMStateInfo vmstate_info_uint32 = {
>  static int get_uint32_equal(QEMUFile *f, void *pv, size_t size,
>                              VMStateField *field)
>  {
> +    Error *err = NULL;
>      uint32_t *v = pv;
>      uint32_t v2;
>      qemu_get_be32s(f, &v2);
> @@ -266,7 +273,11 @@ static int get_uint32_equal(QEMUFile *f, void *pv, size_t size,
>      if (*v == v2) {
>          return 0;
>      }
> -    error_report("%" PRIx32 " != %" PRIx32, *v, v2);
> +    error_setg(&err, "%" PRIx32 " != %" PRIx32, *v, v2);
> +    if (field->err_hint) {
> +        error_append_hint(&err, "%s\n", field->err_hint);
> +    }
> +    error_report_err(err);
>      return -EINVAL;
>  }
>  
> @@ -333,6 +344,7 @@ const VMStateInfo vmstate_info_nullptr = {
>  static int get_uint64_equal(QEMUFile *f, void *pv, size_t size,
>                              VMStateField *field)
>  {
> +    Error *err = NULL;
>      uint64_t *v = pv;
>      uint64_t v2;
>      qemu_get_be64s(f, &v2);
> @@ -340,7 +352,11 @@ static int get_uint64_equal(QEMUFile *f, void *pv, size_t size,
>      if (*v == v2) {
>          return 0;
>      }
> -    error_report("%" PRIx64 " != %" PRIx64, *v, v2);
> +    error_setg(&err, "%" PRIx64 " != %" PRIx64, *v, v2);
> +    if (field->err_hint) {
> +        error_append_hint(&err, "%s\n", field->err_hint);
> +    }
> +    error_report_err(err);
>      return -EINVAL;
>  }
>  
> @@ -356,6 +372,7 @@ const VMStateInfo vmstate_info_uint64_equal = {
>  static int get_uint8_equal(QEMUFile *f, void *pv, size_t size,
>                             VMStateField *field)
>  {
> +    Error *err = NULL;
>      uint8_t *v = pv;
>      uint8_t v2;
>      qemu_get_8s(f, &v2);
> @@ -363,7 +380,11 @@ static int get_uint8_equal(QEMUFile *f, void *pv, size_t size,
>      if (*v == v2) {
>          return 0;
>      }
> -    error_report("%x != %x", *v, v2);
> +    error_setg(&err, "%x != %x", *v, v2);
> +    if (field->err_hint) {
> +        error_append_hint(&err, "%s\n", field->err_hint);
> +    }
> +    error_report_err(err);
>      return -EINVAL;
>  }
>  
> @@ -379,6 +400,7 @@ const VMStateInfo vmstate_info_uint8_equal = {
>  static int get_uint16_equal(QEMUFile *f, void *pv, size_t size,
>                              VMStateField *field)
>  {
> +    Error *err = NULL;
>      uint16_t *v = pv;
>      uint16_t v2;
>      qemu_get_be16s(f, &v2);
> @@ -386,7 +408,11 @@ static int get_uint16_equal(QEMUFile *f, void *pv, size_t size,
>      if (*v == v2) {
>          return 0;
>      }
> -    error_report("%x != %x", *v, v2);
> +    error_setg(&err, "%x != %x", *v, v2);
> +    if (field->err_hint) {
> +        error_append_hint(&err, "%s\n", field->err_hint);
> +    }
> +    error_report_err(err);
>      return -EINVAL;
>  }
>  
> -- 
> 2.11.2
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Halil Pasic June 8, 2017, 11:05 a.m. UTC | #2
On 06/07/2017 11:51 AM, Dr. David Alan Gilbert wrote:
> * Halil Pasic (pasic@linux.vnet.ibm.com) wrote:
>> In some cases a failing VMSTATE_*_EQUAL does not mean we detected a bug
>> (it's actually the best we can do). Especially in these cases a verbose
>> error message is required.
>>
>> Let's introduce infrastructure for specifying a error hint to be used if
>> equal check fails.
>>
>> Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
>> ---
>> Macros come in part 2. Once we are happy with the macros
>> this two patches should be squashed into one. 
>> ---
>>  include/migration/vmstate.h |  1 +
>>  migration/vmstate-types.c   | 36 +++++++++++++++++++++++++++++++-----
>>  2 files changed, 32 insertions(+), 5 deletions(-)
>>
>> diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
>> index 66895623da..d90d9b12ca 100644
>> --- a/include/migration/vmstate.h
>> +++ b/include/migration/vmstate.h
>> @@ -200,6 +200,7 @@ typedef enum {
>>  
>>  struct VMStateField {
>>      const char *name;
>> +    const char *err_hint;
>>      size_t offset;
>>      size_t size;
>>      size_t start;
>> diff --git a/migration/vmstate-types.c b/migration/vmstate-types.c
>> index 7287c6baa6..84d0545a38 100644
>> --- a/migration/vmstate-types.c
>> +++ b/migration/vmstate-types.c
>> @@ -19,6 +19,7 @@
>>  #include "qemu/error-report.h"
>>  #include "qemu/queue.h"
>>  #include "trace.h"
>> +#include "qapi/error.h"
>>  
>>  /* bool */
>>  
>> @@ -118,6 +119,7 @@ const VMStateInfo vmstate_info_int32 = {
>>  static int get_int32_equal(QEMUFile *f, void *pv, size_t size,
>>                             VMStateField *field)
>>  {
>> +    Error *err = NULL;
>>      int32_t *v = pv;
>>      int32_t v2;
>>      qemu_get_sbe32s(f, &v2);
>> @@ -125,7 +127,11 @@ static int get_int32_equal(QEMUFile *f, void *pv, size_t size,
>>      if (*v == v2) {
>>          return 0;
>>      }
>> -    error_report("%" PRIx32 " != %" PRIx32, *v, v2);
>> +    error_setg(&err, "%" PRIx32 " != %" PRIx32, *v, v2);
>> +    if (field->err_hint) {
>> +        error_append_hint(&err, "%s\n", field->err_hint);
>> +    }
>> +    error_report_err(err);
> 
> I'm a bit worried as to whether the error_append_hint data gets
> printed out by error_report_err if we're being driven by a QMP
> monitor.
> error_report_err uses error_printf_unless_qmp
> 
> Since this code doesn't really handle Error *'s back up,
> and always prints it's errors into stderr, I'd prefer if you just
> used error_report again for the hint, something like:
> 
> if (field->err_hint) {
>   error_report("%" PRIx32 " != %" PRIx32 "(%s)",
>                *v, v2, field->err_hint);
> } else {
>   error_report("%" PRIx32 " != %" PRIx32, *v, v2);
> }
> 
> Dave

One reason I chose error_report_err is to be consistent about hint
reporting (the other one is that it was what Connie suggested). I do
not understand why we omit hints under QMP, but I figured that's
our policy. So the hint I'm adding must not be printed in QMP
context -- because that's our policy. I was pretty sure that what I
want to do is add a hint (and not make a very long 'core' error
message).

Can you (or somebody else) explain why hints are dropped in QMP
context?

Don't misunderstand me: I'm open to your proposal, it's just
that:
1) I would like to understand.
2) I would like to get the very same result as produced by
https://lists.nongnu.org/archive/html/qemu-devel/2017-06/msg01472.html 

Regards,
Halil
Halil Pasic June 14, 2017, 1:51 p.m. UTC | #3
On 06/08/2017 01:05 PM, Halil Pasic wrote:
> 
> 
> On 06/07/2017 11:51 AM, Dr. David Alan Gilbert wrote:
>> * Halil Pasic (pasic@linux.vnet.ibm.com) wrote:
>>> In some cases a failing VMSTATE_*_EQUAL does not mean we detected a bug
>>> (it's actually the best we can do). Especially in these cases a verbose
>>> error message is required.
>>>
>>> Let's introduce infrastructure for specifying a error hint to be used if
>>> equal check fails.
>>>
>>> Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
>>> ---
>>> Macros come in part 2. Once we are happy with the macros
>>> this two patches should be squashed into one. 
>>> ---
>>>  include/migration/vmstate.h |  1 +
>>>  migration/vmstate-types.c   | 36 +++++++++++++++++++++++++++++++-----
>>>  2 files changed, 32 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
>>> index 66895623da..d90d9b12ca 100644
>>> --- a/include/migration/vmstate.h
>>> +++ b/include/migration/vmstate.h
>>> @@ -200,6 +200,7 @@ typedef enum {
>>>  
>>>  struct VMStateField {
>>>      const char *name;
>>> +    const char *err_hint;
>>>      size_t offset;
>>>      size_t size;
>>>      size_t start;
>>> diff --git a/migration/vmstate-types.c b/migration/vmstate-types.c
>>> index 7287c6baa6..84d0545a38 100644
>>> --- a/migration/vmstate-types.c
>>> +++ b/migration/vmstate-types.c
>>> @@ -19,6 +19,7 @@
>>>  #include "qemu/error-report.h"
>>>  #include "qemu/queue.h"
>>>  #include "trace.h"
>>> +#include "qapi/error.h"
>>>  
>>>  /* bool */
>>>  
>>> @@ -118,6 +119,7 @@ const VMStateInfo vmstate_info_int32 = {
>>>  static int get_int32_equal(QEMUFile *f, void *pv, size_t size,
>>>                             VMStateField *field)
>>>  {
>>> +    Error *err = NULL;
>>>      int32_t *v = pv;
>>>      int32_t v2;
>>>      qemu_get_sbe32s(f, &v2);
>>> @@ -125,7 +127,11 @@ static int get_int32_equal(QEMUFile *f, void *pv, size_t size,
>>>      if (*v == v2) {
>>>          return 0;
>>>      }
>>> -    error_report("%" PRIx32 " != %" PRIx32, *v, v2);
>>> +    error_setg(&err, "%" PRIx32 " != %" PRIx32, *v, v2);
>>> +    if (field->err_hint) {
>>> +        error_append_hint(&err, "%s\n", field->err_hint);
>>> +    }
>>> +    error_report_err(err);
>>
>> I'm a bit worried as to whether the error_append_hint data gets
>> printed out by error_report_err if we're being driven by a QMP
>> monitor.
>> error_report_err uses error_printf_unless_qmp
>>
>> Since this code doesn't really handle Error *'s back up,
>> and always prints it's errors into stderr, I'd prefer if you just
>> used error_report again for the hint, something like:
>>
>> if (field->err_hint) {
>>   error_report("%" PRIx32 " != %" PRIx32 "(%s)",
>>                *v, v2, field->err_hint);
>> } else {
>>   error_report("%" PRIx32 " != %" PRIx32, *v, v2);
>> }
>>
>> Dave
> 
> One reason I choose error_report_err is to be consistent about hint
> reporting (the other one is that was what Connie suggested). I do
> not understand why do we omit hints if QMP, but I figured that's
> our policy. So the hint I'm adding must not be printed in QMP
> context -- because that's our policy. I was pretty sure what I
> want to do is add a hint (and not make a very long 'core' error
> message).
> 
> Can you (or somebody else)  explain why are hints dropped in QMP
> context?
> 
> Don't misunderstand I'm open towards your proposal, it's just
> that:
> 1) I would like to understand.
> 2) I would like to get the very same result as produced by
> https://lists.nongnu.org/archive/html/qemu-devel/2017-06/msg01472.html 
> 
> Regards,
> Halil
> 
> 

ping.

I would like to do a v2, but I want this sorted out first.

'This' basically boils down to the questions
'Why aren't hints reported in QMP context?' and 'Why is this
case special (i.e. why should a hint be reported
even in QMP context)?'

Regarding the first question: hints being reported via
error_printf_unless_qmp seems to come from commit
50b7b000c9 ("hmp: Allow for error message hints on HMP")
--> Cc-ing Eric, maybe he can help.

Regards,
Halil
Dr. David Alan Gilbert June 22, 2017, 8:22 a.m. UTC | #4
* Halil Pasic (pasic@linux.vnet.ibm.com) wrote:
> 
> 
> On 06/08/2017 01:05 PM, Halil Pasic wrote:
> > 
> > 
> > On 06/07/2017 11:51 AM, Dr. David Alan Gilbert wrote:
> >> * Halil Pasic (pasic@linux.vnet.ibm.com) wrote:
> >>> In some cases a failing VMSTATE_*_EQUAL does not mean we detected a bug
> >>> (it's actually the best we can do). Especially in these cases a verbose
> >>> error message is required.
> >>>
> >>> Let's introduce infrastructure for specifying a error hint to be used if
> >>> equal check fails.
> >>>
> >>> Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
> >>> ---
> >>> Macros come in part 2. Once we are happy with the macros
> >>> this two patches should be squashed into one. 
> >>> ---
> >>>  include/migration/vmstate.h |  1 +
> >>>  migration/vmstate-types.c   | 36 +++++++++++++++++++++++++++++++-----
> >>>  2 files changed, 32 insertions(+), 5 deletions(-)
> >>>
> >>> diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
> >>> index 66895623da..d90d9b12ca 100644
> >>> --- a/include/migration/vmstate.h
> >>> +++ b/include/migration/vmstate.h
> >>> @@ -200,6 +200,7 @@ typedef enum {
> >>>  
> >>>  struct VMStateField {
> >>>      const char *name;
> >>> +    const char *err_hint;
> >>>      size_t offset;
> >>>      size_t size;
> >>>      size_t start;
> >>> diff --git a/migration/vmstate-types.c b/migration/vmstate-types.c
> >>> index 7287c6baa6..84d0545a38 100644
> >>> --- a/migration/vmstate-types.c
> >>> +++ b/migration/vmstate-types.c
> >>> @@ -19,6 +19,7 @@
> >>>  #include "qemu/error-report.h"
> >>>  #include "qemu/queue.h"
> >>>  #include "trace.h"
> >>> +#include "qapi/error.h"
> >>>  
> >>>  /* bool */
> >>>  
> >>> @@ -118,6 +119,7 @@ const VMStateInfo vmstate_info_int32 = {
> >>>  static int get_int32_equal(QEMUFile *f, void *pv, size_t size,
> >>>                             VMStateField *field)
> >>>  {
> >>> +    Error *err = NULL;
> >>>      int32_t *v = pv;
> >>>      int32_t v2;
> >>>      qemu_get_sbe32s(f, &v2);
> >>> @@ -125,7 +127,11 @@ static int get_int32_equal(QEMUFile *f, void *pv, size_t size,
> >>>      if (*v == v2) {
> >>>          return 0;
> >>>      }
> >>> -    error_report("%" PRIx32 " != %" PRIx32, *v, v2);
> >>> +    error_setg(&err, "%" PRIx32 " != %" PRIx32, *v, v2);
> >>> +    if (field->err_hint) {
> >>> +        error_append_hint(&err, "%s\n", field->err_hint);
> >>> +    }
> >>> +    error_report_err(err);
> >>
> >> I'm a bit worried as to whether the error_append_hint data gets
> >> printed out by error_report_err if we're being driven by a QMP
> >> monitor.
> >> error_report_err uses error_printf_unless_qmp
> >>
> >> Since this code doesn't really handle Error *'s back up,
> >> and always prints it's errors into stderr, I'd prefer if you just
> >> used error_report again for the hint, something like:
> >>
> >> if (field->err_hint) {
> >>   error_report("%" PRIx32 " != %" PRIx32 "(%s)",
> >>                *v, v2, field->err_hint);
> >> } else {
> >>   error_report("%" PRIx32 " != %" PRIx32, *v, v2);
> >> }
> >>
> >> Dave
> > 
> > One reason I choose error_report_err is to be consistent about hint
> > reporting (the other one is that was what Connie suggested). I do
> > not understand why do we omit hints if QMP, but I figured that's
> > our policy. So the hint I'm adding must not be printed in QMP
> > context -- because that's our policy. I was pretty sure what I
> > want to do is add a hint (and not make a very long 'core' error
> > message).
> > 
> > Can you (or somebody else)  explain why are hints dropped in QMP
> > context?
> > 
> > Don't misunderstand I'm open towards your proposal, it's just
> > that:
> > 1) I would like to understand.
> > 2) I would like to get the very same result as produced by
> > https://lists.nongnu.org/archive/html/qemu-devel/2017-06/msg01472.html 
> > 
> > Regards,
> > Halil
> > 
> > 
> 
> ping.
> 
> I would like to do a v2, but I want this sorted out first.
> 
> 'This' basically boils down to the question and
> 'Why aren't hints reported in QMP context?' and 'Why is this
> case special (a hint should be reported
> even in QMP context?'
> 
> Regarding the first question hints being reported via
> error_printf_unless_qmp seems to come from commit
> 50b7b000c9 ("hmp: Allow for error message hints on HMP")
> --> Cc-ing Eric maybe he can help.

I don't understand the full logic behind error_append_hint;
my only concern here is that the full text ends up on stderr
even if the migration is driven by QMP.
Since we can do that just by using error_report like it's already
being used with the slight change I suggested, it seems easy.

Dave

> Regards,
> Halil
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Halil Pasic June 22, 2017, 1:18 p.m. UTC | #5
On 06/22/2017 10:22 AM, Dr. David Alan Gilbert wrote:
> * Halil Pasic (pasic@linux.vnet.ibm.com) wrote:
>>
>>
>> On 06/08/2017 01:05 PM, Halil Pasic wrote:
>>>
>>>
>>> On 06/07/2017 11:51 AM, Dr. David Alan Gilbert wrote:
>>>> * Halil Pasic (pasic@linux.vnet.ibm.com) wrote:
>>>>> In some cases a failing VMSTATE_*_EQUAL does not mean we detected a bug
>>>>> (it's actually the best we can do). Especially in these cases a verbose
>>>>> error message is required.
>>>>>
>>>>> Let's introduce infrastructure for specifying a error hint to be used if
>>>>> equal check fails.
>>>>>
>>>>> Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
>>>>> ---
>>>>> Macros come in part 2. Once we are happy with the macros
>>>>> this two patches should be squashed into one. 
>>>>> ---
>>>>>  include/migration/vmstate.h |  1 +
>>>>>  migration/vmstate-types.c   | 36 +++++++++++++++++++++++++++++++-----
>>>>>  2 files changed, 32 insertions(+), 5 deletions(-)
>>>>>
>>>>> diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
>>>>> index 66895623da..d90d9b12ca 100644
>>>>> --- a/include/migration/vmstate.h
>>>>> +++ b/include/migration/vmstate.h
>>>>> @@ -200,6 +200,7 @@ typedef enum {
>>>>>  
>>>>>  struct VMStateField {
>>>>>      const char *name;
>>>>> +    const char *err_hint;
>>>>>      size_t offset;
>>>>>      size_t size;
>>>>>      size_t start;
>>>>> diff --git a/migration/vmstate-types.c b/migration/vmstate-types.c
>>>>> index 7287c6baa6..84d0545a38 100644
>>>>> --- a/migration/vmstate-types.c
>>>>> +++ b/migration/vmstate-types.c
>>>>> @@ -19,6 +19,7 @@
>>>>>  #include "qemu/error-report.h"
>>>>>  #include "qemu/queue.h"
>>>>>  #include "trace.h"
>>>>> +#include "qapi/error.h"
>>>>>  
>>>>>  /* bool */
>>>>>  
>>>>> @@ -118,6 +119,7 @@ const VMStateInfo vmstate_info_int32 = {
>>>>>  static int get_int32_equal(QEMUFile *f, void *pv, size_t size,
>>>>>                             VMStateField *field)
>>>>>  {
>>>>> +    Error *err = NULL;
>>>>>      int32_t *v = pv;
>>>>>      int32_t v2;
>>>>>      qemu_get_sbe32s(f, &v2);
>>>>> @@ -125,7 +127,11 @@ static int get_int32_equal(QEMUFile *f, void *pv, size_t size,
>>>>>      if (*v == v2) {
>>>>>          return 0;
>>>>>      }
>>>>> -    error_report("%" PRIx32 " != %" PRIx32, *v, v2);
>>>>> +    error_setg(&err, "%" PRIx32 " != %" PRIx32, *v, v2);
>>>>> +    if (field->err_hint) {
>>>>> +        error_append_hint(&err, "%s\n", field->err_hint);
>>>>> +    }
>>>>> +    error_report_err(err);
>>>>
>>>> I'm a bit worried as to whether the error_append_hint data gets
>>>> printed out by error_report_err if we're being driven by a QMP
>>>> monitor.
>>>> error_report_err uses error_printf_unless_qmp
>>>>
>>>> Since this code doesn't really handle Error *'s back up,
>>>> and always prints it's errors into stderr, I'd prefer if you just
>>>> used error_report again for the hint, something like:
>>>>
>>>> if (field->err_hint) {
>>>>   error_report("%" PRIx32 " != %" PRIx32 "(%s)",
>>>>                *v, v2, field->err_hint);
>>>> } else {
>>>>   error_report("%" PRIx32 " != %" PRIx32, *v, v2);
>>>> }
>>>>
>>>> Dave
>>>
>>> One reason I choose error_report_err is to be consistent about hint
>>> reporting (the other one is that was what Connie suggested). I do
>>> not understand why do we omit hints if QMP, but I figured that's
>>> our policy. So the hint I'm adding must not be printed in QMP
>>> context -- because that's our policy. I was pretty sure what I
>>> want to do is add a hint (and not make a very long 'core' error
>>> message).
>>>
>>> Can you (or somebody else)  explain why are hints dropped in QMP
>>> context?
>>>
>>> Don't misunderstand I'm open towards your proposal, it's just
>>> that:
>>> 1) I would like to understand.
>>> 2) I would like to get the very same result as produced by
>>> https://lists.nongnu.org/archive/html/qemu-devel/2017-06/msg01472.html 
>>>
>>> Regards,
>>> Halil
>>>
>>>
>>
>> ping.
>>
>> I would like to do a v2, but I want this sorted out first.
>>
>> 'This' basically boils down to the question and
>> 'Why aren't hints reported in QMP context?' and 'Why is this
>> case special (a hint should be reported
>> even in QMP context?'
>>
>> Regarding the first question hints being reported via
>> error_printf_unless_qmp seems to come from commit
>> 50b7b000c9 ("hmp: Allow for error message hints on HMP")
>> --> Cc-ing Eric maybe he can help.
> 
> I don't understand the full logic behind error_append_hint;
> my only concern here is that the full text ends up on stderr
> even if the migration is driven by QMP.
> Since we can do that just by using error_report like it's already
> being used with the slight change I suggested, it seems easy.
> 
> Dave
> 

Thanks for the reply! Since nobody else cared to explain the logic,
I guess it is not all that important and we are fine with printing
the hint in QMP context too.

I would like to keep the output consistent with 8ed179c937 ("s390x/css:
catch section mismatch on load", 2017-05-18).

First I tried (to make the err_hint look like a hint):

+    if (field->err_hint) {
+        error_report("%" PRIx32 " != %" PRIx32 "\n%s\n",
+                     *v, v2, field->err_hint);
+    } else {
+        error_report("%" PRIx32 " != %" PRIx32, *v, v2);
+    } 

but checkpatch does not like that because a newline in an error
message seems to be evil.

Would you also be OK with:

    error_report("%" PRIx32 " != %" PRIx32, *v, v2);                            
    if (field->err_hint) {                                                      
        error_printf("%s\n", field->err_hint);                                  
    } 
or do you prefer producing a single line (in that case I
would have to sacrifice 'no change in behavior' for my vmstate
conversion of virtio-ccw :( )?

Regards,
Halil


>> Regards,
>> Halil
>>
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
>
Dr. David Alan Gilbert June 22, 2017, 5:06 p.m. UTC | #6
* Halil Pasic (pasic@linux.vnet.ibm.com) wrote:
> 
> 
> On 06/22/2017 10:22 AM, Dr. David Alan Gilbert wrote:
> > * Halil Pasic (pasic@linux.vnet.ibm.com) wrote:
> >>
> >>
> >> On 06/08/2017 01:05 PM, Halil Pasic wrote:
> >>>
> >>>
> >>> On 06/07/2017 11:51 AM, Dr. David Alan Gilbert wrote:
> >>>> * Halil Pasic (pasic@linux.vnet.ibm.com) wrote:
> >>>>> In some cases a failing VMSTATE_*_EQUAL does not mean we detected a bug
> >>>>> (it's actually the best we can do). Especially in these cases a verbose
> >>>>> error message is required.
> >>>>>
> >>>>> Let's introduce infrastructure for specifying a error hint to be used if
> >>>>> equal check fails.
> >>>>>
> >>>>> Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
> >>>>> ---
> >>>>> Macros come in part 2. Once we are happy with the macros
> >>>>> this two patches should be squashed into one. 
> >>>>> ---
> >>>>>  include/migration/vmstate.h |  1 +
> >>>>>  migration/vmstate-types.c   | 36 +++++++++++++++++++++++++++++++-----
> >>>>>  2 files changed, 32 insertions(+), 5 deletions(-)
> >>>>>
> >>>>> diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
> >>>>> index 66895623da..d90d9b12ca 100644
> >>>>> --- a/include/migration/vmstate.h
> >>>>> +++ b/include/migration/vmstate.h
> >>>>> @@ -200,6 +200,7 @@ typedef enum {
> >>>>>  
> >>>>>  struct VMStateField {
> >>>>>      const char *name;
> >>>>> +    const char *err_hint;
> >>>>>      size_t offset;
> >>>>>      size_t size;
> >>>>>      size_t start;
> >>>>> diff --git a/migration/vmstate-types.c b/migration/vmstate-types.c
> >>>>> index 7287c6baa6..84d0545a38 100644
> >>>>> --- a/migration/vmstate-types.c
> >>>>> +++ b/migration/vmstate-types.c
> >>>>> @@ -19,6 +19,7 @@
> >>>>>  #include "qemu/error-report.h"
> >>>>>  #include "qemu/queue.h"
> >>>>>  #include "trace.h"
> >>>>> +#include "qapi/error.h"
> >>>>>  
> >>>>>  /* bool */
> >>>>>  
> >>>>> @@ -118,6 +119,7 @@ const VMStateInfo vmstate_info_int32 = {
> >>>>>  static int get_int32_equal(QEMUFile *f, void *pv, size_t size,
> >>>>>                             VMStateField *field)
> >>>>>  {
> >>>>> +    Error *err = NULL;
> >>>>>      int32_t *v = pv;
> >>>>>      int32_t v2;
> >>>>>      qemu_get_sbe32s(f, &v2);
> >>>>> @@ -125,7 +127,11 @@ static int get_int32_equal(QEMUFile *f, void *pv, size_t size,
> >>>>>      if (*v == v2) {
> >>>>>          return 0;
> >>>>>      }
> >>>>> -    error_report("%" PRIx32 " != %" PRIx32, *v, v2);
> >>>>> +    error_setg(&err, "%" PRIx32 " != %" PRIx32, *v, v2);
> >>>>> +    if (field->err_hint) {
> >>>>> +        error_append_hint(&err, "%s\n", field->err_hint);
> >>>>> +    }
> >>>>> +    error_report_err(err);
> >>>>
> >>>> I'm a bit worried as to whether the error_append_hint data gets
> >>>> printed out by error_report_err if we're being driven by a QMP
> >>>> monitor.
> >>>> error_report_err uses error_printf_unless_qmp
> >>>>
> >>>> Since this code doesn't really handle Error *'s back up,
> >>>> and always prints it's errors into stderr, I'd prefer if you just
> >>>> used error_report again for the hint, something like:
> >>>>
> >>>> if (field->err_hint) {
> >>>>   error_report("%" PRIx32 " != %" PRIx32 "(%s)",
> >>>>                *v, v2, field->err_hint);
> >>>> } else {
> >>>>   error_report("%" PRIx32 " != %" PRIx32, *v, v2);
> >>>> }
> >>>>
> >>>> Dave
> >>>
> >>> One reason I choose error_report_err is to be consistent about hint
> >>> reporting (the other one is that was what Connie suggested). I do
> >>> not understand why do we omit hints if QMP, but I figured that's
> >>> our policy. So the hint I'm adding must not be printed in QMP
> >>> context -- because that's our policy. I was pretty sure what I
> >>> want to do is add a hint (and not make a very long 'core' error
> >>> message).
> >>>
> >>> Can you (or somebody else)  explain why are hints dropped in QMP
> >>> context?
> >>>
> >>> Don't misunderstand I'm open towards your proposal, it's just
> >>> that:
> >>> 1) I would like to understand.
> >>> 2) I would like to get the very same result as produced by
> >>> https://lists.nongnu.org/archive/html/qemu-devel/2017-06/msg01472.html 
> >>>
> >>> Regards,
> >>> Halil
> >>>
> >>>
> >>
> >> ping.
> >>
> >> I would like to do a v2, but I want this sorted out first.
> >>
> >> 'This' basically boils down to the question and
> >> 'Why aren't hints reported in QMP context?' and 'Why is this
> >> case special (a hint should be reported
> >> even in QMP context?'
> >>
> >> Regarding the first question hints being reported via
> >> error_printf_unless_qmp seems to come from commit
> >> 50b7b000c9 ("hmp: Allow for error message hints on HMP")
> >> --> Cc-ing Eric maybe he can help.
> > 
> > I don't understand the full logic behind error_append_hint;
> > my only concern here is that the full text ends up on stderr
> > even if the migration is driven by QMP.
> > Since we can do that just by using error_report like it's already
> > being used with the slight change I suggested, it seems easy.
> > 
> > Dave
> > 
> 
> Thanks for the reply! Since nobody else cared to explain the logic,
> I guess it is not all that important and we are fine with printing
> the hint in QMP context too.
> 
> I would like to keep the output consistent with 8ed179c937 ("s390x/css:
> catch section mismatch on load", 2017-05-18).
> 
> First I tried with (too make the err_hint look like a hint)
> 
> +    if (field->err_hint) {
> +        error_report("%" PRIx32 " != %" PRIx32 "\n%s\n",
> +                     *v, v2, field->err_hint);
> +    } else {
> +        error_report("%" PRIx32 " != %" PRIx32, *v, v2);
> +    } 
> 
> but checkpatch does not like that because newline in error
> message seems to be evil.
> 
> Would you be also OK with:
> 
>     error_report("%" PRIx32 " != %" PRIx32, *v, v2);                            
>     if (field->err_hint) {                                                      
>         error_printf("%s\n", field->err_hint);                                  
>     } 

Yes I'm OK with that - what's important to me is getting the output
into the stderr log so I've got something to work with when it fails.

> Regards,
> or are you preferring producing a single line (in that case I
> would have to sacrifice 'no change in behavior' for my vmstate
> conversion of virtio-ccw :( )?

Single line is less important to me.

Dave

> Halil
> 
> 
> >> Regards,
> >> Halil
> >>
> > --
> > Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
> > 
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Eric Blake June 29, 2017, 7:04 p.m. UTC | #7
On 06/14/2017 08:51 AM, Halil Pasic wrote:

[apologies for the delayed response, and also adding Markus]


>>
>> One reason I choose error_report_err is to be consistent about hint
>> reporting (the other one is that was what Connie suggested). I do
>> not understand why do we omit hints if QMP, but I figured that's
>> our policy. So the hint I'm adding must not be printed in QMP
>> context -- because that's our policy. I was pretty sure what I
>> want to do is add a hint (and not make a very long 'core' error
>> message).
>>
>> Can you (or somebody else)  explain why are hints dropped in QMP
>> context?
>>
>> Don't misunderstand I'm open towards your proposal, it's just
>> that:
>> 1) I would like to understand.
>> 2) I would like to get the very same result as produced by
>> https://lists.nongnu.org/archive/html/qemu-devel/2017-06/msg01472.html 
>>
>> Regards,
>> Halil
>>
>>
> 
> ping.
> 
> I would like to do a v2, but I want this sorted out first.
> 
> 'This' basically boils down to the question and
> 'Why aren't hints reported in QMP context?'

QMP is supposed to be machine-parseable.  Hints are supposed to be
human-readable. If you have a machine managing the monitor, the hint
adds nothing but bandwidth consumption, because machine should not be
parsing the human portion of the error message in the first place (as it
is, libvirt already just logs the human-readable portion of a message,
and bases its actions solely on the machine-stable portions of an error
reply: namely, whether an error was sent at all, and occasionally, what
error class was used for that error - there's no guarantee a human will
be reading the log, though).

There's also the question of whether the hints are even useful (telling
the user to do something differently doesn't help if it wasn't the user,
but libvirt, that was doing things wrong to cause the error in the first
place).

So while those points may or may not be the original rationale for why
hints are not used in QMP, it is an explanation that works for me
now.  Markus may also have an opinion on the matter.

> and 'Why is this
> case special (a hint should be reported
> even in QMP context?'

If something absolutely must be reported, then it is not a hint, and
shouldn't be using the hint mechanism.

> 
> Regarding the first question hints being reported via
> error_printf_unless_qmp seems to come from commit
> 50b7b000c9 ("hmp: Allow for error message hints on HMP")
> --> Cc-ing Eric maybe he can help.
> 
> Regards,
> Halil
> 
>
Halil Pasic June 30, 2017, 2:41 p.m. UTC | #8
On 06/29/2017 09:04 PM, Eric Blake wrote:
> On 06/14/2017 08:51 AM, Halil Pasic wrote:
> 
> [apologies for the delayed response, and also adding Markus]
> 

No problem. Many thanks for the effort. I see I've ended up with a
lengthy email. A disclaimer before I start: No strong opinions here.
Things have been working reasonably well for years and I respect that.
Nevertheless I like conceptual clarity, and because of this, I ended up
doing discussion without considering the expected cost/benefit ratio. If
I think about it that way it probably ain't worth it. So I'm OK with
concluding the discussion with that argument at any time -- just tell ;).


>>>
>>> One reason I choose error_report_err is to be consistent about hint
>>> reporting (the other one is that was what Connie suggested). I do
>>> not understand why do we omit hints if QMP, but I figured that's
>>> our policy. So the hint I'm adding must not be printed in QMP
>>> context -- because that's our policy. I was pretty sure what I
>>> want to do is add a hint (and not make a very long 'core' error
>>> message).
>>>
>>> Can you (or somebody else)  explain why are hints dropped in QMP
>>> context?
>>>
>>> Don't misunderstand I'm open towards your proposal, it's just
>>> that:
>>> 1) I would like to understand.
>>> 2) I would like to get the very same result as produced by
>>> https://lists.nongnu.org/archive/html/qemu-devel/2017-06/msg01472.html 
>>>
>>> Regards,
>>> Halil
>>>
>>>
>>
>> ping.
>>
>> I would like to do a v2, but I want this sorted out first.
>>
>> 'This' basically boils down to the question and
>> 'Why aren't hints reported in QMP context?'
> 
> QMP is supposed to be machine-parseable.  Hints are supposed to be
> human-readable. If you have a machine managing the monitor, the hint
> adds nothing but bandwidth consumption, because machine should not be
> parsing the human portion of the error message in the first place (as it
> is, libvirt already just logs the human-readable portion of a message,
> and bases its actions solely on the machine-stable portions of an error
> reply: namely, whether an error was sent at all, and occasionally, what
> error class was used for that error - there's no guarantee a human will
> be reading the log, though).


Seems I've made wrong assumptions about error messages (in QEMU) up until
now. If I understand you correctly, in QEMU error messages are part of
the API (but hints are not). Thus if one changes a typo in an error
message (like here
https://lists.gnu.org/archive/html/qemu-devel/2017-06/msg06732.html) then
one is strictly speaking breaking API backward compatibility.  Is that
really the way we want to have things?

From prior experiences I'm more used to think about error messages as
something meant for human consumption, and expressing things expected to
be relevant for some kind of client code in a different way (optimized
for machine consumption).

If however the error message ain't part of the machine relevant portion,
then the same argument applies as to the 'hint', and I don't see the
reason for handling hints differently. Do you agree with my
argumentation?

Let us also examine some comments in qapi/error.h:

/*
 * Just like error_setg(), except you get to specify the error class.
 * Note: use of error classes other than ERROR_CLASS_GENERIC_ERROR is
 * strongly discouraged.
 */
#define error_set(errp, err_class, fmt, ...)

This probably means client code (e.g. libvirt) is in general not meant
to make decision based on the type of the error that occurred (e.g. what
went wrong).

/*
[..]
 * human-readable error message is made from printf-style @fmt, ...
 * The resulting message should be a single phrase, with no newline or
 * trailing punctuation.
[..]
 */
#define error_setg(errp, fmt, ...) 

From this it seems to me error message is intended for human-consumption.

/*
 * Append a printf-style human-readable explanation to an existing error.
 * @errp may be NULL, but not &error_fatal or &error_abort.
 * Trivially the case if you call it only after error_setg() or
 * error_propagate().
 * May be called multiple times.  The resulting hint should end with a
 * newline.
 */
void error_append_hint(Error **errp, const char *fmt, ...)

From this, I would say: The 'hint' is about why something went wrong. The
'message' is about what problem in particular or in general was
encountered (in general, the requested operation failed/can not be
performed; the caller knows what operation was attempted) and should be
considered debugging aid (along with the bits supposed to answer the
question 'where'). This debugging aid, however, can be very useful to the
end user if seeking a workaround, and the error_class is for providing
client code with additional information beyond 'something went wrong'.

Whether the message is supposed to be only about 'in particular' is a
tricky one, and should probably depend on the contract: if the client
code is supposed to tell us which high level operation failed then I guess
just 'in particular' is good; if however the client code is expected to
just log errors and proceed without providing any extra context then I
guess the message is about both 'in general' and 'in particular'. I think
error_prepend is used to provide the 'extra context' and points in the
direction of the latter, but I'm not sure (e.g. whether it is OK to not
include any information about what we were trying to accomplish in the
message when an error is created).


> 
> There's also the question of whether the hints are even useful (telling
> the user to do something differently doesn't help if it wasn't the user,
> but libvirt, that was doing things wrong to cause the error in the first
> place).
> 

To me this translates to the following question: Is it reasonable to
assume that we are interested in what went wrong (error message)?


> So while those points may or may not be the original rationale for why
> hints are not used in QMP, but it is an explanation that works for me
> now.  Markus may also have an opinion on the matter.
> 
>> and 'Why is this
>> case special (a hint should be reported
>> even in QMP context?'
> 
> If something absolutely must be reported, then it is not a hint, and
> shouldn't be using the hint mechanism.
> 

I find it hard to formulate criteria for 'must be reported'. I'm afraid
this is backwards logic: since the hint may not be reported everything
that needs to be reported is not a hint. This is a valid approach of
course, but then I think some modifications to the comments in error.h
would not hurt. And maybe something with 'verbose' would be a more
expressive name.

I hope all this makes some sense and ain't pure waste of time...

Regards,
Halil
Eric Blake June 30, 2017, 2:54 p.m. UTC | #9
On 06/30/2017 09:41 AM, Halil Pasic wrote:
>>> 'This' basically boils down to the question and
>>> 'Why aren't hints reported in QMP context?'
>>
>> QMP is supposed to be machine-parseable.  Hints are supposed to be
>> human-readable. If you have a machine managing the monitor, the hint
>> adds nothing but bandwidth consumption, because machine should not be
>> parsing the human portion of the error message in the first place (as it
>> is, libvirt already just logs the human-readable portion of a message,
>> and bases its actions solely on the machine-stable portions of an error
>> reply: namely, whether an error was sent at all, and occasionally, what
>> error class was used for that error - there's no guarantee a human will
>> be reading the log, though).
> 
> 
> Seems I've made wrong assumptions about error messages (in QEMU) up until
> now. If I understand you correctly, in QEMU error messages are part of
> the API (but hints are not). Thus if one changes a typo in an error
> message (like here
> https://lists.gnu.org/archive/html/qemu-devel/2017-06/msg06732.html) the
> one is strictly speaking breaking API backward compatibility.  Is that
> really the way we want to have things?

Quite the opposite. In QMP, the EXISTENCE of an error message is part of
the API, but the CONTENTS of the message are not (machines are not
supposed to further parse the message) - anything that the machine would
want to differentiate between two different possible error messages
should instead be conveyed via a second field in the same returned
dictionary (the error class), and not by parsing the message.  Most
often, there is not a strong case for having differentiation, so most
errors are lumped in the generic class (error_setg() makes this easy to
do by default).  An example where differentiation matters: look at the
"Important Note" in blockdev.c:qmp_block_commit().

> 
> From prior experiences I'm more used to think about error messages as
> something meant for human consumption, and expressing things expected to
> be relevant for some kind of client code in a different way (optimized
> for machine consumption).
> 
> If however the error message ain't part of the machine relevant portion,
> then the same argument applies as to the 'hint', and I don't see the
> reason for handling hints differently. Do you agree with my
> argumentation?

Indeed, it may not hurt to start passing the hints over the wire (errors
would then consume more bandwidth, but errors are not the hot path).
And I'm not necessarily opposed to that change, so much as trying to
document why it is not currently the case.  At the same time, I probably
won't be the one writing a path to populate the hint information into
the QMP error, as I don't have any reason to use the hint when
controlling libvirt (except maybe for logging, but there, the hint is
not going to help the end user, because it's not the end-user's fault
that libvirt used the API wrong to get a hint in the first place).


>> If something absolutely must be reported, then it is not a hint, and
>> shouldn't be using the hint mechanism.
>>
> 
> I find it hard to formulate criteria for 'must be reported'. I'm afraid
> this is backwards logic: since the hint may not be reported everything
> that needs to be reported is not a hint. This is a valid approach of
> course, but then I think some modifications to the comments in error.h
> would not hurt. And maybe something with verbose would be more
> expressive name.
> 
> I hope all this makes some sense and ain't pure waste of time...

No, it never hurts to question whether the design is optimal, and it's
better to question first to know whether it is even worth patching
things to behave differently, rather than spending time patching it only
to have a maintainer clarify that the patch can't be accepted because of
some design constraint.  So I still hope Markus will chime in.
Halil Pasic June 30, 2017, 4:10 p.m. UTC | #10
On 06/30/2017 04:54 PM, Eric Blake wrote:
> On 06/30/2017 09:41 AM, Halil Pasic wrote:
>>>> 'This' basically boils down to the question and
>>>> 'Why aren't hints reported in QMP context?'
>>>
>>> QMP is supposed to be machine-parseable.  Hints are supposed to be
>>> human-readable. If you have a machine managing the monitor, the hint
>>> adds nothing but bandwidth consumption, because machine should not be
>>> parsing the human portion of the error message in the first place (as it
>>> is, libvirt already just logs the human-readable portion of a message,
>>> and bases its actions solely on the machine-stable portions of an error
>>> reply: namely, whether an error was sent at all, and occasionally, what
>>> error class was used for that error - there's no guarantee a human will
>>> be reading the log, though).
>>
>>
>> Seems I've made wrong assumptions about error messages (in QEMU) up until
>> now. If I understand you correctly, in QEMU error messages are part of
>> the API (but hints are not). Thus if one changes a typo in an error
>> message (like here
>> https://lists.gnu.org/archive/html/qemu-devel/2017-06/msg06732.html) the
>> one is strictly speaking breaking API backward compatibility.  Is that
>> really the way we want to have things?
> 
> Quite the opposite. In QMP, the EXISTENCE of an error message is part of
> the API, but the CONTENTS of the message are not (machines are not
> supposed to further parse the message) - anything that the machine would
> want to differentiate between two different possible error messages
> should instead be conveyed via a second field in the same returned
> dictionary (the error class), and not by parsing the message.  

I think we are in agreement, it's just that you call 'error message' what
I would call 'error response' (from docs/qmp-spec.txt). For me an error
response MAY OR MAY NOT or MUST (I don't know; it is not stated in
qmp-spec.txt, and qapi-schema.json did not make me much smarter: I would
guess may or may not -- there is even some comment in qapi-schema pointing
in that direction) contain a 'desc' which is by definition "- The
"desc" member is a human-readable error message. Clients should not
attempt to parse this message.".

So I would call that 'error message'. If the logic (modulo reporting) in
libvirt (I don't know, my focus isn't libvirt) or any other management
software depends on the EXISTENCE of 'desc' (or human-readable portion of
some error API object) I find that weird, but it's a definition thing.


> Most
> often, there is not a strong case for having differentiation, so most
> errors are lumped in the generic class (error_setg() makes this easy to
> do by default).  An example where differentiation matters: look at the
> "Important Note" in blockdev.c:qmp_block_commit().

I think I have seen that. I find the 'strong discouragement' weird, because
if there is a reason to have differentiation the error class is the way
to go. And if there is no reason to -- it should be obvious.

> 
>>
>> From prior experiences I'm more used to think about error messages as
>> something meant for human consumption, and expressing things expected to
>> be relevant for some kind of client code in a different way (optimized
>> for machine consumption).
>>
>> If however the error message ain't part of the machine relevant portion,
>> then the same argument applies as to the 'hint', and I don't see the
>> reason for handling hints differently. Do you agree with my
>> argumentation?
> 
> Indeed, it may not hurt to start passing the hints over the wire (errors
> would then consume more bandwidth, but errors are not the hot path).
> And I'm not necessarily opposed to that change, so much as trying to
> document why it is not currently the case.  At the same time, I probably
> won't be the one writing a path to populate the hint information into
> the QMP error, as I don't have any reason to use the hint when
> controlling libvirt (except maybe for logging, but there, the hint is
> not going to help the end user, because it's not the end-user's fault
> that libvirt used the API wrong to get a hint in the first place).

For me both human readable things make sense only for error reporting
(effectively logging). Error.msg should IMHO be different, than Error.hint.
The existence of an error should be indicated by the Error object.

> 
> 
>>> If something absolutely must be reported, then it is not a hint, and
>>> shouldn't be using the hint mechanism.
>>>
>>
>> I find it hard to formulate criteria for 'must be reported'. I'm afraid
>> this is backwards logic: since the hint may not be reported everything
>> that needs to be reported is not a hint. This is a valid approach of
>> course, but then I think some modifications to the comments in error.h
>> would not hurt. And maybe something with verbose would be more
>> expressive name.
>>
>> I hope all this makes some sense and ain't pure waste of time...
> 
> No, it never hurts to question whether the design is optimal, and it's
> better to question first to know whether it is even worth patching
> things to behave differently, rather than spending time patching it only
> to have a maintainer clarify that the patch can't be accepted because of
> some design constraint.  So I still hope Markus will chime in.
> 

For this patch I went with Dave's proposal so I have no acute interest
in changing this.

Conceptually, for me it really boils down to the question: Is it reasonable
to assume that we are interested in what went wrong (error message)?

If yes, we are good as is. If no, we should not drop hint in QMP context.

Thanks for your time. I think we provided Markus with enough input to
make his call :).

Halil
Markus Armbruster July 3, 2017, 1:52 p.m. UTC | #11
Halil Pasic <pasic@linux.vnet.ibm.com> writes:

> On 06/30/2017 04:54 PM, Eric Blake wrote:
>> On 06/30/2017 09:41 AM, Halil Pasic wrote:
>>>>> 'This' basically boils down to the question and
>>>>> 'Why aren't hints reported in QMP context?'
>>>>
>>>> QMP is supposed to be machine-parseable.  Hints are supposed to be
>>>> human-readable. If you have a machine managing the monitor, the hint
>>>> adds nothing but bandwidth consumption, because machine should not be
>>>> parsing the human portion of the error message in the first place (as it
>>>> is, libvirt already just logs the human-readable portion of a message,
>>>> and bases its actions solely on the machine-stable portions of an error
>>>> reply: namely, whether an error was sent at all, and occasionally, what
>>>> error class was used for that error - there's no guarantee a human will
>>>> be reading the log, though).
>>>
>>>
>>> Seems I've made wrong assumptions about error messages (in QEMU) up until
>>> now. If I understand you correctly, in QEMU error messages are part of
>>> the API (but hints are not). Thus if one changes a typo in an error
>>> message (like here
>>> https://lists.gnu.org/archive/html/qemu-devel/2017-06/msg06732.html) the
>>> one is strictly speaking breaking API backward compatibility.  Is that
>>> really the way we want to have things?
>> 
>> Quite the opposite. In QMP, the EXISTENCE of an error message is part of
>> the API, but the CONTENTS of the message are not (machines are not
>> supposed to further parse the message) - anything that the machine would
>> want to differentiate between two different possible error messages
>> should instead be conveyed via a second field in the same returned
>> dictionary (the error class), and not by parsing the message.  
>
> I think we are in agreement, it's just that you call 'error message' what
> I would call 'error response' (from docs/qmp-spec.txt).

According to qmp-spec.txt, the 'error response' is a JSON object of the
form

    { "error": { "class": json-string, "desc": json-string },
      "id": json-value }

>                                                         For me an error
> response MAY OR MAY NOT or MUST (I don't know it is not stated in
> qmp-spec.txt, and qapi-schema.json did not make me much smarter: I would
> guess may or may not -- there is even some comment in qapi-schema showing
> it that direction) contain a 'desc' which is per definition "- The
> "desc" member is a human-readable error message. Clients should not
> attempt to parse this message.".

Both in qmp-spec.txt and in the QAPI schema, members are mandatory
unless marked optional.  Thus, "desc" is mandatory.

> So I would call that 'error message'. If the logic (modulo reporting) in
> libvirt (I don't know, my focus isn't libvirt) or any other management
> software depends on the EXISTENCE of 'desc' (or human-readable portion of
> some error API object) I find that weird, but it's a definition thing.

QMP clients such as libvirt may depend on the existence of "desc", just
not on its contents.

Depending on existence: show it to a human user, log it ...

Depending on contents: if "desc" matches /pattern/, do this, else do
that.

>> Most
>> often, there is not a strong case for having differentiation, so most
>> errors are lumped in the generic class (error_setg() makes this easy to
>> do by default).  An example where differentiation matters: look at the
>> "Important Note" in blockdev.c:qmp_block_commit().
>
> I think I have seen that. I find the 'strong discouragement' weird, because
> if there is a reason to have differentiation the error class is the way
> to go. And if there is no reason to -- it should be obvious.

The "strong discouragement" is the result of a long and somewhat
tortuous history.  If you're interested, I can tell it once again.

>>> From prior experiences I'm more used to think about error messages as
>>> something meant for human consumption, and expressing things expected to
>>> be relevant for some kind of client code in a different way (optimized
>>> for machine consumption).
>>>
>>> If however the error message ain't part of the machine relevant portion,
>>> then the same argument applies as to the 'hint', and I don't see the
>>> reason for handling hints differently. Do you agree with my
>>> argumentation?
>> 
>> Indeed, it may not hurt to start passing the hints over the wire (errors
>> would then consume more bandwidth, but errors are not the hot path).
>> And I'm not necessarily opposed to that change, so much as trying to
>> document why it is not currently the case.  At the same time, I probably
>> won't be the one writing a path to populate the hint information into
>> the QMP error, as I don't have any reason to use the hint when
>> controlling libvirt (except maybe for logging, but there, the hint is
>> not going to help the end user, because it's not the end-user's fault
>> that libvirt used the API wrong to get a hint in the first place).
>
> For me both human readable things make sense only for error reporting
> (effectively logging). Error.msg should IMHO be different, than Error.hint.
> The existence of an error should be indicated by the Error object.

Consider this one from qemu-option.c:

        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name,
                   "a non-negative number below 2^64");
        error_append_hint(errp, "Optional suffix k, M, G, T, P or E means"
                          " kilo-, mega-, giga-, tera-, peta-\n"
                          "and exabytes, respectively.\n");

The hint is helpful for a human command line or HMP user.  It's actively
misleading in QMP.  Totally fine, it's how the "hint" feature is meant
to be used.

If we have errors that can't be adequately explained in a single error
message, we may need a way to add more explanation.  error_append_hint()
isn't.

>>>> If something absolutely must be reported, then it is not a hint, and
>>>> shouldn't be using the hint mechanism.

Exactly.

>>> I find it hard to formulate criteria for 'must be reported'. I'm afraid
>>> this is backwards logic: since the hint may not be reported everything
>>> that needs to be reported is not a hint. This is a valid approach of
>>> course, but then I think some modifications to the comments in error.h
>>> would not hurt. And maybe something with verbose would be more
>>> expressive name.
>>>
>>> I hope all this makes some sense and ain't pure waste of time...
>> 
>> No, it never hurts to question whether the design is optimal, and it's
>> better to question first to know whether it is even worth patching
>> things to behave differently, rather than spending time patching it only
>> to have a maintainer clarify that the patch can't be accepted because of
>> some design constraint.  So I still hope Markus will chime in.
>> 
>
> For this patch I went with Dave's proposal so I have no acute interest
> in changing this.
>
> Conceptually, for me it really boils down to the question: Is it reasonable
> to assume that we are interested in what went wrong (error message)?
>
> If yes, we are good as is. If no, we should not drop hint in QMP context.
>
> Thanks for your time. I think we provided Markus with enough input to
> make his call :).

I had a quick peek at the patch that triggered this discussion.  What
problem are you trying to solve?  According to your cover letter, it's
"to specify a hint for the case a vmstate equal assertion".  How are
nicer assertion failures related to QMP?  Am I confused?
Halil Pasic July 3, 2017, 4:21 p.m. UTC | #12
On 07/03/2017 03:52 PM, Markus Armbruster wrote:
> Halil Pasic <pasic@linux.vnet.ibm.com> writes:
> 
>> On 06/30/2017 04:54 PM, Eric Blake wrote:
>>> On 06/30/2017 09:41 AM, Halil Pasic wrote:
>>>>>> 'This' basically boils down to the question and
>>>>>> 'Why aren't hints reported in QMP context?'
>>>>>
>>>>> QMP is supposed to be machine-parseable.  Hints are supposed to be
>>>>> human-readable. If you have a machine managing the monitor, the hint
>>>>> adds nothing but bandwidth consumption, because machine should not be
>>>>> parsing the human portion of the error message in the first place (as it
>>>>> is, libvirt already just logs the human-readable portion of a message,
>>>>> and bases its actions solely on the machine-stable portions of an error
>>>>> reply: namely, whether an error was sent at all, and occasionally, what
>>>>> error class was used for that error - there's no guarantee a human will
>>>>> be reading the log, though).
>>>>
>>>>
>>>> Seems I've made wrong assumptions about error messages (in QEMU) up until
>>>> now. If I understand you correctly, in QEMU error messages are part of
>>>> the API (but hints are not). Thus if one changes a typo in an error
>>>> message (like here
>>>> https://lists.gnu.org/archive/html/qemu-devel/2017-06/msg06732.html) the
>>>> one is strictly speaking breaking API backward compatibility.  Is that
>>>> really the way we want to have things?
>>>
>>> Quite the opposite. In QMP, the EXISTENCE of an error message is part of
>>> the API, but the CONTENTS of the message are not (machines are not
>>> supposed to further parse the message) - anything that the machine would
>>> want to differentiate between two different possible error messages
>>> should instead be conveyed via a second field in the same returned
>>> dictionary (the error class), and not by parsing the message.  
>>
>> I think we are in agreement, it's just that you call 'error message' what
>> I would call 'error response' (from docs/qmp-spec.txt).
> 
> According to qmp-spec.txt, the 'error response' is a JSON object of the
> form
> 
>     { "error": { "class": json-string, "desc": json-string },
>       "id": json-value }
> 
>>                                                         For me an error
>> response MAY OR MAY NOT or MUST (I don't know it is not stated in
>> qmp-spec.txt, and qapi-schema.json did not make me much smarter: I would
>> guess may or may not -- there is even some comment in qapi-schema showing
>> it that direction) contain a 'desc' which is per definition "- The
>> "desc" member is a human-readable error message. Clients should not
>> attempt to parse this message.".
> 
> Both in qmp-spec.txt and in the QAPI schema, members are mandatory
> unless marked optional.  Thus, "desc" is mandatory.
> 

My bad! I've missed the 'mandatory unless marked optional' part in
qmp-spec.txt.

>> So I would call that 'error message'. If the logic (modulo reporting) in
>> libvirt (I don't know, my focus isn't libvirt) or any other management
>> software depends on the EXISTENCE of 'desc' (or human-readable portion of
>> some error API object) I find that weird, but it's a definition thing.
> 
> QMP clients such as libvirt may depend on the existence of "desc", just
> not on its contents.
> 
> Depending on existence: show it to a human user, log it ...
> 
> Depending on contents: if "desc" matches /pattern/, do this, else do
> that.
> 

I understand. My guess was that desc is optional because of this (quote):
"""
# If you're planning to adopt QMP, please observe the following:
#
#     1. The deprecation policy will take effect and be documented soon, please
#        check the documentation of each used command as soon as a new release of
#        QEMU is available
#
#     2. DO NOT rely on anything which is not explicit documented
#
#     3. Errors, in special, are not documented. Applications should NOT check
#        for specific errors classes or data (it's strongly recommended to only
#        check for the "error" key)
#
"""
(qapi-schema.json)

I think this is a solomonic solution ;), it's just that I've missed
a crucial bit.

>>> Most
>>> often, there is not a strong case for having differentiation, so most
>>> errors are lumped in the generic class (error_setg() makes this easy to
>>> do by default).  An example where differentiation matters: look at the
>>> "Important Note" in blockdev.c:qmp_block_commit().
>>
>> I think I have seen that. I find the 'strong discouragement' weird, because
>> if there is a reason to have differentiation the error class is the way
>> to go. And if there is no reason to -- it should be obvious.
> 
> The "strong discouragement" is the result of a long and somewhat
> tortuous history.  If you're interested, I can tell it once again.
> 

Thanks, but I value your time more than I'm interested.

>>>> From prior experiences I'm more used to think about error messages as
>>>> something meant for human consumption, and expressing things expected to
>>>> be relevant for some kind of client code in a different way (optimized
>>>> for machine consumption).
>>>>
>>>> If however the error message ain't part of the machine relevant portion,
>>>> then the same argument applies as to the 'hint', and I don't see the
>>>> reason for handling hints differently. Do you agree with my
>>>> argumentation?
>>>
>>> Indeed, it may not hurt to start passing the hints over the wire (errors
>>> would then consume more bandwidth, but errors are not the hot path).
>>> And I'm not necessarily opposed to that change, so much as trying to
>>> document why it is not currently the case.  At the same time, I probably
>>> won't be the one writing a path to populate the hint information into
>>> the QMP error, as I don't have any reason to use the hint when
>>> controlling libvirt (except maybe for logging, but there, the hint is
>>> not going to help the end user, because it's not the end-user's fault
>>> that libvirt used the API wrong to get a hint in the first place).
>>
>> For me both human readable things make sense only for error reporting
>> (effectively logging). Error.msg should IMHO be different, than Error.hint.
>> The existence of an error should be indicated by the Error object.
> 
> Consider this one from qemu-option.c:
> 
>         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name,
>                    "a non-negative number below 2^64");
>         error_append_hint(errp, "Optional suffix k, M, G, T, P or E means"
>                           " kilo-, mega-, giga-, tera-, peta-\n"
>                           "and exabytes, respectively.\n");
> 
> The hint is helpful for a human command line or HMP user.  It's actively
> misleading in QMP.

I agree.

> Totally fine, it's how the "hint" feature is meant
> to be used.
> 

Was not aware.

> If we have errors that can't be adequately explained in a single error
> message, we may need a way to add more explanation.  error_append_hint()
> isn't.
> 

Was not aware. Using hint in this very situation was suggested by Connie,
and I assumed she has been with the project long enough to know...

In fact looking at  include/qapi/error.h:
"""
/*
 * Error reporting system loosely patterned after Glib's GError.
 *
 * Create an error:
 *     error_setg(&err, "situation normal, all fouled up");
 *
 * Create an error and add additional explanation:
 *     error_setg(&err, "invalid quark");
 *     error_append_hint(&err, "Valid quarks are up, down, strange, "
 *                       "charm, top, bottom.\n");
 *
 * Do *not* contract this to
 *     error_setg(&err, "invalid quark\n"
 *                "Valid quarks are up, down, strange, charm, top, bottom.");
"""

my understanding was and is still the exact opposite of what you say:
error_append_hint is for adding more explanation.

Furthermore 
"""
/*
 * Append a printf-style human-readable explanation to an existing error.
 * @errp may be NULL, but not &error_fatal or &error_abort.
 * Trivially the case if you call it only after error_setg() or
 * error_propagate().
 * May be called multiple times.  The resulting hint should end with a
 * newline.
 */
void error_append_hint(Error **errp, const char *fmt, ...)
"""

Assuming that error_append_hint() isn't for adding more explanation,
IMHO the doc does not adequately explain what it is for.

I have also failed to find any hint in qapi/error.h (which AFAIU
documents the error api) that the human-readable explanation
appended to an existing error by error_append_hint() is to be discarded
if the error is reported in QMP context.

Am I reading the api doc incorrectly, or did the documentation and
de-facto api diverge (behavior)?

>>>>> If something absolutely must be reported, then it is not a hint, and
>>>>> shouldn't be using the hint mechanism.
> 
> Exactly.
> 

Perfectly fine with me provided the apidoc tells me clearly what the hint is
for, and what it is not for.

>>>> I find it hard to formulate criteria for 'must be reported'. I'm afraid
>>>> this is backwards logic: since the hint may not be reported everything
>>>> that needs to be reported is not a hint. This is a valid approach of
>>>> course, but then I think some modifications to the comments in error.h
>>>> would not hurt. And maybe something with verbose would be more
>>>> expressive name.
>>>>
>>>> I hope all this makes some sense and ain't pure waste of time...
>>>
>>> No, it never hurts to question whether the design is optimal, and it's
>>> better to question first to know whether it is even worth patching
>>> things to behave differently, rather than spending time patching it only
>>> to have a maintainer clarify that the patch can't be accepted because of
>>> some design constraint.  So I still hope Markus will chime in.
>>>
>>
>> For this patch I went with Dave's proposal so I have no acute interest
>> in changing this.
>>
>> Conceptually, for me it really boils down to the question: Is it reasonable
>> to assume that we are interested in what went wrong (error message)?
>>
>> If yes, we are good as is. If no, we should not drop hint in QMP context.
>>
>> Thanks for your time. I think we provided Markus with enough input to
>> make his call :).
> 
> I had a quick peek at the patch that triggered this discussion.  What
> problem are you trying to solve?  According to your cover letter, it's
> "to specify a hint for the case a vmstate equal assertion".  How is
> nicer assertion failures related to QMP?  Am I confused?


The problem is solved by d2164ad ("vmstate: error hint for failed equal
checks", 2017-06-23).

The assertions ain't assertions in the sense of the C programming
language. Maybe calling these 'checks' instead of 'assertions' in the
cover letter (like in the subject) would have been better. If one of
these 'assertions' fails, qemu is supposed to abort the initiated load
(migration), state the reason, and terminate normally. In this sense these
'assertions' are similar to the assertions in our unit tests (those fail
a test, and similarly to these do not terminate the program).

The problem I was trying to solve is that the message generated by these
checks looked something like "5 != 4" which is OK if the check is never
supposed to fail, but not satisfactory for something we have to live
with.

Sorry for the confusion.

Regards,
Halil
Markus Armbruster July 4, 2017, 6:42 a.m. UTC | #13
Halil Pasic <pasic@linux.vnet.ibm.com> writes:

> On 07/03/2017 03:52 PM, Markus Armbruster wrote:
>> Halil Pasic <pasic@linux.vnet.ibm.com> writes:
>> 
>>> On 06/30/2017 04:54 PM, Eric Blake wrote:
>>>> On 06/30/2017 09:41 AM, Halil Pasic wrote:
>>>>>>> 'This' basically boils down to the question and
>>>>>>> 'Why aren't hints reported in QMP context?'
>>>>>>
>>>>>> QMP is supposed to be machine-parseable.  Hints are supposed to be
>>>>>> human-readable. If you have a machine managing the monitor, the hint
>>>>>> adds nothing but bandwidth consumption, because machine should not be
>>>>>> parsing the human portion of the error message in the first place (as it
>>>>>> is, libvirt already just logs the human-readable portion of a message,
>>>>>> and bases its actions solely on the machine-stable portions of an error
>>>>>> reply: namely, whether an error was sent at all, and occasionally, what
>>>>>> error class was used for that error - there's no guarantee a human will
>>>>>> be reading the log, though).
[...]
>>>>> From prior experiences I'm more used to think about error messages as
>>>>> something meant for human consumption, and expressing things expected to
>>>>> be relevant for some kind of client code in a different way (optimized
>>>>> for machine consumption).
>>>>>
>>>>> If however the error message ain't part of the machine relevant portion,
>>>>> then the same argument applies as to the 'hint', and I don't see the
>>>>> reason for handling hints differently. Do you agree with my
>>>>> argumentation?
>>>>
>>>> Indeed, it may not hurt to start passing the hints over the wire (errors
>>>> would then consume more bandwidth, but errors are not the hot path).
>>>> And I'm not necessarily opposed to that change, so much as trying to
>>>> document why it is not currently the case.  At the same time, I probably
>>>> won't be the one writing a path to populate the hint information into
>>>> the QMP error, as I don't have any reason to use the hint when
>>>> controlling libvirt (except maybe for logging, but there, the hint is
>>>> not going to help the end user, because it's not the end-user's fault
>>>> that libvirt used the API wrong to get a hint in the first place).
>>>
>>> For me both human readable things make sense only for error reporting
>>> (effectively logging). Error.msg should IMHO be different, than Error.hint.
>>> The existence of an error should be indicated by the Error object.
>> 
>> Consider this one from qemu-option.c:
>> 
>>         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name,
>>                    "a non-negative number below 2^64");
>>         error_append_hint(errp, "Optional suffix k, M, G, T, P or E means"
>>                           " kilo-, mega-, giga-, tera-, peta-\n"
>>                           "and exabytes, respectively.\n");
>> 
>> The hint is helpful for a human command line or HMP user.  It's actively
>> misleading in QMP.
>
> I agree.
>
>> Totally fine, it's how the "hint" feature is meant
>> to be used.
>> 
>
> Was not aware.
>
>> If we have errors that can't be adequately explained in a single error
>> message, we may need a way to add more explanation.  error_append_hint()
>> isn't.
>> 
>
> Was not aware. Using hint in this very situation was suggested by Connie,
> and I assumed she is long enough with the project to know...
>
> In fact looking at  include/qapi/error.h:
> """
> /*
>  * Error reporting system loosely patterned after Glib's GError.
>  *
>  * Create an error:
>  *     error_setg(&err, "situation normal, all fouled up");
>  *
>  * Create an error and add additional explanation:
>  *     error_setg(&err, "invalid quark");
>  *     error_append_hint(&err, "Valid quarks are up, down, strange, "
>  *                       "charm, top, bottom.\n");
>  *
>  * Do *not* contract this to
>  *     error_setg(&err, "invalid quark\n"
>  *                "Valid quarks are up, down, strange, charm, top, bottom.");
> """
>
> my understanding was and is still the exact opposite of what you say:
> error_append_hint is for adding more explanation.
>
> Furthermore 
> """
> /*
>  * Append a printf-style human-readable explanation to an existing error.
>  * @errp may be NULL, but not &error_fatal or &error_abort.
>  * Trivially the case if you call it only after error_setg() or
>  * error_propagate().
>  * May be called multiple times.  The resulting hint should end with a
>  * newline.
>  */
> void error_append_hint(Error **errp, const char *fmt, ...)
> """
>
> Assuming that error_append_hint() isn't for adding more explanation,
> IMHO the doc does not adequately explain what it is for.

You're right, it doesn't.

> I have also failed to find any hint in qapi/error.h which is AFAIU
> documenting the error api about this human-readable explanation
> appended to an existing error by error_append_hint() is to be discarded
> if the error is reported in QMP context.
>
> Am I reading the api doc incorrectly, or did the documentation and
> de-facto api diverge (behavior)?

I added documentation after I inherited this subsystem, in response to
recurring questions on proper use of the interface.  I failed to fully
capture the hint feature's intent.  I'll post a patch.

>>>>>> If something absolutely must be reported, then it is not a hint, and
>>>>>> shouldn't be using the hint mechanism.
>> 
>> Exactly.
>> 
>
> Perfectly fine with me provided the apidoc tells me clearly what the hint is
> for, and what it is not for.
>
>>>>> I find it hard to formulate criteria for 'must be reported'. I'm afraid
>>>>> this is backwards logic: since the hint may not be reported everything
>>>>> that needs to be reported is not a hint. This is a valid approach of
>>>>> course, but then I think some modifications to the comments in error.h
>>>>> would not hurt. And maybe something with verbose would be more
>>>>> expressive name.
>>>>>
>>>>> I hope all this makes some sense and ain't pure waste of time...
>>>>
>>>> No, it never hurts to question whether the design is optimal, and it's
>>>> better to question first to know whether it is even worth patching
>>>> things to behave differently, rather than spending time patching it only
>>>> to have a maintainer clarify that the patch can't be accepted because of
>>>> some design constraint.  So I still hope Markus will chime in.
>>>>
>>>
>>> For this patch I went with Dave's proposal so I have no acute interest
>>> in changing this.
>>>
>>> Conceptually, for me it really boils down to the question: Is it reasonable
>>> to assume that we are interested in what went wrong (error message)?
>>>
>>> If yes, we are good as is. If no, we should not drop hint in QMP context.
>>>
>>> Thanks for your time. I think we provided Markus with enough input to
>>> make his call :).
>> 
>> I had a quick peek at the patch that triggered this discussion.  What
>> problem are you trying to solve?  According to your cover letter, it's
>> "to specify a hint for the case a vmstate equal assertion".  How is
>> nicer assertion failures related to QMP?  Am I confused?
>
>
> The problem is solved by d2164ad ("vmstate: error hint for failed equal
> checks", 2017-06-23).

The way the commit uses error_report() and error_printf() looks good to
me.

> The assertions ain't assertions in sense of the C programming
> language. Maybe calling these 'checks' instead of 'assertions' in the
> cover letter (like in the subject) would have been better. If one of
> these 'assertions' fail qemu is supposed to abort the initiated load
> (migration), state the reason, and terminate normally. In this sense these
> 'assertions' are similar to the assertions in our unit tests (those fail
> a test, and similarly to these do not terminate the program).
>
> The problem I was trying to solve is that the message generated by these
> checks looked something like "5 != 4" which is OK if the check is never
> supposed to fail, but not satisfactory for something we have to live
> with.
>
> Sorry for the confusion.

No problem :)
Halil Pasic July 4, 2017, 11:25 a.m. UTC | #14
On 07/04/2017 08:42 AM, Markus Armbruster wrote:
> Halil Pasic <pasic@linux.vnet.ibm.com> writes:
> 
>> On 07/03/2017 03:52 PM, Markus Armbruster wrote:
>>> Halil Pasic <pasic@linux.vnet.ibm.com> writes:
>>>
>>>> On 06/30/2017 04:54 PM, Eric Blake wrote:
>>>>> On 06/30/2017 09:41 AM, Halil Pasic wrote:
[..]
>>> If we have errors that can't be adequately explained in a single error
>>> message, we may need a way to add more explanation.  error_append_hint()
>>> isn't.
>>>
>>
>> Was not aware. Using hint in this very situation was suggested by Connie,
>> and I assumed she is long enough with the project to know...
>>
>> In fact looking at  include/qapi/error.h:
>> """
>> /*
>>  * Error reporting system loosely patterned after Glib's GError.
>>  *
>>  * Create an error:
>>  *     error_setg(&err, "situation normal, all fouled up");
>>  *
>>  * Create an error and add additional explanation:
>>  *     error_setg(&err, "invalid quark");
>>  *     error_append_hint(&err, "Valid quarks are up, down, strange, "
>>  *                       "charm, top, bottom.\n");
>>  *
>>  * Do *not* contract this to
>>  *     error_setg(&err, "invalid quark\n"
>>  *                "Valid quarks are up, down, strange, charm, top, bottom.");
>> """
>>
>> my understanding was and is still the exact opposite of what you say:
>> error_append_hint is for adding more explanation.
>>
>> Furthermore 
>> """
>> /*
>>  * Append a printf-style human-readable explanation to an existing error.
>>  * @errp may be NULL, but not &error_fatal or &error_abort.
>>  * Trivially the case if you call it only after error_setg() or
>>  * error_propagate().
>>  * May be called multiple times.  The resulting hint should end with a
>>  * newline.
>>  */
>> void error_append_hint(Error **errp, const char *fmt, ...)
>> """
>>
>> Assuming that error_append_hint() isn't for adding more explanation,
>> IMHO the doc does not adequately explain what it is for.
> 
> You're right, it doesn't.
> 
>> I have also failed to find any hint in qapi/error.h which is AFAIU
>> documenting the error api about this human-readable explanation
>> appended to an existing error by error_append_hint() is to be discarded
>> if the error is reported in QMP context.
>>
>> Am I reading the api doc incorrectly, or did the documentation and
>> de-facto api diverge (behavior)?
> 
> I added documentation after I inherited this subsystem, in response to
> recurring questions on proper use of the interface.  I failed to fully
> capture the hint feature's intent.  I'll post a patch.
> 

Hey, IMHO error.h is one of the better documented corners of the QEMU
code base. If you like, put me on cc for this promised patch.

>>>>>>> If something absolutely must be reported, then it is not a hint, and
>>>>>>> shouldn't be using the hint mechanism.
>>>
>>> Exactly.
>>>
>>
>> Perfectly fine with me provided the apidoc tells me clearly what the hint is
>> for, and what it is not for.
>>
>>>>>> I find it hard to formulate criteria for 'must be reported'. I'm afraid
>>>>>> this is backwards logic: since the hint may not be reported everything
>>>>>> that needs to be reported is not a hint. This is a valid approach of
>>>>>> course, but then I think some modifications to the comments in error.h
>>>>>> would not hurt. And maybe something with verbose would be more
>>>>>> expressive name.
>>>>>>
>>>>>> I hope all this makes some sense and ain't pure waste of time...
>>>>>
>>>>> No, it never hurts to question whether the design is optimal, and it's
>>>>> better to question first to know whether it is even worth patching
>>>>> things to behave differently, rather than spending time patching it only
>>>>> to have a maintainer clarify that the patch can't be accepted because of
>>>>> some design constraint.  So I still hope Markus will chime in.
>>>>>
>>>>
>>>> For this patch I went with Dave's proposal so I have no acute interest
>>>> in changing this.
>>>>
>>>> Conceptually, for me it really boils down to the question: Is it reasonable
>>>> to assume that we are interested in what went wrong (error message)?
>>>>
>>>> If yes, we are good as is. If no, we should not drop hint in QMP context.
>>>>
>>>> Thanks for your time. I think we provided Markus with enough input to
>>>> make his call :).
>>>
>>> I had a quick peek at the patch that triggered this discussion.  What
>>> problem are you trying to solve?  According to your cover letter, it's
>>> "to specify a hint for the case a vmstate equal assertion".  How is
>>> nicer assertion failures related to QMP?  Am I confused?
>>
>>
>> The problem is solved by d2164ad ("vmstate: error hint for failed equal
>> checks", 2017-06-23).
> 
> The way the commit uses error_report() and error_printf() looks good to
> me.
> 

I'm glad to hear that. My confidence ain't very high because my understanding
of the infrastructure is shallow (especially the interface between
libvirt/management software and qemu and end user). Because of that
I may have relied on the api-doc more than the more knowledgeable colleagues.

>> The assertions ain't assertions in sense of the C programming
>> language. Maybe calling these 'checks' instead of 'assertions' in the
>> cover letter (like in the subject) would have been better. If one of
>> these 'assertions' fail qemu is supposed to abort the initiated load
>> (migration), state the reason, and terminate normally. In this sense these
>> 'assertions' are similar to the assertions in our unit tests (those fail
>> a test, and similarly to these do not terminate the program).
>>
>> The problem I was trying to solve is that the message generated by these
>> checks looked something like "5 != 4" which is OK if the check is never
>> supposed to fail, but not satisfactory for something we have to live
>> with.
>>
>> Sorry for the confusion.
> 
> No problem :)
> 

I'm glad all resolves positively.

Thanks,
Halil
diff mbox

Patch

diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 66895623da..d90d9b12ca 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -200,6 +200,7 @@  typedef enum {
 
 struct VMStateField {
     const char *name;
+    const char *err_hint;
     size_t offset;
     size_t size;
     size_t start;
diff --git a/migration/vmstate-types.c b/migration/vmstate-types.c
index 7287c6baa6..84d0545a38 100644
--- a/migration/vmstate-types.c
+++ b/migration/vmstate-types.c
@@ -19,6 +19,7 @@ 
 #include "qemu/error-report.h"
 #include "qemu/queue.h"
 #include "trace.h"
+#include "qapi/error.h"
 
 /* bool */
 
@@ -118,6 +119,7 @@  const VMStateInfo vmstate_info_int32 = {
 static int get_int32_equal(QEMUFile *f, void *pv, size_t size,
                            VMStateField *field)
 {
+    Error *err = NULL;
     int32_t *v = pv;
     int32_t v2;
     qemu_get_sbe32s(f, &v2);
@@ -125,7 +127,11 @@  static int get_int32_equal(QEMUFile *f, void *pv, size_t size,
     if (*v == v2) {
         return 0;
     }
-    error_report("%" PRIx32 " != %" PRIx32, *v, v2);
+    error_setg(&err, "%" PRIx32 " != %" PRIx32, *v, v2);
+    if (field->err_hint) {
+        error_append_hint(&err, "%s\n", field->err_hint);
+    }
+    error_report_err(err);
     return -EINVAL;
 }
 
@@ -259,6 +265,7 @@  const VMStateInfo vmstate_info_uint32 = {
 static int get_uint32_equal(QEMUFile *f, void *pv, size_t size,
                             VMStateField *field)
 {
+    Error *err = NULL;
     uint32_t *v = pv;
     uint32_t v2;
     qemu_get_be32s(f, &v2);
@@ -266,7 +273,11 @@  static int get_uint32_equal(QEMUFile *f, void *pv, size_t size,
     if (*v == v2) {
         return 0;
     }
-    error_report("%" PRIx32 " != %" PRIx32, *v, v2);
+    error_setg(&err, "%" PRIx32 " != %" PRIx32, *v, v2);
+    if (field->err_hint) {
+        error_append_hint(&err, "%s\n", field->err_hint);
+    }
+    error_report_err(err);
     return -EINVAL;
 }
 
@@ -333,6 +344,7 @@  const VMStateInfo vmstate_info_nullptr = {
 static int get_uint64_equal(QEMUFile *f, void *pv, size_t size,
                             VMStateField *field)
 {
+    Error *err = NULL;
     uint64_t *v = pv;
     uint64_t v2;
     qemu_get_be64s(f, &v2);
@@ -340,7 +352,11 @@  static int get_uint64_equal(QEMUFile *f, void *pv, size_t size,
     if (*v == v2) {
         return 0;
     }
-    error_report("%" PRIx64 " != %" PRIx64, *v, v2);
+    error_setg(&err, "%" PRIx64 " != %" PRIx64, *v, v2);
+    if (field->err_hint) {
+        error_append_hint(&err, "%s\n", field->err_hint);
+    }
+    error_report_err(err);
     return -EINVAL;
 }
 
@@ -356,6 +372,7 @@  const VMStateInfo vmstate_info_uint64_equal = {
 static int get_uint8_equal(QEMUFile *f, void *pv, size_t size,
                            VMStateField *field)
 {
+    Error *err = NULL;
     uint8_t *v = pv;
     uint8_t v2;
     qemu_get_8s(f, &v2);
@@ -363,7 +380,11 @@  static int get_uint8_equal(QEMUFile *f, void *pv, size_t size,
     if (*v == v2) {
         return 0;
     }
-    error_report("%x != %x", *v, v2);
+    error_setg(&err, "%x != %x", *v, v2);
+    if (field->err_hint) {
+        error_append_hint(&err, "%s\n", field->err_hint);
+    }
+    error_report_err(err);
     return -EINVAL;
 }
 
@@ -379,6 +400,7 @@  const VMStateInfo vmstate_info_uint8_equal = {
 static int get_uint16_equal(QEMUFile *f, void *pv, size_t size,
                             VMStateField *field)
 {
+    Error *err = NULL;
     uint16_t *v = pv;
     uint16_t v2;
     qemu_get_be16s(f, &v2);
@@ -386,7 +408,11 @@  static int get_uint16_equal(QEMUFile *f, void *pv, size_t size,
     if (*v == v2) {
         return 0;
     }
-    error_report("%x != %x", *v, v2);
+    error_setg(&err, "%x != %x", *v, v2);
+    if (field->err_hint) {
+        error_append_hint(&err, "%s\n", field->err_hint);
+    }
+    error_report_err(err);
     return -EINVAL;
 }