diff mbox

[03/14] job: Add error message for failing jobs

Message ID 20180525163327.23097-4-kwolf@redhat.com
State New
Headers show

Commit Message

Kevin Wolf May 25, 2018, 4:33 p.m. UTC
So far we relied on job->ret and strerror() to produce an error message
for failed jobs. Not surprisingly, this tends to result in completely
useless messages.

This adds a Job.error field that can contain an error string for a
failing job, and a parameter to job_completed() that sets the field. As
a default, if NULL is passed, we continue to use strerror(job->ret).

All existing callers are changed to pass NULL. They can be improved in
separate patches.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/qemu/job.h |  7 ++++++-
 block/backup.c     |  2 +-
 block/commit.c     |  2 +-
 block/mirror.c     |  2 +-
 block/stream.c     |  2 +-
 job-qmp.c          |  9 ++-------
 job.c              | 15 +++++++++++++--
 7 files changed, 25 insertions(+), 14 deletions(-)

Comments

Max Reitz May 29, 2018, 11:01 a.m. UTC | #1
On 2018-05-25 18:33, Kevin Wolf wrote:
> So far we relied on job->ret and strerror() to produce an error message
> for failed jobs. Not surprisingly, this tends to result in completely
> useless messages.
> 
> This adds a Job.error field that can contain an error string for a
> failing job, and a parameter to job_completed() that sets the field. As
> a default, if NULL is passed, we continue to use strerror(job->ret).
> 
> All existing callers are changed to pass NULL. They can be improved in
> separate patches.
> 
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
>  include/qemu/job.h |  7 ++++++-
>  block/backup.c     |  2 +-
>  block/commit.c     |  2 +-
>  block/mirror.c     |  2 +-
>  block/stream.c     |  2 +-
>  job-qmp.c          |  9 ++-------
>  job.c              | 15 +++++++++++++--
>  7 files changed, 25 insertions(+), 14 deletions(-)

There are some tests that call job_completed().  Those should be updated
as well.

> diff --git a/include/qemu/job.h b/include/qemu/job.h
> index 8c8badf75e..b2e1dd00b9 100644
> --- a/include/qemu/job.h
> +++ b/include/qemu/job.h
> @@ -124,6 +124,9 @@ typedef struct Job {
>      /** Estimated progress_current value at the completion of the job */
>      int64_t progress_total;
>  
> +    /** Error string for a failed job (NULL if job->ret == 0) */
> +    char *error;
> +

I think we should bind this directly to job->ret, i.e. this is NULL if
and only if job->ret == 0.  (Just because more constraints tend to make
things nicer.  And also because if you don't, then qmp_job_dismiss()
cannot assume that job->error is set on error, which would be a change
in behavior.)

>      /** ret code passed to job_completed. */
>      int ret;
> 
[...]

> diff --git a/job.c b/job.c
> index f026661b0f..fc39eaaa5e 100644
> --- a/job.c
> +++ b/job.c

job_prepare() (called by job_do_finalize() through job_txn_apply()) may
set job->ret.  If it does set it to a negative value, job_do_finalize()
will call job_completed_txn_abort(), which will eventually invoke
job_finalize_single(), which then runs job_update_rc() on the job.  This
is a bit too much code between setting job->ret and job->error for my
taste, I'd rather set job->error much sooner (e.g. by calling
job_update_rc() directly in job_prepare(), which I don't think would
change anything).

But if you think that this is just fine, then OK, because I don't think
the user can do QMP queries in between (it would still break the iff
relationship between job->ret and job->error, which I find desirable).

[...]

> @@ -661,6 +662,9 @@ static void job_update_rc(Job *job)
>      }
>      if (job->ret) {
>          job_state_transition(job, JOB_STATUS_ABORTING);
> +        if (!job->error) {
> +            job->error = g_strdup(strerror(-job->ret));
> +        }

If we do use an iff relationship between job->ret and job->error, this
should probably be inserted before job_state_transition().

Max

>      }
>  }
Jeff Cody May 29, 2018, 2:43 p.m. UTC | #2
On Fri, May 25, 2018 at 06:33:16PM +0200, Kevin Wolf wrote:
> So far we relied on job->ret and strerror() to produce an error message
> for failed jobs. Not surprisingly, this tends to result in completely
> useless messages.
> 
> This adds a Job.error field that can contain an error string for a
> failing job, and a parameter to job_completed() that sets the field. As
> a default, if NULL is passed, we continue to use strerror(job->ret).
> 
> All existing callers are changed to pass NULL. They can be improved in
> separate patches.
> 
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
>  include/qemu/job.h |  7 ++++++-
>  block/backup.c     |  2 +-
>  block/commit.c     |  2 +-
>  block/mirror.c     |  2 +-
>  block/stream.c     |  2 +-
>  job-qmp.c          |  9 ++-------
>  job.c              | 15 +++++++++++++--
>  7 files changed, 25 insertions(+), 14 deletions(-)
> 
> diff --git a/include/qemu/job.h b/include/qemu/job.h
> index 8c8badf75e..b2e1dd00b9 100644
> --- a/include/qemu/job.h
> +++ b/include/qemu/job.h
> @@ -124,6 +124,9 @@ typedef struct Job {
>      /** Estimated progress_current value at the completion of the job */
>      int64_t progress_total;
>  
> +    /** Error string for a failed job (NULL if job->ret == 0) */
> +    char *error;
> +
>      /** ret code passed to job_completed. */
>      int ret;
>  
> @@ -466,13 +469,15 @@ void job_transition_to_ready(Job *job);
>  /**
>   * @job: The job being completed.
>   * @ret: The status code.
> + * @error: The error message for a failing job (only with @ret < 0). If @ret is
> + *         negative, but NULL is given for @error, strerror() is used.
>   *
>   * Marks @job as completed. If @ret is non-zero, the job transaction it is part
>   * of is aborted. If @ret is zero, the job moves into the WAITING state. If it
>   * is the last job to complete in its transaction, all jobs in the transaction
>   * move from WAITING to PENDING.
>   */
> -void job_completed(Job *job, int ret);
> +void job_completed(Job *job, int ret, Error *error);
>  
>  /** Asynchronously complete the specified @job. */
>  void job_complete(Job *job, Error **errp);
> diff --git a/block/backup.c b/block/backup.c
> index 4e228e959b..5661435675 100644
> --- a/block/backup.c
> +++ b/block/backup.c
> @@ -321,7 +321,7 @@ static void backup_complete(Job *job, void *opaque)
>  {
>      BackupCompleteData *data = opaque;
>  
> -    job_completed(job, data->ret);
> +    job_completed(job, data->ret, NULL);
>      g_free(data);
>  }
>  
> diff --git a/block/commit.c b/block/commit.c
> index 620666161b..e1814d9693 100644
> --- a/block/commit.c
> +++ b/block/commit.c
> @@ -117,7 +117,7 @@ static void commit_complete(Job *job, void *opaque)
>       * bdrv_set_backing_hd() to fail. */
>      block_job_remove_all_bdrv(bjob);
>  
> -    job_completed(job, ret);
> +    job_completed(job, ret, NULL);
>      g_free(data);
>  
>      /* If bdrv_drop_intermediate() didn't already do that, remove the commit
> diff --git a/block/mirror.c b/block/mirror.c
> index dcb66ec3be..435268bbbf 100644
> --- a/block/mirror.c
> +++ b/block/mirror.c
> @@ -581,7 +581,7 @@ static void mirror_exit(Job *job, void *opaque)
>      blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, &error_abort);
>      blk_insert_bs(bjob->blk, mirror_top_bs, &error_abort);
>  
> -    job_completed(job, data->ret);
> +    job_completed(job, data->ret, NULL);
>  
>      g_free(data);
>      bdrv_drained_end(src);
> diff --git a/block/stream.c b/block/stream.c
> index a5d6e0cf8a..9264b68a1e 100644
> --- a/block/stream.c
> +++ b/block/stream.c
> @@ -93,7 +93,7 @@ out:
>      }
>  
>      g_free(s->backing_file_str);
> -    job_completed(job, data->ret);
> +    job_completed(job, data->ret, NULL);
>      g_free(data);
>  }
>  
> diff --git a/job-qmp.c b/job-qmp.c
> index 7f38f63336..410775df61 100644
> --- a/job-qmp.c
> +++ b/job-qmp.c
> @@ -136,14 +136,9 @@ void qmp_job_dismiss(const char *id, Error **errp)
>  static JobInfo *job_query_single(Job *job, Error **errp)
>  {
>      JobInfo *info;
> -    const char *errmsg = NULL;
>  
>      assert(!job_is_internal(job));
>  
> -    if (job->ret < 0) {
> -        errmsg = strerror(-job->ret);
> -    }
> -
>      info = g_new(JobInfo, 1);
>      *info = (JobInfo) {
>          .id                 = g_strdup(job->id),
> @@ -151,8 +146,8 @@ static JobInfo *job_query_single(Job *job, Error **errp)
>          .status             = job->status,
>          .current_progress   = job->progress_current,
>          .total_progress     = job->progress_total,
> -        .has_error          = !!errmsg,
> -        .error              = g_strdup(errmsg),
> +        .has_error          = !!job->error,
> +        .error              = g_strdup(job->error),
>      };
>  
>      return info;
> diff --git a/job.c b/job.c
> index f026661b0f..fc39eaaa5e 100644
> --- a/job.c
> +++ b/job.c
> @@ -369,6 +369,7 @@ void job_unref(Job *job)
>  
>          QLIST_REMOVE(job, job_list);
>  
> +        g_free(job->error);
>          g_free(job->id);
>          g_free(job);
>      }
> @@ -661,6 +662,9 @@ static void job_update_rc(Job *job)
>      }
>      if (job->ret) {
>          job_state_transition(job, JOB_STATUS_ABORTING);
> +        if (!job->error) {
> +            job->error = g_strdup(strerror(-job->ret));
> +        }
>      }
>  }
>  
> @@ -855,10 +859,17 @@ static void job_completed_txn_success(Job *job)
>      }
>  }
>  
> -void job_completed(Job *job, int ret)
> +void job_completed(Job *job, int ret, Error *error)
>  {
>      assert(job && job->txn && !job_is_completed(job));
> +
>      job->ret = ret;
> +    if (error) {
> +        assert(job->ret < 0);

The assert here implies that only job->ret values < 0 are valid for error.
Elsewhere, we just check for non-zero values for error (for example, [1]).
Maybe we should relax this to just assert(job->ret) here?

> +        job->error = g_strdup(error_get_pretty(error));
> +        error_free(error);
> +    }
> +
>      job_update_rc(job);
>      trace_job_completed(job, ret, job->ret);

[1]

>      if (job->ret) {
          job_completed_txn_abort(job);
      } else {
          job_completed_txn_success(job);
      }


> @@ -876,7 +887,7 @@ void job_cancel(Job *job, bool force)
>      }
>      job_cancel_async(job, force);
>      if (!job_started(job)) {
> -        job_completed(job, -ECANCELED);
> +        job_completed(job, -ECANCELED, NULL);
>      } else if (job->deferred_to_main_loop) {
>          job_completed_txn_abort(job);
>      } else {
> -- 
> 2.13.6
> 
>
Kevin Wolf May 29, 2018, 7:54 p.m. UTC | #3
Am 29.05.2018 um 16:43 hat Jeff Cody geschrieben:
> On Fri, May 25, 2018 at 06:33:16PM +0200, Kevin Wolf wrote:
> > So far we relied on job->ret and strerror() to produce an error message
> > for failed jobs. Not surprisingly, this tends to result in completely
> > useless messages.
> > 
> > This adds a Job.error field that can contain an error string for a
> > failing job, and a parameter to job_completed() that sets the field. As
> > a default, if NULL is passed, we continue to use strerror(job->ret).
> > 
> > All existing callers are changed to pass NULL. They can be improved in
> > separate patches.
> > 
> > Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> > ---
> >  include/qemu/job.h |  7 ++++++-
> >  block/backup.c     |  2 +-
> >  block/commit.c     |  2 +-
> >  block/mirror.c     |  2 +-
> >  block/stream.c     |  2 +-
> >  job-qmp.c          |  9 ++-------
> >  job.c              | 15 +++++++++++++--
> >  7 files changed, 25 insertions(+), 14 deletions(-)
> > 
> > diff --git a/include/qemu/job.h b/include/qemu/job.h
> > index 8c8badf75e..b2e1dd00b9 100644
> > --- a/include/qemu/job.h
> > +++ b/include/qemu/job.h
> > @@ -124,6 +124,9 @@ typedef struct Job {
> >      /** Estimated progress_current value at the completion of the job */
> >      int64_t progress_total;
> >  
> > +    /** Error string for a failed job (NULL if job->ret == 0) */
> > +    char *error;
> > +
> >      /** ret code passed to job_completed. */
> >      int ret;
> >  
> > @@ -466,13 +469,15 @@ void job_transition_to_ready(Job *job);
> >  /**
> >   * @job: The job being completed.
> >   * @ret: The status code.
> > + * @error: The error message for a failing job (only with @ret < 0). If @ret is
> > + *         negative, but NULL is given for @error, strerror() is used.
> >   *
> >   * Marks @job as completed. If @ret is non-zero, the job transaction it is part
> >   * of is aborted. If @ret is zero, the job moves into the WAITING state. If it
> >   * is the last job to complete in its transaction, all jobs in the transaction
> >   * move from WAITING to PENDING.
> >   */
> > -void job_completed(Job *job, int ret);
> > +void job_completed(Job *job, int ret, Error *error);
> >  
> >  /** Asynchronously complete the specified @job. */
> >  void job_complete(Job *job, Error **errp);
> > diff --git a/block/backup.c b/block/backup.c
> > index 4e228e959b..5661435675 100644
> > --- a/block/backup.c
> > +++ b/block/backup.c
> > @@ -321,7 +321,7 @@ static void backup_complete(Job *job, void *opaque)
> >  {
> >      BackupCompleteData *data = opaque;
> >  
> > -    job_completed(job, data->ret);
> > +    job_completed(job, data->ret, NULL);
> >      g_free(data);
> >  }
> >  
> > diff --git a/block/commit.c b/block/commit.c
> > index 620666161b..e1814d9693 100644
> > --- a/block/commit.c
> > +++ b/block/commit.c
> > @@ -117,7 +117,7 @@ static void commit_complete(Job *job, void *opaque)
> >       * bdrv_set_backing_hd() to fail. */
> >      block_job_remove_all_bdrv(bjob);
> >  
> > -    job_completed(job, ret);
> > +    job_completed(job, ret, NULL);
> >      g_free(data);
> >  
> >      /* If bdrv_drop_intermediate() didn't already do that, remove the commit
> > diff --git a/block/mirror.c b/block/mirror.c
> > index dcb66ec3be..435268bbbf 100644
> > --- a/block/mirror.c
> > +++ b/block/mirror.c
> > @@ -581,7 +581,7 @@ static void mirror_exit(Job *job, void *opaque)
> >      blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, &error_abort);
> >      blk_insert_bs(bjob->blk, mirror_top_bs, &error_abort);
> >  
> > -    job_completed(job, data->ret);
> > +    job_completed(job, data->ret, NULL);
> >  
> >      g_free(data);
> >      bdrv_drained_end(src);
> > diff --git a/block/stream.c b/block/stream.c
> > index a5d6e0cf8a..9264b68a1e 100644
> > --- a/block/stream.c
> > +++ b/block/stream.c
> > @@ -93,7 +93,7 @@ out:
> >      }
> >  
> >      g_free(s->backing_file_str);
> > -    job_completed(job, data->ret);
> > +    job_completed(job, data->ret, NULL);
> >      g_free(data);
> >  }
> >  
> > diff --git a/job-qmp.c b/job-qmp.c
> > index 7f38f63336..410775df61 100644
> > --- a/job-qmp.c
> > +++ b/job-qmp.c
> > @@ -136,14 +136,9 @@ void qmp_job_dismiss(const char *id, Error **errp)
> >  static JobInfo *job_query_single(Job *job, Error **errp)
> >  {
> >      JobInfo *info;
> > -    const char *errmsg = NULL;
> >  
> >      assert(!job_is_internal(job));
> >  
> > -    if (job->ret < 0) {
> > -        errmsg = strerror(-job->ret);
> > -    }
> > -
> >      info = g_new(JobInfo, 1);
> >      *info = (JobInfo) {
> >          .id                 = g_strdup(job->id),
> > @@ -151,8 +146,8 @@ static JobInfo *job_query_single(Job *job, Error **errp)
> >          .status             = job->status,
> >          .current_progress   = job->progress_current,
> >          .total_progress     = job->progress_total,
> > -        .has_error          = !!errmsg,
> > -        .error              = g_strdup(errmsg),
> > +        .has_error          = !!job->error,
> > +        .error              = g_strdup(job->error),
> >      };
> >  
> >      return info;
> > diff --git a/job.c b/job.c
> > index f026661b0f..fc39eaaa5e 100644
> > --- a/job.c
> > +++ b/job.c
> > @@ -369,6 +369,7 @@ void job_unref(Job *job)
> >  
> >          QLIST_REMOVE(job, job_list);
> >  
> > +        g_free(job->error);
> >          g_free(job->id);
> >          g_free(job);
> >      }
> > @@ -661,6 +662,9 @@ static void job_update_rc(Job *job)
> >      }
> >      if (job->ret) {
> >          job_state_transition(job, JOB_STATUS_ABORTING);
> > +        if (!job->error) {
> > +            job->error = g_strdup(strerror(-job->ret));
> > +        }
> >      }
> >  }
> >  
> > @@ -855,10 +859,17 @@ static void job_completed_txn_success(Job *job)
> >      }
> >  }
> >  
> > -void job_completed(Job *job, int ret)
> > +void job_completed(Job *job, int ret, Error *error)
> >  {
> >      assert(job && job->txn && !job_is_completed(job));
> > +
> >      job->ret = ret;
> > +    if (error) {
> > +        assert(job->ret < 0);
> 
> The assert here implies that only job->ret values < 0 are valid for error.
> Elsewhere, we just check for non-zero values for error (for example, [1]).
> Maybe we should relax this to just assert(job->ret) here?

A positive error is usually a bug. I wouldn't even be sure if it should
be considered success (e.g. someone returned the result of bdrv_pwrite()
like the two places fixed at the start of this series) or we forgot to
negate errno.

I think it's better to be strict and catch such bugs.

Kevin
diff mbox

Patch

diff --git a/include/qemu/job.h b/include/qemu/job.h
index 8c8badf75e..b2e1dd00b9 100644
--- a/include/qemu/job.h
+++ b/include/qemu/job.h
@@ -124,6 +124,9 @@  typedef struct Job {
     /** Estimated progress_current value at the completion of the job */
     int64_t progress_total;
 
+    /** Error string for a failed job (NULL if job->ret == 0) */
+    char *error;
+
     /** ret code passed to job_completed. */
     int ret;
 
@@ -466,13 +469,15 @@  void job_transition_to_ready(Job *job);
 /**
  * @job: The job being completed.
  * @ret: The status code.
+ * @error: The error message for a failing job (only with @ret < 0). If @ret is
+ *         negative, but NULL is given for @error, strerror() is used.
  *
  * Marks @job as completed. If @ret is non-zero, the job transaction it is part
  * of is aborted. If @ret is zero, the job moves into the WAITING state. If it
  * is the last job to complete in its transaction, all jobs in the transaction
  * move from WAITING to PENDING.
  */
-void job_completed(Job *job, int ret);
+void job_completed(Job *job, int ret, Error *error);
 
 /** Asynchronously complete the specified @job. */
 void job_complete(Job *job, Error **errp);
diff --git a/block/backup.c b/block/backup.c
index 4e228e959b..5661435675 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -321,7 +321,7 @@  static void backup_complete(Job *job, void *opaque)
 {
     BackupCompleteData *data = opaque;
 
-    job_completed(job, data->ret);
+    job_completed(job, data->ret, NULL);
     g_free(data);
 }
 
diff --git a/block/commit.c b/block/commit.c
index 620666161b..e1814d9693 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -117,7 +117,7 @@  static void commit_complete(Job *job, void *opaque)
      * bdrv_set_backing_hd() to fail. */
     block_job_remove_all_bdrv(bjob);
 
-    job_completed(job, ret);
+    job_completed(job, ret, NULL);
     g_free(data);
 
     /* If bdrv_drop_intermediate() didn't already do that, remove the commit
diff --git a/block/mirror.c b/block/mirror.c
index dcb66ec3be..435268bbbf 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -581,7 +581,7 @@  static void mirror_exit(Job *job, void *opaque)
     blk_set_perm(bjob->blk, 0, BLK_PERM_ALL, &error_abort);
     blk_insert_bs(bjob->blk, mirror_top_bs, &error_abort);
 
-    job_completed(job, data->ret);
+    job_completed(job, data->ret, NULL);
 
     g_free(data);
     bdrv_drained_end(src);
diff --git a/block/stream.c b/block/stream.c
index a5d6e0cf8a..9264b68a1e 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -93,7 +93,7 @@  out:
     }
 
     g_free(s->backing_file_str);
-    job_completed(job, data->ret);
+    job_completed(job, data->ret, NULL);
     g_free(data);
 }
 
diff --git a/job-qmp.c b/job-qmp.c
index 7f38f63336..410775df61 100644
--- a/job-qmp.c
+++ b/job-qmp.c
@@ -136,14 +136,9 @@  void qmp_job_dismiss(const char *id, Error **errp)
 static JobInfo *job_query_single(Job *job, Error **errp)
 {
     JobInfo *info;
-    const char *errmsg = NULL;
 
     assert(!job_is_internal(job));
 
-    if (job->ret < 0) {
-        errmsg = strerror(-job->ret);
-    }
-
     info = g_new(JobInfo, 1);
     *info = (JobInfo) {
         .id                 = g_strdup(job->id),
@@ -151,8 +146,8 @@  static JobInfo *job_query_single(Job *job, Error **errp)
         .status             = job->status,
         .current_progress   = job->progress_current,
         .total_progress     = job->progress_total,
-        .has_error          = !!errmsg,
-        .error              = g_strdup(errmsg),
+        .has_error          = !!job->error,
+        .error              = g_strdup(job->error),
     };
 
     return info;
diff --git a/job.c b/job.c
index f026661b0f..fc39eaaa5e 100644
--- a/job.c
+++ b/job.c
@@ -369,6 +369,7 @@  void job_unref(Job *job)
 
         QLIST_REMOVE(job, job_list);
 
+        g_free(job->error);
         g_free(job->id);
         g_free(job);
     }
@@ -661,6 +662,9 @@  static void job_update_rc(Job *job)
     }
     if (job->ret) {
         job_state_transition(job, JOB_STATUS_ABORTING);
+        if (!job->error) {
+            job->error = g_strdup(strerror(-job->ret));
+        }
     }
 }
 
@@ -855,10 +859,17 @@  static void job_completed_txn_success(Job *job)
     }
 }
 
-void job_completed(Job *job, int ret)
+void job_completed(Job *job, int ret, Error *error)
 {
     assert(job && job->txn && !job_is_completed(job));
+
     job->ret = ret;
+    if (error) {
+        assert(job->ret < 0);
+        job->error = g_strdup(error_get_pretty(error));
+        error_free(error);
+    }
+
     job_update_rc(job);
     trace_job_completed(job, ret, job->ret);
     if (job->ret) {
@@ -876,7 +887,7 @@  void job_cancel(Job *job, bool force)
     }
     job_cancel_async(job, force);
     if (!job_started(job)) {
-        job_completed(job, -ECANCELED);
+        job_completed(job, -ECANCELED, NULL);
     } else if (job->deferred_to_main_loop) {
         job_completed_txn_abort(job);
     } else {