diff mbox series

[1/4] dma-fence: Propagate errors to dma-fence-array container

Message ID 20190810153430.30636-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [1/4] dma-fence: Propagate errors to dma-fence-array container | expand

Commit Message

Chris Wilson Aug. 10, 2019, 3:34 p.m. UTC
When one of the array of fences is signaled, propagate its errors to the
parent fence-array (keeping the first error to be raised).

v2: Opencode cmpxchg_local to avoid compiler freakout.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Gustavo Padovan <gustavo@padovan.org>
---
 drivers/dma-buf/dma-fence-array.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

Comments

Christian König Aug. 11, 2019, 8:58 a.m. UTC | #1
Am 10.08.19 um 17:34 schrieb Chris Wilson:
> When one of the array of fences is signaled, propagate its errors to the
> parent fence-array (keeping the first error to be raised).
>
> v2: Opencode cmpxchg_local to avoid compiler freakout.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Sumit Semwal <sumit.semwal@linaro.org>
> Cc: Gustavo Padovan <gustavo@padovan.org>
> ---
>   drivers/dma-buf/dma-fence-array.c | 15 +++++++++++++++
>   1 file changed, 15 insertions(+)
>
> diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c
> index 12c6f64c0bc2..d90675bb4fcc 100644
> --- a/drivers/dma-buf/dma-fence-array.c
> +++ b/drivers/dma-buf/dma-fence-array.c
> @@ -13,6 +13,12 @@
>   #include <linux/slab.h>
>   #include <linux/dma-fence-array.h>
>   
> +static void fence_set_error_once(struct dma_fence *fence, int error)

I would use a dma_fence_array prefix for all names in the file.

> +{
> +	if (!fence->error && error)
> +		dma_fence_set_error(fence, error);
> +}
> +
>   static const char *dma_fence_array_get_driver_name(struct dma_fence *fence)
>   {
>   	return "dma_fence_array";
> @@ -38,6 +44,13 @@ static void dma_fence_array_cb_func(struct dma_fence *f,
>   		container_of(cb, struct dma_fence_array_cb, cb);
>   	struct dma_fence_array *array = array_cb->array;
>   
> +	/*
> +	 * Propagate the first error reported by any of our fences, but only
> +	 * before we ourselves are signaled.
> +	 */
> +	if (atomic_read(&array->num_pending) > 0)
> +		fence_set_error_once(&array->base, f->error);

That is racy even if you check the atomic because num_pending can be 
initialized to 1 for signal any arrays as well.

I suggest to rather test in dma_fence_array_set_error_once if we got an 
error and if yes grab the sequence lock and test if we are already 
signaled or not.

Christian.

> +
>   	if (atomic_dec_and_test(&array->num_pending))
>   		irq_work_queue(&array->work);
>   	else
> @@ -63,6 +76,8 @@ static bool dma_fence_array_enable_signaling(struct dma_fence *fence)
>   		dma_fence_get(&array->base);
>   		if (dma_fence_add_callback(array->fences[i], &cb[i].cb,
>   					   dma_fence_array_cb_func)) {
> +			fence_set_error_once(&array->base,
> +					     array->fences[i]->error);
>   			dma_fence_put(&array->base);
>   			if (atomic_dec_and_test(&array->num_pending))
>   				return false;
Chris Wilson Aug. 11, 2019, 11:56 a.m. UTC | #2
Quoting Koenig, Christian (2019-08-11 09:58:31)
> Am 10.08.19 um 17:34 schrieb Chris Wilson:
> > +     /*
> > +      * Propagate the first error reported by any of our fences, but only
> > +      * before we ourselves are signaled.
> > +      */
> > +     if (atomic_read(&array->num_pending) > 0)
> > +             fence_set_error_once(&array->base, f->error);
> 
> That is racy even if you check the atomic because num_pending can be 
> initialized to 1 for signal any arrays as well.

We both agree that we don't care about the potential write tearing if
two errors occur simultaneous, either error will do for our error?

So it's just the matter of not marking the array as being in error if we
have already signaled.
 
> I suggest to rather test in dma_fence_array_set_error_once if we got an 
> error and if yes grab the sequence lock and test if we are already 
> signaled or not.

How about embracing the race with something like,

diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c
index d90675bb4fcc..c71c57d25e48 100644
--- a/drivers/dma-buf/dma-fence-array.c
+++ b/drivers/dma-buf/dma-fence-array.c
@@ -33,6 +33,8 @@ static void irq_dma_fence_array_work(struct irq_work *wrk)
 {
        struct dma_fence_array *array = container_of(wrk, typeof(*array), work);

+       fence_set_error_once(&array->base, READ_ONCE(array->pending_error));
+
        dma_fence_signal(&array->base);
        dma_fence_put(&array->base);
 }
@@ -48,8 +50,8 @@ static void dma_fence_array_cb_func(struct dma_fence *f,
         * Propagate the first error reported by any of our fences, but only
         * before we ourselves are signaled.
         */
-       if (atomic_read(&array->num_pending) > 0)
-               fence_set_error_once(&array->base, f->error);
+       if (f->error && !array->pending_error)
+               WRITE_ONCE(array->pending_error, f->error);

        if (atomic_dec_and_test(&array->num_pending))
                irq_work_queue(&array->work);
@@ -156,6 +158,7 @@ struct dma_fence_array *dma_fence_array_create(int num_fences,
        array->num_fences = num_fences;
        atomic_set(&array->num_pending, signal_on_any ? 1 : num_fences);
        array->fences = fences;
+       array->pending_error = 0;

        return array;
 }
diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h
index 303dd712220f..faaf70c524ae 100644
--- a/include/linux/dma-fence-array.h
+++ b/include/linux/dma-fence-array.h
@@ -42,6 +42,8 @@ struct dma_fence_array {
        atomic_t num_pending;
        struct dma_fence **fences;

+       int pending_error;
+
        struct irq_work work;
 };


That ensures there is no race between signaling and raising the error,
but accepts that multiple fences may try and raise an error. There is
still the potential for the signal-on-any to be flagged as an error by
the second fence, but I claim that race is immaterial as the second
fence could have been the signaler.
-Chris
diff mbox series

Patch

diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c
index 12c6f64c0bc2..d90675bb4fcc 100644
--- a/drivers/dma-buf/dma-fence-array.c
+++ b/drivers/dma-buf/dma-fence-array.c
@@ -13,6 +13,12 @@ 
 #include <linux/slab.h>
 #include <linux/dma-fence-array.h>
 
+static void fence_set_error_once(struct dma_fence *fence, int error)
+{
+	if (!fence->error && error)
+		dma_fence_set_error(fence, error);
+}
+
 static const char *dma_fence_array_get_driver_name(struct dma_fence *fence)
 {
 	return "dma_fence_array";
@@ -38,6 +44,13 @@  static void dma_fence_array_cb_func(struct dma_fence *f,
 		container_of(cb, struct dma_fence_array_cb, cb);
 	struct dma_fence_array *array = array_cb->array;
 
+	/*
+	 * Propagate the first error reported by any of our fences, but only
+	 * before we ourselves are signaled.
+	 */
+	if (atomic_read(&array->num_pending) > 0)
+		fence_set_error_once(&array->base, f->error);
+
 	if (atomic_dec_and_test(&array->num_pending))
 		irq_work_queue(&array->work);
 	else
@@ -63,6 +76,8 @@  static bool dma_fence_array_enable_signaling(struct dma_fence *fence)
 		dma_fence_get(&array->base);
 		if (dma_fence_add_callback(array->fences[i], &cb[i].cb,
 					   dma_fence_array_cb_func)) {
+			fence_set_error_once(&array->base,
+					     array->fences[i]->error);
 			dma_fence_put(&array->base);
 			if (atomic_dec_and_test(&array->num_pending))
 				return false;