diff mbox

dma-buf/fence: Fix lock inversion within dma-fence-array

Message ID 20171114162719.30958-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson Nov. 14, 2017, 4:27 p.m. UTC
Ages ago Rob Clark noted,

"Currently with fence-array, we have a potential deadlock situation.  If
we fence_add_callback() on an array-fence, the array-fence's lock is
acquired first, and in its ->enable_signaling() callback, it will install
cbs on its array-member fences, so the array-member's lock is acquired
second.

But in the signal path, the array-member's lock is acquired first, and
the array-fence's lock acquired second."

Rob proposed either extensive changes to dma-fence to unnest the
fence-array signaling, or to defer the signaling onto a workqueue. This
is a more refined version of the latter, which should keep the latency
of the fence signaling to a minimum by using an irq-work, which is
executed asap.

Reported-by: Rob Clark <robdclark@gmail.com>
Suggested-by: Rob Clark <robdclark@gmail.com>
References: 1476635975-21981-1-git-send-email-robdclark@gmail.com
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Rob Clark <robdclark@gmail.com>
Cc: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Christian König <christian.koenig@amd.com>
---
 drivers/base/Kconfig              |  1 +
 drivers/dma-buf/dma-fence-array.c | 14 ++++++++++++--
 include/linux/dma-fence-array.h   |  3 +++
 3 files changed, 16 insertions(+), 2 deletions(-)

Comments

Christian König Nov. 14, 2017, 4:42 p.m. UTC | #1
Am 14.11.2017 um 17:27 schrieb Chris Wilson:
> Ages ago Rob Clark noted,
>
> "Currently with fence-array, we have a potential deadlock situation.  If
> we fence_add_callback() on an array-fence, the array-fence's lock is
> acquired first, and in its ->enable_signaling() callback, it will install
> cbs on its array-member fences, so the array-member's lock is acquired
> second.
>
> But in the signal path, the array-member's lock is acquired first, and
> the array-fence's lock acquired second."
>
> Rob proposed either extensive changes to dma-fence to unnest the
> fence-array signaling

BTW: I've looked into this a bit as well to fix lock inversion with the 
GPU scheduler. Long story short: It would be really nice to have, but a 
pain to fix.

> , or to defer the signaling onto a workqueue. This
> is a more refined version of the latter, which should keep the latency
> of the fence signaling to a minimum by using an irq-work, which is
> executed asap.
>
> Reported-by: Rob Clark <robdclark@gmail.com>
> Suggested-by: Rob Clark <robdclark@gmail.com>
> References: 1476635975-21981-1-git-send-email-robdclark@gmail.com
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

Reviewed-by: Christian König <christian.koenig@amd.com>

> Cc: Rob Clark <robdclark@gmail.com>
> Cc: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
> Cc: Sumit Semwal <sumit.semwal@linaro.org>
> Cc: Christian König <christian.koenig@amd.com>
> ---
>   drivers/base/Kconfig              |  1 +
>   drivers/dma-buf/dma-fence-array.c | 14 ++++++++++++--
>   include/linux/dma-fence-array.h   |  3 +++
>   3 files changed, 16 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
> index 2f6614c9a229..2c3cab066871 100644
> --- a/drivers/base/Kconfig
> +++ b/drivers/base/Kconfig
> @@ -245,6 +245,7 @@ config DMA_SHARED_BUFFER
>   	bool
>   	default n
>   	select ANON_INODES
> +	select IRQ_WORK
>   	help
>   	  This option enables the framework for buffer-sharing between
>   	  multiple drivers. A buffer is associated with a file using driver
> diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c
> index 0350829ba62e..dd1edfb27b61 100644
> --- a/drivers/dma-buf/dma-fence-array.c
> +++ b/drivers/dma-buf/dma-fence-array.c
> @@ -31,6 +31,14 @@ static const char *dma_fence_array_get_timeline_name(struct dma_fence *fence)
>   	return "unbound";
>   }
>   
> +static void irq_dma_fence_array_work(struct irq_work *wrk)
> +{
> +	struct dma_fence_array *array = container_of(wrk, typeof(*array), work);
> +
> +	dma_fence_signal(&array->base);
> +	dma_fence_put(&array->base);
> +}
> +
>   static void dma_fence_array_cb_func(struct dma_fence *f,
>   				    struct dma_fence_cb *cb)
>   {
> @@ -39,8 +47,9 @@ static void dma_fence_array_cb_func(struct dma_fence *f,
>   	struct dma_fence_array *array = array_cb->array;
>   
>   	if (atomic_dec_and_test(&array->num_pending))
> -		dma_fence_signal(&array->base);
> -	dma_fence_put(&array->base);
> +		irq_work_queue(&array->work);
> +	else
> +		dma_fence_put(&array->base);
>   }
>   
>   static bool dma_fence_array_enable_signaling(struct dma_fence *fence)
> @@ -136,6 +145,7 @@ struct dma_fence_array *dma_fence_array_create(int num_fences,
>   	spin_lock_init(&array->lock);
>   	dma_fence_init(&array->base, &dma_fence_array_ops, &array->lock,
>   		       context, seqno);
> +	init_irq_work(&array->work, irq_dma_fence_array_work);
>   
>   	array->num_fences = num_fences;
>   	atomic_set(&array->num_pending, signal_on_any ? 1 : num_fences);
> diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h
> index 332a5420243c..bc8940ca280d 100644
> --- a/include/linux/dma-fence-array.h
> +++ b/include/linux/dma-fence-array.h
> @@ -21,6 +21,7 @@
>   #define __LINUX_DMA_FENCE_ARRAY_H
>   
>   #include <linux/dma-fence.h>
> +#include <linux/irq_work.h>
>   
>   /**
>    * struct dma_fence_array_cb - callback helper for fence array
> @@ -47,6 +48,8 @@ struct dma_fence_array {
>   	unsigned num_fences;
>   	atomic_t num_pending;
>   	struct dma_fence **fences;
> +
> +	struct irq_work work;
>   };
>   
>   extern const struct dma_fence_ops dma_fence_array_ops;
Chris Wilson Nov. 15, 2017, 1:20 p.m. UTC | #2
Quoting Chris Wilson (2017-11-14 16:27:19)
> Ages ago Rob Clark noted,
> 
> "Currently with fence-array, we have a potential deadlock situation.  If
> we fence_add_callback() on an array-fence, the array-fence's lock is
> acquired first, and in its ->enable_signaling() callback, it will install
> cbs on its array-member fences, so the array-member's lock is acquired
> second.
> 
> But in the signal path, the array-member's lock is acquired first, and
> the array-fence's lock acquired second."
> 
> Rob proposed either extensive changes to dma-fence to unnest the
> fence-array signaling, or to defer the signaling onto a workqueue. This
> is a more refined version of the latter, which should keep the latency
> of the fence signaling to a minimum by using an irq-work, which is
> executed asap.
> 
> Reported-by: Rob Clark <robdclark@gmail.com>
> Suggested-by: Rob Clark <robdclark@gmail.com>

Testcase: igt/sw_sync/sync_multi_timeline_wait

> References: 1476635975-21981-1-git-send-email-robdclark@gmail.com
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Rob Clark <robdclark@gmail.com>
> Cc: Gustavo Padovan <gustavo.padovan@collabora.co.uk>
> Cc: Sumit Semwal <sumit.semwal@linaro.org>
> Cc: Christian König <christian.koenig@amd.com>
diff mbox

Patch

diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 2f6614c9a229..2c3cab066871 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -245,6 +245,7 @@  config DMA_SHARED_BUFFER
 	bool
 	default n
 	select ANON_INODES
+	select IRQ_WORK
 	help
 	  This option enables the framework for buffer-sharing between
 	  multiple drivers. A buffer is associated with a file using driver
diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c
index 0350829ba62e..dd1edfb27b61 100644
--- a/drivers/dma-buf/dma-fence-array.c
+++ b/drivers/dma-buf/dma-fence-array.c
@@ -31,6 +31,14 @@  static const char *dma_fence_array_get_timeline_name(struct dma_fence *fence)
 	return "unbound";
 }
 
+static void irq_dma_fence_array_work(struct irq_work *wrk)
+{
+	struct dma_fence_array *array = container_of(wrk, typeof(*array), work);
+
+	dma_fence_signal(&array->base);
+	dma_fence_put(&array->base);
+}
+
 static void dma_fence_array_cb_func(struct dma_fence *f,
 				    struct dma_fence_cb *cb)
 {
@@ -39,8 +47,9 @@  static void dma_fence_array_cb_func(struct dma_fence *f,
 	struct dma_fence_array *array = array_cb->array;
 
 	if (atomic_dec_and_test(&array->num_pending))
-		dma_fence_signal(&array->base);
-	dma_fence_put(&array->base);
+		irq_work_queue(&array->work);
+	else
+		dma_fence_put(&array->base);
 }
 
 static bool dma_fence_array_enable_signaling(struct dma_fence *fence)
@@ -136,6 +145,7 @@  struct dma_fence_array *dma_fence_array_create(int num_fences,
 	spin_lock_init(&array->lock);
 	dma_fence_init(&array->base, &dma_fence_array_ops, &array->lock,
 		       context, seqno);
+	init_irq_work(&array->work, irq_dma_fence_array_work);
 
 	array->num_fences = num_fences;
 	atomic_set(&array->num_pending, signal_on_any ? 1 : num_fences);
diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h
index 332a5420243c..bc8940ca280d 100644
--- a/include/linux/dma-fence-array.h
+++ b/include/linux/dma-fence-array.h
@@ -21,6 +21,7 @@ 
 #define __LINUX_DMA_FENCE_ARRAY_H
 
 #include <linux/dma-fence.h>
+#include <linux/irq_work.h>
 
 /**
  * struct dma_fence_array_cb - callback helper for fence array
@@ -47,6 +48,8 @@  struct dma_fence_array {
 	unsigned num_fences;
 	atomic_t num_pending;
 	struct dma_fence **fences;
+
+	struct irq_work work;
 };
 
 extern const struct dma_fence_ops dma_fence_array_ops;