diff mbox

[v3] drm/i915/guc: Clean up locks in GuC

Message ID 1448928919-12372-1-git-send-email-yu.dai@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

yu.dai@intel.com Dec. 1, 2015, 12:15 a.m. UTC
From: Alex Dai <yu.dai@intel.com>

When GuC Work Queue is full, driver will wait GuC for avaliable
space by delaying 1ms. The wait needs to be out of spinlockirq /
unlock. Otherwise, lockup happens because jiffies won't be updated
due to irq is disabled. The unnecessary locks has been cleared.
dev->struct_mutex is used instead where needed.

Issue is found in igt/gem_close_race.

v3: Remove unnecessary whitespace churn
v2: Clean up wq_lock too
v1: Clean up host2guc lock as well

Signed-off-by: Alex Dai <yu.dai@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c        | 12 ++++++------
 drivers/gpu/drm/i915/i915_guc_submission.c | 31 ++++++------------------------
 drivers/gpu/drm/i915/intel_guc.h           |  4 ----
 3 files changed, 12 insertions(+), 35 deletions(-)

Comments

Dave Gordon Dec. 2, 2015, 7:50 p.m. UTC | #1
On 01/12/15 00:15, Dai, Yu wrote:
> From: Alex Dai <yu.dai@intel.com>
>
> When GuC Work Queue is full, driver will wait GuC for avaliable
> space by delaying 1ms. The wait needs to be out of spinlockirq /
> unlock. Otherwise, lockup happens because jiffies won't be updated
> due to irq is disabled. The unnecessary locks has been cleared.
> dev->struct_mutex is used instead where needed.
>
> Issue is found in igt/gem_close_race.
>
> v3: Remove unnecessary whitespace churn
> v2: Clean up wq_lock too
> v1: Clean up host2guc lock as well
>
> Signed-off-by: Alex Dai <yu.dai@intel.com>

A few typos in the commit message, as pointed out by Nick in the 
previous review cycle, but the code looks OK so

Reviewed-by: Dave Gordon <david.s.gordon@intel.com>

and here's a suggestion for the commit message, with a
bit more background and more 'why' rather than 'what':

: For now, remove the spinlocks that protected the GuC's
: statistics block and work queue; they are only accessed
: by code that already holds the global struct_mutex, and
: so are redundant (until the big struct_mutex rewrite!).
:
: The specific problem that the spinlocks caused was that
: if the work queue was full, the driver would try to
: spinwait for one jiffy, but with interrupts disabled the
: jiffy count would not advance, leading to a system hang.
: The issue was found using test case igt/gem_close_race.
:
: The new version will usleep() instead, still holding
: the struct_mutex but without any spinlocks.

> ---
>   drivers/gpu/drm/i915/i915_debugfs.c        | 12 ++++++------
>   drivers/gpu/drm/i915/i915_guc_submission.c | 31 ++++++------------------------
>   drivers/gpu/drm/i915/intel_guc.h           |  4 ----
>   3 files changed, 12 insertions(+), 35 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index a728ff1..d6b7817 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2473,15 +2473,15 @@ static int i915_guc_info(struct seq_file *m, void *data)
>   	if (!HAS_GUC_SCHED(dev_priv->dev))
>   		return 0;
>
> +	if (mutex_lock_interruptible(&dev->struct_mutex))
> +		return 0;
> +
>   	/* Take a local copy of the GuC data, so we can dump it at leisure */
> -	spin_lock(&dev_priv->guc.host2guc_lock);
>   	guc = dev_priv->guc;
> -	if (guc.execbuf_client) {
> -		spin_lock(&guc.execbuf_client->wq_lock);
> +	if (guc.execbuf_client)
>   		client = *guc.execbuf_client;
> -		spin_unlock(&guc.execbuf_client->wq_lock);
> -	}
> -	spin_unlock(&dev_priv->guc.host2guc_lock);
> +
> +	mutex_unlock(&dev->struct_mutex);
>
>   	seq_printf(m, "GuC total action count: %llu\n", guc.action_count);
>   	seq_printf(m, "GuC action failure count: %u\n", guc.action_fail);
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index ed9f100..a7f9785 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -86,7 +86,6 @@ static int host2guc_action(struct intel_guc *guc, u32 *data, u32 len)
>   		return -EINVAL;
>
>   	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> -	spin_lock(&dev_priv->guc.host2guc_lock);
>
>   	dev_priv->guc.action_count += 1;
>   	dev_priv->guc.action_cmd = data[0];
> @@ -119,7 +118,6 @@ static int host2guc_action(struct intel_guc *guc, u32 *data, u32 len)
>   	}
>   	dev_priv->guc.action_status = status;
>
> -	spin_unlock(&dev_priv->guc.host2guc_lock);
>   	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
>
>   	return ret;
> @@ -292,16 +290,12 @@ static uint32_t select_doorbell_cacheline(struct intel_guc *guc)
>   	const uint32_t cacheline_size = cache_line_size();
>   	uint32_t offset;
>
> -	spin_lock(&guc->host2guc_lock);
> -
>   	/* Doorbell uses a single cache line within a page */
>   	offset = offset_in_page(guc->db_cacheline);
>
>   	/* Moving to next cache line to reduce contention */
>   	guc->db_cacheline += cacheline_size;
>
> -	spin_unlock(&guc->host2guc_lock);
> -
>   	DRM_DEBUG_DRIVER("selected doorbell cacheline 0x%x, next 0x%x, linesize %u\n",
>   			offset, guc->db_cacheline, cacheline_size);
>
> @@ -322,13 +316,11 @@ static uint16_t assign_doorbell(struct intel_guc *guc, uint32_t priority)
>   	const uint16_t end = start + half;
>   	uint16_t id;
>
> -	spin_lock(&guc->host2guc_lock);
>   	id = find_next_zero_bit(guc->doorbell_bitmap, end, start);
>   	if (id == end)
>   		id = GUC_INVALID_DOORBELL_ID;
>   	else
>   		bitmap_set(guc->doorbell_bitmap, id, 1);
> -	spin_unlock(&guc->host2guc_lock);
>
>   	DRM_DEBUG_DRIVER("assigned %s priority doorbell id 0x%x\n",
>   			hi_pri ? "high" : "normal", id);
> @@ -338,9 +330,7 @@ static uint16_t assign_doorbell(struct intel_guc *guc, uint32_t priority)
>
>   static void release_doorbell(struct intel_guc *guc, uint16_t id)
>   {
> -	spin_lock(&guc->host2guc_lock);
>   	bitmap_clear(guc->doorbell_bitmap, id, 1);
> -	spin_unlock(&guc->host2guc_lock);
>   }
>
>   /*
> @@ -487,16 +477,13 @@ static int guc_get_workqueue_space(struct i915_guc_client *gc, u32 *offset)
>   	struct guc_process_desc *desc;
>   	void *base;
>   	u32 size = sizeof(struct guc_wq_item);
> -	int ret = 0, timeout_counter = 200;
> +	int ret = -ETIMEDOUT, timeout_counter = 200;
>
>   	base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0));
>   	desc = base + gc->proc_desc_offset;
>
>   	while (timeout_counter-- > 0) {
> -		ret = wait_for_atomic(CIRC_SPACE(gc->wq_tail, desc->head,
> -				gc->wq_size) >= size, 1);
> -
> -		if (!ret) {
> +		if (CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size) >= size) {
>   			*offset = gc->wq_tail;
>
>   			/* advance the tail for next workqueue item */
> @@ -505,7 +492,11 @@ static int guc_get_workqueue_space(struct i915_guc_client *gc, u32 *offset)
>
>   			/* this will break the loop */
>   			timeout_counter = 0;
> +			ret = 0;
>   		}
> +
> +		if (timeout_counter)
> +			usleep_range(1000, 2000);
>   	};
>
>   	kunmap_atomic(base);
> @@ -597,15 +588,12 @@ int i915_guc_submit(struct i915_guc_client *client,
>   {
>   	struct intel_guc *guc = client->guc;
>   	enum intel_ring_id ring_id = rq->ring->id;
> -	unsigned long flags;
>   	int q_ret, b_ret;
>
>   	/* Need this because of the deferred pin ctx and ring */
>   	/* Shall we move this right after ring is pinned? */
>   	lr_context_update(rq);
>
> -	spin_lock_irqsave(&client->wq_lock, flags);
> -
>   	q_ret = guc_add_workqueue_item(client, rq);
>   	if (q_ret == 0)
>   		b_ret = guc_ring_doorbell(client);
> @@ -620,12 +608,8 @@ int i915_guc_submit(struct i915_guc_client *client,
>   	} else {
>   		client->retcode = 0;
>   	}
> -	spin_unlock_irqrestore(&client->wq_lock, flags);
> -
> -	spin_lock(&guc->host2guc_lock);
>   	guc->submissions[ring_id] += 1;
>   	guc->last_seqno[ring_id] = rq->seqno;
> -	spin_unlock(&guc->host2guc_lock);
>
>   	return q_ret;
>   }
> @@ -768,7 +752,6 @@ static struct i915_guc_client *guc_client_alloc(struct drm_device *dev,
>   	client->client_obj = obj;
>   	client->wq_offset = GUC_DB_SIZE;
>   	client->wq_size = GUC_WQ_SIZE;
> -	spin_lock_init(&client->wq_lock);
>
>   	client->doorbell_offset = select_doorbell_cacheline(guc);
>
> @@ -871,8 +854,6 @@ int i915_guc_submission_init(struct drm_device *dev)
>   	if (!guc->ctx_pool_obj)
>   		return -ENOMEM;
>
> -	spin_lock_init(&dev_priv->guc.host2guc_lock);
> -
>   	ida_init(&guc->ctx_ids);
>
>   	guc_create_log(guc);
> diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
> index 5ba5866..8229522 100644
> --- a/drivers/gpu/drm/i915/intel_guc.h
> +++ b/drivers/gpu/drm/i915/intel_guc.h
> @@ -42,8 +42,6 @@ struct i915_guc_client {
>
>   	uint32_t wq_offset;
>   	uint32_t wq_size;
> -
> -	spinlock_t wq_lock;		/* Protects all data below	*/
>   	uint32_t wq_tail;
>
>   	/* GuC submission statistics & status */
> @@ -95,8 +93,6 @@ struct intel_guc {
>
>   	struct i915_guc_client *execbuf_client;
>
> -	spinlock_t host2guc_lock;	/* Protects all data below	*/
> -
>   	DECLARE_BITMAP(doorbell_bitmap, GUC_MAX_DOORBELLS);
>   	uint32_t db_cacheline;		/* Cyclic counter mod pagesize	*/
>
>
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index a728ff1..d6b7817 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2473,15 +2473,15 @@  static int i915_guc_info(struct seq_file *m, void *data)
 	if (!HAS_GUC_SCHED(dev_priv->dev))
 		return 0;
 
+	if (mutex_lock_interruptible(&dev->struct_mutex))
+		return 0;
+
 	/* Take a local copy of the GuC data, so we can dump it at leisure */
-	spin_lock(&dev_priv->guc.host2guc_lock);
 	guc = dev_priv->guc;
-	if (guc.execbuf_client) {
-		spin_lock(&guc.execbuf_client->wq_lock);
+	if (guc.execbuf_client)
 		client = *guc.execbuf_client;
-		spin_unlock(&guc.execbuf_client->wq_lock);
-	}
-	spin_unlock(&dev_priv->guc.host2guc_lock);
+
+	mutex_unlock(&dev->struct_mutex);
 
 	seq_printf(m, "GuC total action count: %llu\n", guc.action_count);
 	seq_printf(m, "GuC action failure count: %u\n", guc.action_fail);
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index ed9f100..a7f9785 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -86,7 +86,6 @@  static int host2guc_action(struct intel_guc *guc, u32 *data, u32 len)
 		return -EINVAL;
 
 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-	spin_lock(&dev_priv->guc.host2guc_lock);
 
 	dev_priv->guc.action_count += 1;
 	dev_priv->guc.action_cmd = data[0];
@@ -119,7 +118,6 @@  static int host2guc_action(struct intel_guc *guc, u32 *data, u32 len)
 	}
 	dev_priv->guc.action_status = status;
 
-	spin_unlock(&dev_priv->guc.host2guc_lock);
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 
 	return ret;
@@ -292,16 +290,12 @@  static uint32_t select_doorbell_cacheline(struct intel_guc *guc)
 	const uint32_t cacheline_size = cache_line_size();
 	uint32_t offset;
 
-	spin_lock(&guc->host2guc_lock);
-
 	/* Doorbell uses a single cache line within a page */
 	offset = offset_in_page(guc->db_cacheline);
 
 	/* Moving to next cache line to reduce contention */
 	guc->db_cacheline += cacheline_size;
 
-	spin_unlock(&guc->host2guc_lock);
-
 	DRM_DEBUG_DRIVER("selected doorbell cacheline 0x%x, next 0x%x, linesize %u\n",
 			offset, guc->db_cacheline, cacheline_size);
 
@@ -322,13 +316,11 @@  static uint16_t assign_doorbell(struct intel_guc *guc, uint32_t priority)
 	const uint16_t end = start + half;
 	uint16_t id;
 
-	spin_lock(&guc->host2guc_lock);
 	id = find_next_zero_bit(guc->doorbell_bitmap, end, start);
 	if (id == end)
 		id = GUC_INVALID_DOORBELL_ID;
 	else
 		bitmap_set(guc->doorbell_bitmap, id, 1);
-	spin_unlock(&guc->host2guc_lock);
 
 	DRM_DEBUG_DRIVER("assigned %s priority doorbell id 0x%x\n",
 			hi_pri ? "high" : "normal", id);
@@ -338,9 +330,7 @@  static uint16_t assign_doorbell(struct intel_guc *guc, uint32_t priority)
 
 static void release_doorbell(struct intel_guc *guc, uint16_t id)
 {
-	spin_lock(&guc->host2guc_lock);
 	bitmap_clear(guc->doorbell_bitmap, id, 1);
-	spin_unlock(&guc->host2guc_lock);
 }
 
 /*
@@ -487,16 +477,13 @@  static int guc_get_workqueue_space(struct i915_guc_client *gc, u32 *offset)
 	struct guc_process_desc *desc;
 	void *base;
 	u32 size = sizeof(struct guc_wq_item);
-	int ret = 0, timeout_counter = 200;
+	int ret = -ETIMEDOUT, timeout_counter = 200;
 
 	base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0));
 	desc = base + gc->proc_desc_offset;
 
 	while (timeout_counter-- > 0) {
-		ret = wait_for_atomic(CIRC_SPACE(gc->wq_tail, desc->head,
-				gc->wq_size) >= size, 1);
-
-		if (!ret) {
+		if (CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size) >= size) {
 			*offset = gc->wq_tail;
 
 			/* advance the tail for next workqueue item */
@@ -505,7 +492,11 @@  static int guc_get_workqueue_space(struct i915_guc_client *gc, u32 *offset)
 
 			/* this will break the loop */
 			timeout_counter = 0;
+			ret = 0;
 		}
+
+		if (timeout_counter)
+			usleep_range(1000, 2000);
 	};
 
 	kunmap_atomic(base);
@@ -597,15 +588,12 @@  int i915_guc_submit(struct i915_guc_client *client,
 {
 	struct intel_guc *guc = client->guc;
 	enum intel_ring_id ring_id = rq->ring->id;
-	unsigned long flags;
 	int q_ret, b_ret;
 
 	/* Need this because of the deferred pin ctx and ring */
 	/* Shall we move this right after ring is pinned? */
 	lr_context_update(rq);
 
-	spin_lock_irqsave(&client->wq_lock, flags);
-
 	q_ret = guc_add_workqueue_item(client, rq);
 	if (q_ret == 0)
 		b_ret = guc_ring_doorbell(client);
@@ -620,12 +608,8 @@  int i915_guc_submit(struct i915_guc_client *client,
 	} else {
 		client->retcode = 0;
 	}
-	spin_unlock_irqrestore(&client->wq_lock, flags);
-
-	spin_lock(&guc->host2guc_lock);
 	guc->submissions[ring_id] += 1;
 	guc->last_seqno[ring_id] = rq->seqno;
-	spin_unlock(&guc->host2guc_lock);
 
 	return q_ret;
 }
@@ -768,7 +752,6 @@  static struct i915_guc_client *guc_client_alloc(struct drm_device *dev,
 	client->client_obj = obj;
 	client->wq_offset = GUC_DB_SIZE;
 	client->wq_size = GUC_WQ_SIZE;
-	spin_lock_init(&client->wq_lock);
 
 	client->doorbell_offset = select_doorbell_cacheline(guc);
 
@@ -871,8 +854,6 @@  int i915_guc_submission_init(struct drm_device *dev)
 	if (!guc->ctx_pool_obj)
 		return -ENOMEM;
 
-	spin_lock_init(&dev_priv->guc.host2guc_lock);
-
 	ida_init(&guc->ctx_ids);
 
 	guc_create_log(guc);
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 5ba5866..8229522 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -42,8 +42,6 @@  struct i915_guc_client {
 
 	uint32_t wq_offset;
 	uint32_t wq_size;
-
-	spinlock_t wq_lock;		/* Protects all data below	*/
 	uint32_t wq_tail;
 
 	/* GuC submission statistics & status */
@@ -95,8 +93,6 @@  struct intel_guc {
 
 	struct i915_guc_client *execbuf_client;
 
-	spinlock_t host2guc_lock;	/* Protects all data below	*/
-
 	DECLARE_BITMAP(doorbell_bitmap, GUC_MAX_DOORBELLS);
 	uint32_t db_cacheline;		/* Cyclic counter mod pagesize	*/