diff mbox

drm/i915/guc: Move wait for GuC out of spinlock/unlock

Message ID 1448319778-29282-1-git-send-email-yu.dai@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

yu.dai@intel.com Nov. 23, 2015, 11:02 p.m. UTC
From: Alex Dai <yu.dai@intel.com>

When GuC Work Queue is full, the driver will wait for GuC to make
space available by delaying 1ms. The wait needs to be outside of the
spin_lock_irq / unlock pair. Otherwise, a lockup happens because jiffies
won't be updated while irqs are disabled.

Issue is found in igt/gem_close_race.

Signed-off-by: Alex Dai <yu.dai@intel.com>
---
 drivers/gpu/drm/i915/i915_guc_submission.c | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

Comments

Daniel Vetter Nov. 24, 2015, 1:04 p.m. UTC | #1
On Mon, Nov 23, 2015 at 03:02:58PM -0800, yu.dai@intel.com wrote:
> From: Alex Dai <yu.dai@intel.com>
> 
> When GuC Work Queue is full, driver will wait GuC for avaliable
> space by delaying 1ms. The wait needs to be out of spinlockirq /
> unlock. Otherwise, lockup happens because jiffies won't be updated
> dur to irq is disabled.
> 
> Issue is found in igt/gem_close_race.
> 
> Signed-off-by: Alex Dai <yu.dai@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_guc_submission.c | 27 +++++++++++++++++----------
>  1 file changed, 17 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index 0a6b007..1418397 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -201,10 +201,13 @@ static int guc_ring_doorbell(struct i915_guc_client *gc)
>  	union guc_doorbell_qw *db;
>  	void *base;
>  	int attempt = 2, ret = -EAGAIN;
> +	unsigned long flags;
>  
>  	base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0));

We don't need kmap_atomic anymore here now, since it's outside of the
spinlock.

>  	desc = base + gc->proc_desc_offset;
>  
> +	spin_lock_irqsave(&gc->wq_lock, flags);

Please don't use the super-generic _irqsave. It's expensive and results in
fragile code when someone accidentally reuses something in an interrupt
handler that was never meant to run in that context.

Instead please use the most specific function:
- spin_lock if you know you are in irq context.
- spin_lock_irq if you know you are not.
- spin_lock_irqsave should be a big warning sign that your code has
  layering issues.

Please audit the entire guc code for the above two issues.

> +
>  	/* Update the tail so it is visible to GuC */
>  	desc->tail = gc->wq_tail;
>  
> @@ -248,7 +251,10 @@ static int guc_ring_doorbell(struct i915_guc_client *gc)
>  			db_exc.cookie = 1;
>  	}
>  
> +	spin_unlock_irqrestore(&gc->wq_lock, flags);
> +
>  	kunmap_atomic(base);
> +
>  	return ret;
>  }
>  
> @@ -487,16 +493,16 @@ static int guc_get_workqueue_space(struct i915_guc_client *gc, u32 *offset)
>  	struct guc_process_desc *desc;
>  	void *base;
>  	u32 size = sizeof(struct guc_wq_item);
> -	int ret = 0, timeout_counter = 200;
> +	int ret = -ETIMEDOUT, timeout_counter = 200;
> +	unsigned long flags;
>  
>  	base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0));
>  	desc = base + gc->proc_desc_offset;
>  
>  	while (timeout_counter-- > 0) {
> -		ret = wait_for_atomic(CIRC_SPACE(gc->wq_tail, desc->head,
> -				gc->wq_size) >= size, 1);
> +		spin_lock_irqsave(&gc->wq_lock, flags);
>  
> -		if (!ret) {
> +		if (CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size) >= size) {
>  			*offset = gc->wq_tail;
>  
>  			/* advance the tail for next workqueue item */
> @@ -505,7 +511,13 @@ static int guc_get_workqueue_space(struct i915_guc_client *gc, u32 *offset)
>  
>  			/* this will break the loop */
>  			timeout_counter = 0;
> +			ret = 0;
>  		}
> +
> +		spin_unlock_irqrestore(&gc->wq_lock, flags);
> +
> +		if (timeout_counter)
> +			usleep_range(1000, 2000);

Do we really not have a interrupt/signal from the guc when it has cleared
up some space?

>  	};
>  
>  	kunmap_atomic(base);
> @@ -597,19 +609,17 @@ int i915_guc_submit(struct i915_guc_client *client,
>  {
>  	struct intel_guc *guc = client->guc;
>  	enum intel_ring_id ring_id = rq->ring->id;
> -	unsigned long flags;
>  	int q_ret, b_ret;
>  
>  	/* Need this because of the deferred pin ctx and ring */
>  	/* Shall we move this right after ring is pinned? */
>  	lr_context_update(rq);
>  
> -	spin_lock_irqsave(&client->wq_lock, flags);
> -
>  	q_ret = guc_add_workqueue_item(client, rq);
>  	if (q_ret == 0)
>  		b_ret = guc_ring_doorbell(client);
>  
> +	spin_lock(&guc->host2guc_lock);

So at first I thought there's a race now, but then I looked at what
host2guc and wq_lock protect. It seems like the only thing they do is
protect against debugfs, all the real protection against inconsistent
state is done through dev->struct_mutex.

Can't we just rip out all this spinlock business from the guc code?
It would be easier than fixing up the races in here.
-Daniel

>  	client->submissions[ring_id] += 1;
>  	if (q_ret) {
>  		client->q_fail += 1;
> @@ -620,9 +630,6 @@ int i915_guc_submit(struct i915_guc_client *client,
>  	} else {
>  		client->retcode = 0;
>  	}
> -	spin_unlock_irqrestore(&client->wq_lock, flags);
> -
> -	spin_lock(&guc->host2guc_lock);
>  	guc->submissions[ring_id] += 1;
>  	guc->last_seqno[ring_id] = rq->seqno;
>  	spin_unlock(&guc->host2guc_lock);
> -- 
> 2.5.0
>
Imre Deak Nov. 24, 2015, 1:26 p.m. UTC | #2
On ti, 2015-11-24 at 14:04 +0100, Daniel Vetter wrote:
> On Mon, Nov 23, 2015 at 03:02:58PM -0800, yu.dai@intel.com wrote:
> > From: Alex Dai <yu.dai@intel.com>
> > 
> > When GuC Work Queue is full, driver will wait GuC for avaliable
> > space by delaying 1ms. The wait needs to be out of spinlockirq /
> > unlock. Otherwise, lockup happens because jiffies won't be updated
> > dur to irq is disabled.
> > 
> > Issue is found in igt/gem_close_race.
> > 
> > Signed-off-by: Alex Dai <yu.dai@intel.com>
> > ---
> >  drivers/gpu/drm/i915/i915_guc_submission.c | 27 +++++++++++++++++-
> > ---------
> >  1 file changed, 17 insertions(+), 10 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
> > b/drivers/gpu/drm/i915/i915_guc_submission.c
> > index 0a6b007..1418397 100644
> > --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> > +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> > @@ -201,10 +201,13 @@ static int guc_ring_doorbell(struct
> > i915_guc_client *gc)
> >  	union guc_doorbell_qw *db;
> >  	void *base;
> >  	int attempt = 2, ret = -EAGAIN;
> > +	unsigned long flags;
> >  
> >  	base = kmap_atomic(i915_gem_object_get_page(gc-
> > >client_obj, 0));
> 
> We don't need kmap_atomic anymore here now, since it's outside of the
> spinlock.
> 
> >  	desc = base + gc->proc_desc_offset;
> >  
> > +	spin_lock_irqsave(&gc->wq_lock, flags);
> 
> Please don't use the super-generic _irqsave. It's expensive and
> results in
> fragile code when someone accidentally reuses something in an
> interrupt
> handler that was never meant to run in that context.
> 
> Instead please use the most specific funtion:
> - spin_lock if you know you are in irq context.
> - sipn_lock_irq if you know you are not.

Right, and simply spin_lock() if the lock is not taken in IRQ context
ever.

> - spin_lock_irqsave should be a big warning sign that your code has
>   layering issues.
> 
> Please audit the entire guc code for the above two issues.

Agreed, it looks inconsistent atm: we do spin_lock(wq_lock) from
debugfs and spin_lock_irq(wq_lock) from i915_guc_submit(). Neither of
them are called from IRQ context AFAICS, in which case a simple
spin_lock() would do.

--Imre

> > +
> >  	/* Update the tail so it is visible to GuC */
> >  	desc->tail = gc->wq_tail;
> >  
> > @@ -248,7 +251,10 @@ static int guc_ring_doorbell(struct
> > i915_guc_client *gc)
> >  			db_exc.cookie = 1;
> >  	}
> >  
> > +	spin_unlock_irqrestore(&gc->wq_lock, flags);
> > +
> >  	kunmap_atomic(base);
> > +
> >  	return ret;
> >  }
> >  
> > @@ -487,16 +493,16 @@ static int guc_get_workqueue_space(struct
> > i915_guc_client *gc, u32 *offset)
> >  	struct guc_process_desc *desc;
> >  	void *base;
> >  	u32 size = sizeof(struct guc_wq_item);
> > -	int ret = 0, timeout_counter = 200;
> > +	int ret = -ETIMEDOUT, timeout_counter = 200;
> > +	unsigned long flags;
> >  
> >  	base = kmap_atomic(i915_gem_object_get_page(gc-
> > >client_obj, 0));
> >  	desc = base + gc->proc_desc_offset;
> >  
> >  	while (timeout_counter-- > 0) {
> > -		ret = wait_for_atomic(CIRC_SPACE(gc->wq_tail,
> > desc->head,
> > -				gc->wq_size) >= size, 1);
> > +		spin_lock_irqsave(&gc->wq_lock, flags);
> >  
> > -		if (!ret) {
> > +		if (CIRC_SPACE(gc->wq_tail, desc->head, gc-
> > >wq_size) >= size) {
> >  			*offset = gc->wq_tail;
> >  
> >  			/* advance the tail for next workqueue
> > item */
> > @@ -505,7 +511,13 @@ static int guc_get_workqueue_space(struct
> > i915_guc_client *gc, u32 *offset)
> >  
> >  			/* this will break the loop */
> >  			timeout_counter = 0;
> > +			ret = 0;
> >  		}
> > +
> > +		spin_unlock_irqrestore(&gc->wq_lock, flags);
> > +
> > +		if (timeout_counter)
> > +			usleep_range(1000, 2000);
> 
> Do we really not have a interrupt/signal from the guc when it has
> cleared
> up some space?
> 
> >  	};
> >  
> >  	kunmap_atomic(base);
> > @@ -597,19 +609,17 @@ int i915_guc_submit(struct i915_guc_client
> > *client,
> >  {
> >  	struct intel_guc *guc = client->guc;
> >  	enum intel_ring_id ring_id = rq->ring->id;
> > -	unsigned long flags;
> >  	int q_ret, b_ret;
> >  
> >  	/* Need this because of the deferred pin ctx and ring */
> >  	/* Shall we move this right after ring is pinned? */
> >  	lr_context_update(rq);
> >  
> > -	spin_lock_irqsave(&client->wq_lock, flags);
> > -
> >  	q_ret = guc_add_workqueue_item(client, rq);
> >  	if (q_ret == 0)
> >  		b_ret = guc_ring_doorbell(client);
> >  
> > +	spin_lock(&guc->host2guc_lock);
> 
> So at first I thought there's a race now, but then I looked at what
> host2guc and wq_lock protect. It seems like the only thing they do is
> protect against debugfs, all the real protection against inconsistent
> state is done through dev->struct_mutex.
> 
> Can't we just rip out all this spinlock business from the guc code?
> It would be easier than fixing up the races in here.



> -Daniel
> 
> >  	client->submissions[ring_id] += 1;
> >  	if (q_ret) {
> >  		client->q_fail += 1;
> > @@ -620,9 +630,6 @@ int i915_guc_submit(struct i915_guc_client
> > *client,
> >  	} else {
> >  		client->retcode = 0;
> >  	}
> > -	spin_unlock_irqrestore(&client->wq_lock, flags);
> > -
> > -	spin_lock(&guc->host2guc_lock);
> >  	guc->submissions[ring_id] += 1;
> >  	guc->last_seqno[ring_id] = rq->seqno;
> >  	spin_unlock(&guc->host2guc_lock);
> > -- 
> > 2.5.0
> > 
>
yu.dai@intel.com Nov. 24, 2015, 5 p.m. UTC | #3
On 11/24/2015 05:26 AM, Imre Deak wrote:
> On ti, 2015-11-24 at 14:04 +0100, Daniel Vetter wrote:
> > On Mon, Nov 23, 2015 at 03:02:58PM -0800, yu.dai@intel.com wrote:
> > > From: Alex Dai <yu.dai@intel.com>
> > >
> > > When GuC Work Queue is full, driver will wait GuC for avaliable
> > > space by delaying 1ms. The wait needs to be out of spinlockirq /
> > > unlock. Otherwise, lockup happens because jiffies won't be updated
> > > dur to irq is disabled.
> > >
> > > Issue is found in igt/gem_close_race.
> > >
> > > Signed-off-by: Alex Dai <yu.dai@intel.com>
> > > ---
> > >  drivers/gpu/drm/i915/i915_guc_submission.c | 27 +++++++++++++++++-
> > > ---------
> > >  1 file changed, 17 insertions(+), 10 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
> > > b/drivers/gpu/drm/i915/i915_guc_submission.c
> > > index 0a6b007..1418397 100644
> > > --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> > > +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> > > @@ -201,10 +201,13 @@ static int guc_ring_doorbell(struct
> > > i915_guc_client *gc)
> > >  	union guc_doorbell_qw *db;
> > >  	void *base;
> > >  	int attempt = 2, ret = -EAGAIN;
> > > +	unsigned long flags;
> > >
> > >  	base = kmap_atomic(i915_gem_object_get_page(gc-
> > > >client_obj, 0));
> >
> > We don't need kmap_atomic anymore here now, since it's outside of the
> > spinlock.
> >
> > >  	desc = base + gc->proc_desc_offset;
> > >
> > > +	spin_lock_irqsave(&gc->wq_lock, flags);
> >
> > Please don't use the super-generic _irqsave. It's expensive and
> > results in
> > fragile code when someone accidentally reuses something in an
> > interrupt
> > handler that was never meant to run in that context.
> >
> > Instead please use the most specific funtion:
> > - spin_lock if you know you are in irq context.
> > - sipn_lock_irq if you know you are not.
>
> Right, and simply spin_lock() if the lock is not taken in IRQ context
> ever.

This is not in IRQ context. So I will use spin_lock_irq instead.
> > - spin_lock_irqsave should be a big warning sign that your code has
> >   layering issues.
> >
> > Please audit the entire guc code for the above two issues.
>
> Agreed, it looks inconsistent atm: we do spin_lock(wq_lock) from
> debugfs and spin_lock_irq(wq_lock) from i915_guc_submit(). Neither of
> them are called from IRQ context AFAICS, in which case a simple
> spin_lock() would do.
>
> --Imre
>
> > > +
> > >  	/* Update the tail so it is visible to GuC */
> > >  	desc->tail = gc->wq_tail;
> > >
> > > @@ -248,7 +251,10 @@ static int guc_ring_doorbell(struct
> > > i915_guc_client *gc)
> > >  			db_exc.cookie = 1;
> > >  	}
> > >
> > > +	spin_unlock_irqrestore(&gc->wq_lock, flags);
> > > +
> > >  	kunmap_atomic(base);
> > > +
> > >  	return ret;
> > >  }
> > >
> > > @@ -487,16 +493,16 @@ static int guc_get_workqueue_space(struct
> > > i915_guc_client *gc, u32 *offset)
> > >  	struct guc_process_desc *desc;
> > >  	void *base;
> > >  	u32 size = sizeof(struct guc_wq_item);
> > > -	int ret = 0, timeout_counter = 200;
> > > +	int ret = -ETIMEDOUT, timeout_counter = 200;
> > > +	unsigned long flags;
> > >
> > >  	base = kmap_atomic(i915_gem_object_get_page(gc-
> > > >client_obj, 0));
> > >  	desc = base + gc->proc_desc_offset;
> > >
> > >  	while (timeout_counter-- > 0) {
> > > -		ret = wait_for_atomic(CIRC_SPACE(gc->wq_tail,
> > > desc->head,
> > > -				gc->wq_size) >= size, 1);
> > > +		spin_lock_irqsave(&gc->wq_lock, flags);
> > >
> > > -		if (!ret) {
> > > +		if (CIRC_SPACE(gc->wq_tail, desc->head, gc-
> > > >wq_size) >= size) {
> > >  			*offset = gc->wq_tail;
> > >
> > >  			/* advance the tail for next workqueue
> > > item */
> > > @@ -505,7 +511,13 @@ static int guc_get_workqueue_space(struct
> > > i915_guc_client *gc, u32 *offset)
> > >
> > >  			/* this will break the loop */
> > >  			timeout_counter = 0;
> > > +			ret = 0;
> > >  		}
> > > +
> > > +		spin_unlock_irqrestore(&gc->wq_lock, flags);
> > > +
> > > +		if (timeout_counter)
> > > +			usleep_range(1000, 2000);
> >
> > Do we really not have a interrupt/signal from the guc when it has
> > cleared
> > up some space?
> >

This is not implemented in fw, although I think it could be done through
the guc to host interrupt. I am worried that if we implement this,
it will end up with the driver handling too many interrupts (maybe the
same amount as context switches). However, ideally we don't want to handle
interrupts at all.
> > >  	};
> > >
> > >  	kunmap_atomic(base);
> > > @@ -597,19 +609,17 @@ int i915_guc_submit(struct i915_guc_client
> > > *client,
> > >  {
> > >  	struct intel_guc *guc = client->guc;
> > >  	enum intel_ring_id ring_id = rq->ring->id;
> > > -	unsigned long flags;
> > >  	int q_ret, b_ret;
> > >
> > >  	/* Need this because of the deferred pin ctx and ring */
> > >  	/* Shall we move this right after ring is pinned? */
> > >  	lr_context_update(rq);
> > >
> > > -	spin_lock_irqsave(&client->wq_lock, flags);
> > > -
> > >  	q_ret = guc_add_workqueue_item(client, rq);
> > >  	if (q_ret == 0)
> > >  		b_ret = guc_ring_doorbell(client);
> > >
> > > +	spin_lock(&guc->host2guc_lock);
> >
> > So at first I thought there's a race now, but then I looked at what
> > host2guc and wq_lock protect. It seems like the only thing they do is
> > protect against debugfs, all the real protection against inconsistent
> > state is done through dev->struct_mutex.
> >
> > Can't we just rip out all this spinlock business from the guc code?
> > It would be easier than fixing up the races in here.
>
>

Yes, the host2guc lock can be done through dev->struct_mutex. But we
definitely don't want to interrupt the process while the driver programs
the guc work queue and rings the doorbell.
> > -Daniel
> >
> > >  	client->submissions[ring_id] += 1;
> > >  	if (q_ret) {
> > >  		client->q_fail += 1;
> > > @@ -620,9 +630,6 @@ int i915_guc_submit(struct i915_guc_client
> > > *client,
> > >  	} else {
> > >  		client->retcode = 0;
> > >  	}
> > > -	spin_unlock_irqrestore(&client->wq_lock, flags);
> > > -
> > > -	spin_lock(&guc->host2guc_lock);
> > >  	guc->submissions[ring_id] += 1;
> > >  	guc->last_seqno[ring_id] = rq->seqno;
> > >  	spin_unlock(&guc->host2guc_lock);
> > > --
> > > 2.5.0
> > >
> >
Imre Deak Nov. 24, 2015, 5:05 p.m. UTC | #4
On ti, 2015-11-24 at 09:00 -0800, Yu Dai wrote:
> 
> On 11/24/2015 05:26 AM, Imre Deak wrote:
> > On ti, 2015-11-24 at 14:04 +0100, Daniel Vetter wrote:
> > > On Mon, Nov 23, 2015 at 03:02:58PM -0800, yu.dai@intel.com wrote:
> > > > From: Alex Dai <yu.dai@intel.com>
> > > > 
> > > > When GuC Work Queue is full, driver will wait GuC for avaliable
> > > > space by delaying 1ms. The wait needs to be out of spinlockirq
> > > > /
> > > > unlock. Otherwise, lockup happens because jiffies won't be
> > > > updated
> > > > dur to irq is disabled.
> > > > 
> > > > Issue is found in igt/gem_close_race.
> > > > 
> > > > Signed-off-by: Alex Dai <yu.dai@intel.com>
> > > > ---
> > > >  drivers/gpu/drm/i915/i915_guc_submission.c | 27
> > > > +++++++++++++++++-
> > > > ---------
> > > >  1 file changed, 17 insertions(+), 10 deletions(-)
> > > > 
> > > > diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
> > > > b/drivers/gpu/drm/i915/i915_guc_submission.c
> > > > index 0a6b007..1418397 100644
> > > > --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> > > > +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> > > > @@ -201,10 +201,13 @@ static int guc_ring_doorbell(struct
> > > > i915_guc_client *gc)
> > > >  	union guc_doorbell_qw *db;
> > > >  	void *base;
> > > >  	int attempt = 2, ret = -EAGAIN;
> > > > +	unsigned long flags;
> > > > 
> > > >  	base = kmap_atomic(i915_gem_object_get_page(gc-
> > > > > client_obj, 0));
> > > 
> > > We don't need kmap_atomic anymore here now, since it's outside of
> > > the
> > > spinlock.
> > > 
> > > >  	desc = base + gc->proc_desc_offset;
> > > > 
> > > > +	spin_lock_irqsave(&gc->wq_lock, flags);
> > > 
> > > Please don't use the super-generic _irqsave. It's expensive and
> > > results in
> > > fragile code when someone accidentally reuses something in an
> > > interrupt
> > > handler that was never meant to run in that context.
> > > 
> > > Instead please use the most specific funtion:
> > > - spin_lock if you know you are in irq context.
> > > - sipn_lock_irq if you know you are not.
> > 
> > Right, and simply spin_lock() if the lock is not taken in IRQ
> > context
> > ever.
> 
> This is not in IRQ context. So I will use spin_lock_irq instead.

You can just use spin_lock(). spin_lock_irq() makes only sense if you
take the lock in IRQ context too, which is not the case.

> > > - spin_lock_irqsave should be a big warning sign that your code
> > > has
> > >   layering issues.
> > > 
> > > Please audit the entire guc code for the above two issues.
> > 
> > Agreed, it looks inconsistent atm: we do spin_lock(wq_lock) from
> > debugfs and spin_lock_irq(wq_lock) from i915_guc_submit(). Neither
> > of
> > them are called from IRQ context AFAICS, in which case a simple
> > spin_lock() would do.
> > 
> > --Imre
> > 
> > > > +
> > > >  	/* Update the tail so it is visible to GuC */
> > > >  	desc->tail = gc->wq_tail;
> > > > 
> > > > @@ -248,7 +251,10 @@ static int guc_ring_doorbell(struct
> > > > i915_guc_client *gc)
> > > >  			db_exc.cookie = 1;
> > > >  	}
> > > > 
> > > > +	spin_unlock_irqrestore(&gc->wq_lock, flags);
> > > > +
> > > >  	kunmap_atomic(base);
> > > > +
> > > >  	return ret;
> > > >  }
> > > > 
> > > > @@ -487,16 +493,16 @@ static int guc_get_workqueue_space(struct
> > > > i915_guc_client *gc, u32 *offset)
> > > >  	struct guc_process_desc *desc;
> > > >  	void *base;
> > > >  	u32 size = sizeof(struct guc_wq_item);
> > > > -	int ret = 0, timeout_counter = 200;
> > > > +	int ret = -ETIMEDOUT, timeout_counter = 200;
> > > > +	unsigned long flags;
> > > > 
> > > >  	base = kmap_atomic(i915_gem_object_get_page(gc-
> > > > > client_obj, 0));
> > > >  	desc = base + gc->proc_desc_offset;
> > > > 
> > > >  	while (timeout_counter-- > 0) {
> > > > -		ret = wait_for_atomic(CIRC_SPACE(gc->wq_tail,
> > > > desc->head,
> > > > -				gc->wq_size) >= size, 1);
> > > > +		spin_lock_irqsave(&gc->wq_lock, flags);
> > > > 
> > > > -		if (!ret) {
> > > > +		if (CIRC_SPACE(gc->wq_tail, desc->head, gc-
> > > > > wq_size) >= size) {
> > > >  			*offset = gc->wq_tail;
> > > > 
> > > >  			/* advance the tail for next workqueue
> > > > item */
> > > > @@ -505,7 +511,13 @@ static int guc_get_workqueue_space(struct
> > > > i915_guc_client *gc, u32 *offset)
> > > > 
> > > >  			/* this will break the loop */
> > > >  			timeout_counter = 0;
> > > > +			ret = 0;
> > > >  		}
> > > > +
> > > > +		spin_unlock_irqrestore(&gc->wq_lock, flags);
> > > > +
> > > > +		if (timeout_counter)
> > > > +			usleep_range(1000, 2000);
> > > 
> > > Do we really not have a interrupt/signal from the guc when it has
> > > cleared
> > > up some space?
> > > 
> 
> This is not implemented in fw although I think it could be done
> through 
> the guc to host interrupt. I am worry about that if we implement
> this, 
> it will end up with driver handles too many interrupts (maybe same 
> amount of context switch). However, ideally we don't want to handle 
> interrupts at all.
> > > >  	};
> > > > 
> > > >  	kunmap_atomic(base);
> > > > @@ -597,19 +609,17 @@ int i915_guc_submit(struct
> > > > i915_guc_client
> > > > *client,
> > > >  {
> > > >  	struct intel_guc *guc = client->guc;
> > > >  	enum intel_ring_id ring_id = rq->ring->id;
> > > > -	unsigned long flags;
> > > >  	int q_ret, b_ret;
> > > > 
> > > >  	/* Need this because of the deferred pin ctx and ring
> > > > */
> > > >  	/* Shall we move this right after ring is pinned? */
> > > >  	lr_context_update(rq);
> > > > 
> > > > -	spin_lock_irqsave(&client->wq_lock, flags);
> > > > -
> > > >  	q_ret = guc_add_workqueue_item(client, rq);
> > > >  	if (q_ret == 0)
> > > >  		b_ret = guc_ring_doorbell(client);
> > > > 
> > > > +	spin_lock(&guc->host2guc_lock);
> > > 
> > > So at first I thought there's a race now, but then I looked at
> > > what
> > > host2guc and wq_lock protect. It seems like the only thing they
> > > do is
> > > protect against debugfs, all the real protection against
> > > inconsistent
> > > state is done through dev->struct_mutex.
> > > 
> > > Can't we just rip out all this spinlock business from the guc
> > > code?
> > > It would be easier than fixing up the races in here.
> > 
> > 
> 
> Yes, host2guc lock can be done through dev->struct_mutex. But
> definitely 
> we don't want to interrupt the process when driver program guc work 
> queue and ring the door bell.
> > > -Daniel
> > > 
> > > >  	client->submissions[ring_id] += 1;
> > > >  	if (q_ret) {
> > > >  		client->q_fail += 1;
> > > > @@ -620,9 +630,6 @@ int i915_guc_submit(struct i915_guc_client
> > > > *client,
> > > >  	} else {
> > > >  		client->retcode = 0;
> > > >  	}
> > > > -	spin_unlock_irqrestore(&client->wq_lock, flags);
> > > > -
> > > > -	spin_lock(&guc->host2guc_lock);
> > > >  	guc->submissions[ring_id] += 1;
> > > >  	guc->last_seqno[ring_id] = rq->seqno;
> > > >  	spin_unlock(&guc->host2guc_lock);
> > > > --
> > > > 2.5.0
> > > > 
> > > 
>
Daniel Vetter Nov. 24, 2015, 6:08 p.m. UTC | #5
On Tue, Nov 24, 2015 at 07:05:47PM +0200, Imre Deak wrote:
> On ti, 2015-11-24 at 09:00 -0800, Yu Dai wrote:
> > 
> > On 11/24/2015 05:26 AM, Imre Deak wrote:
> > > On ti, 2015-11-24 at 14:04 +0100, Daniel Vetter wrote:
> > > > On Mon, Nov 23, 2015 at 03:02:58PM -0800, yu.dai@intel.com wrote:
> > > > > From: Alex Dai <yu.dai@intel.com>
> > > > > 
> > > > > When GuC Work Queue is full, driver will wait GuC for avaliable
> > > > > space by delaying 1ms. The wait needs to be out of spinlockirq
> > > > > /
> > > > > unlock. Otherwise, lockup happens because jiffies won't be
> > > > > updated
> > > > > dur to irq is disabled.
> > > > > 
> > > > > Issue is found in igt/gem_close_race.
> > > > > 
> > > > > Signed-off-by: Alex Dai <yu.dai@intel.com>
> > > > > ---
> > > > >  drivers/gpu/drm/i915/i915_guc_submission.c | 27
> > > > > +++++++++++++++++-
> > > > > ---------
> > > > >  1 file changed, 17 insertions(+), 10 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
> > > > > b/drivers/gpu/drm/i915/i915_guc_submission.c
> > > > > index 0a6b007..1418397 100644
> > > > > --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> > > > > +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> > > > > @@ -201,10 +201,13 @@ static int guc_ring_doorbell(struct
> > > > > i915_guc_client *gc)
> > > > >  	union guc_doorbell_qw *db;
> > > > >  	void *base;
> > > > >  	int attempt = 2, ret = -EAGAIN;
> > > > > +	unsigned long flags;
> > > > > 
> > > > >  	base = kmap_atomic(i915_gem_object_get_page(gc-
> > > > > > client_obj, 0));
> > > > 
> > > > We don't need kmap_atomic anymore here now, since it's outside of
> > > > the
> > > > spinlock.
> > > > 
> > > > >  	desc = base + gc->proc_desc_offset;
> > > > > 
> > > > > +	spin_lock_irqsave(&gc->wq_lock, flags);
> > > > 
> > > > Please don't use the super-generic _irqsave. It's expensive and
> > > > results in
> > > > fragile code when someone accidentally reuses something in an
> > > > interrupt
> > > > handler that was never meant to run in that context.
> > > > 
> > > > Instead please use the most specific funtion:
> > > > - spin_lock if you know you are in irq context.
> > > > - sipn_lock_irq if you know you are not.
> > > 
> > > Right, and simply spin_lock() if the lock is not taken in IRQ
> > > context
> > > ever.
> > 
> > This is not in IRQ context. So I will use spin_lock_irq instead.
> 
> You can just use spin_lock(). spin_lock_irq() makes only sense if you
> take the lock in IRQ context too, which is not the case.

Imo just drop both spinlocks, adding locks for debugfs is overkill imo.
-Daniel

> 
> > > > - spin_lock_irqsave should be a big warning sign that your code
> > > > has
> > > >   layering issues.
> > > > 
> > > > Please audit the entire guc code for the above two issues.
> > > 
> > > Agreed, it looks inconsistent atm: we do spin_lock(wq_lock) from
> > > debugfs and spin_lock_irq(wq_lock) from i915_guc_submit(). Neither
> > > of
> > > them are called from IRQ context AFAICS, in which case a simple
> > > spin_lock() would do.
> > > 
> > > --Imre
> > > 
> > > > > +
> > > > >  	/* Update the tail so it is visible to GuC */
> > > > >  	desc->tail = gc->wq_tail;
> > > > > 
> > > > > @@ -248,7 +251,10 @@ static int guc_ring_doorbell(struct
> > > > > i915_guc_client *gc)
> > > > >  			db_exc.cookie = 1;
> > > > >  	}
> > > > > 
> > > > > +	spin_unlock_irqrestore(&gc->wq_lock, flags);
> > > > > +
> > > > >  	kunmap_atomic(base);
> > > > > +
> > > > >  	return ret;
> > > > >  }
> > > > > 
> > > > > @@ -487,16 +493,16 @@ static int guc_get_workqueue_space(struct
> > > > > i915_guc_client *gc, u32 *offset)
> > > > >  	struct guc_process_desc *desc;
> > > > >  	void *base;
> > > > >  	u32 size = sizeof(struct guc_wq_item);
> > > > > -	int ret = 0, timeout_counter = 200;
> > > > > +	int ret = -ETIMEDOUT, timeout_counter = 200;
> > > > > +	unsigned long flags;
> > > > > 
> > > > >  	base = kmap_atomic(i915_gem_object_get_page(gc-
> > > > > > client_obj, 0));
> > > > >  	desc = base + gc->proc_desc_offset;
> > > > > 
> > > > >  	while (timeout_counter-- > 0) {
> > > > > -		ret = wait_for_atomic(CIRC_SPACE(gc->wq_tail,
> > > > > desc->head,
> > > > > -				gc->wq_size) >= size, 1);
> > > > > +		spin_lock_irqsave(&gc->wq_lock, flags);
> > > > > 
> > > > > -		if (!ret) {
> > > > > +		if (CIRC_SPACE(gc->wq_tail, desc->head, gc-
> > > > > > wq_size) >= size) {
> > > > >  			*offset = gc->wq_tail;
> > > > > 
> > > > >  			/* advance the tail for next workqueue
> > > > > item */
> > > > > @@ -505,7 +511,13 @@ static int guc_get_workqueue_space(struct
> > > > > i915_guc_client *gc, u32 *offset)
> > > > > 
> > > > >  			/* this will break the loop */
> > > > >  			timeout_counter = 0;
> > > > > +			ret = 0;
> > > > >  		}
> > > > > +
> > > > > +		spin_unlock_irqrestore(&gc->wq_lock, flags);
> > > > > +
> > > > > +		if (timeout_counter)
> > > > > +			usleep_range(1000, 2000);
> > > > 
> > > > Do we really not have a interrupt/signal from the guc when it has
> > > > cleared
> > > > up some space?
> > > > 
> > 
> > This is not implemented in fw although I think it could be done
> > through 
> > the guc to host interrupt. I am worry about that if we implement
> > this, 
> > it will end up with driver handles too many interrupts (maybe same 
> > amount of context switch). However, ideally we don't want to handle 
> > interrupts at all.
> > > > >  	};
> > > > > 
> > > > >  	kunmap_atomic(base);
> > > > > @@ -597,19 +609,17 @@ int i915_guc_submit(struct
> > > > > i915_guc_client
> > > > > *client,
> > > > >  {
> > > > >  	struct intel_guc *guc = client->guc;
> > > > >  	enum intel_ring_id ring_id = rq->ring->id;
> > > > > -	unsigned long flags;
> > > > >  	int q_ret, b_ret;
> > > > > 
> > > > >  	/* Need this because of the deferred pin ctx and ring
> > > > > */
> > > > >  	/* Shall we move this right after ring is pinned? */
> > > > >  	lr_context_update(rq);
> > > > > 
> > > > > -	spin_lock_irqsave(&client->wq_lock, flags);
> > > > > -
> > > > >  	q_ret = guc_add_workqueue_item(client, rq);
> > > > >  	if (q_ret == 0)
> > > > >  		b_ret = guc_ring_doorbell(client);
> > > > > 
> > > > > +	spin_lock(&guc->host2guc_lock);
> > > > 
> > > > So at first I thought there's a race now, but then I looked at
> > > > what
> > > > host2guc and wq_lock protect. It seems like the only thing they
> > > > do is
> > > > protect against debugfs, all the real protection against
> > > > inconsistent
> > > > state is done through dev->struct_mutex.
> > > > 
> > > > Can't we just rip out all this spinlock business from the guc
> > > > code?
> > > > It would be easier than fixing up the races in here.
> > > 
> > > 
> > 
> > Yes, host2guc lock can be done through dev->struct_mutex. But
> > definitely 
> > we don't want to interrupt the process when driver program guc work 
> > queue and ring the door bell.
> > > > -Daniel
> > > > 
> > > > >  	client->submissions[ring_id] += 1;
> > > > >  	if (q_ret) {
> > > > >  		client->q_fail += 1;
> > > > > @@ -620,9 +630,6 @@ int i915_guc_submit(struct i915_guc_client
> > > > > *client,
> > > > >  	} else {
> > > > >  		client->retcode = 0;
> > > > >  	}
> > > > > -	spin_unlock_irqrestore(&client->wq_lock, flags);
> > > > > -
> > > > > -	spin_lock(&guc->host2guc_lock);
> > > > >  	guc->submissions[ring_id] += 1;
> > > > >  	guc->last_seqno[ring_id] = rq->seqno;
> > > > >  	spin_unlock(&guc->host2guc_lock);
> > > > > --
> > > > > 2.5.0
> > > > > 
> > > > 
> >
yu.dai@intel.com Nov. 24, 2015, 6:36 p.m. UTC | #6
On 11/24/2015 10:08 AM, Daniel Vetter wrote:
> On Tue, Nov 24, 2015 at 07:05:47PM +0200, Imre Deak wrote:
> > On ti, 2015-11-24 at 09:00 -0800, Yu Dai wrote:
> > >
> > > On 11/24/2015 05:26 AM, Imre Deak wrote:
> > > > On ti, 2015-11-24 at 14:04 +0100, Daniel Vetter wrote:
> > > > > On Mon, Nov 23, 2015 at 03:02:58PM -0800, yu.dai@intel.com wrote:
> > > > > > From: Alex Dai <yu.dai@intel.com>
> > > > > >
> > > > > > When GuC Work Queue is full, driver will wait GuC for avaliable
> > > > > > space by delaying 1ms. The wait needs to be out of spinlockirq
> > > > > > /
> > > > > > unlock. Otherwise, lockup happens because jiffies won't be
> > > > > > updated
> > > > > > dur to irq is disabled.
> > > > > >
> > > > > > Issue is found in igt/gem_close_race.
> > > > > >
> > > > > > Signed-off-by: Alex Dai <yu.dai@intel.com>
> > > > > > ---
> > > > > >  drivers/gpu/drm/i915/i915_guc_submission.c | 27
> > > > > > +++++++++++++++++-
> > > > > > ---------
> > > > > >  1 file changed, 17 insertions(+), 10 deletions(-)
> > > > > >
> > > > > > diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
> > > > > > b/drivers/gpu/drm/i915/i915_guc_submission.c
> > > > > > index 0a6b007..1418397 100644
> > > > > > --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> > > > > > +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> > > > > > @@ -201,10 +201,13 @@ static int guc_ring_doorbell(struct
> > > > > > i915_guc_client *gc)
> > > > > >  	union guc_doorbell_qw *db;
> > > > > >  	void *base;
> > > > > >  	int attempt = 2, ret = -EAGAIN;
> > > > > > +	unsigned long flags;
> > > > > >
> > > > > >  	base = kmap_atomic(i915_gem_object_get_page(gc-
> > > > > > > client_obj, 0));
> > > > >
> > > > > We don't need kmap_atomic anymore here now, since it's outside of
> > > > > the
> > > > > spinlock.
> > > > >
> > > > > >  	desc = base + gc->proc_desc_offset;
> > > > > >
> > > > > > +	spin_lock_irqsave(&gc->wq_lock, flags);
> > > > >
> > > > > Please don't use the super-generic _irqsave. It's expensive and
> > > > > results in
> > > > > fragile code when someone accidentally reuses something in an
> > > > > interrupt
> > > > > handler that was never meant to run in that context.
> > > > >
> > > > > Instead please use the most specific funtion:
> > > > > - spin_lock if you know you are in irq context.
> > > > > - sipn_lock_irq if you know you are not.
> > > >
> > > > Right, and simply spin_lock() if the lock is not taken in IRQ
> > > > context
> > > > ever.
> > >
> > > This is not in IRQ context. So I will use spin_lock_irq instead.
> >
> > You can just use spin_lock(). spin_lock_irq() makes only sense if you
> > take the lock in IRQ context too, which is not the case.
>
> Imo just drop both spinlocks, adding locks for debugfs is overkill imo.
>
How about using mutex_lock_interruptible(&dev->struct_mutex) in
debugfs instead, to replace the host2guc lock?

The spinlock taken while ringing the doorbell is still needed.

Alex
> >
> > > > > - spin_lock_irqsave should be a big warning sign that your code
> > > > > has
> > > > >   layering issues.
> > > > >
> > > > > Please audit the entire guc code for the above two issues.
> > > >
> > > > Agreed, it looks inconsistent atm: we do spin_lock(wq_lock) from
> > > > debugfs and spin_lock_irq(wq_lock) from i915_guc_submit(). Neither
> > > > of
> > > > them are called from IRQ context AFAICS, in which case a simple
> > > > spin_lock() would do.
> > > >
> > > > --Imre
> > > >
> > > > > > +
> > > > > >  	/* Update the tail so it is visible to GuC */
> > > > > >  	desc->tail = gc->wq_tail;
> > > > > >
> > > > > > @@ -248,7 +251,10 @@ static int guc_ring_doorbell(struct
> > > > > > i915_guc_client *gc)
> > > > > >  			db_exc.cookie = 1;
> > > > > >  	}
> > > > > >
> > > > > > +	spin_unlock_irqrestore(&gc->wq_lock, flags);
> > > > > > +
> > > > > >  	kunmap_atomic(base);
> > > > > > +
> > > > > >  	return ret;
> > > > > >  }
> > > > > >
> > > > > > @@ -487,16 +493,16 @@ static int guc_get_workqueue_space(struct
> > > > > > i915_guc_client *gc, u32 *offset)
> > > > > >  	struct guc_process_desc *desc;
> > > > > >  	void *base;
> > > > > >  	u32 size = sizeof(struct guc_wq_item);
> > > > > > -	int ret = 0, timeout_counter = 200;
> > > > > > +	int ret = -ETIMEDOUT, timeout_counter = 200;
> > > > > > +	unsigned long flags;
> > > > > >
> > > > > >  	base = kmap_atomic(i915_gem_object_get_page(gc-
> > > > > > > client_obj, 0));
> > > > > >  	desc = base + gc->proc_desc_offset;
> > > > > >
> > > > > >  	while (timeout_counter-- > 0) {
> > > > > > -		ret = wait_for_atomic(CIRC_SPACE(gc->wq_tail,
> > > > > > desc->head,
> > > > > > -				gc->wq_size) >= size, 1);
> > > > > > +		spin_lock_irqsave(&gc->wq_lock, flags);
> > > > > >
> > > > > > -		if (!ret) {
> > > > > > +		if (CIRC_SPACE(gc->wq_tail, desc->head, gc-
> > > > > > > wq_size) >= size) {
> > > > > >  			*offset = gc->wq_tail;
> > > > > >
> > > > > >  			/* advance the tail for next workqueue
> > > > > > item */
> > > > > > @@ -505,7 +511,13 @@ static int guc_get_workqueue_space(struct
> > > > > > i915_guc_client *gc, u32 *offset)
> > > > > >
> > > > > >  			/* this will break the loop */
> > > > > >  			timeout_counter = 0;
> > > > > > +			ret = 0;
> > > > > >  		}
> > > > > > +
> > > > > > +		spin_unlock_irqrestore(&gc->wq_lock, flags);
> > > > > > +
> > > > > > +		if (timeout_counter)
> > > > > > +			usleep_range(1000, 2000);
> > > > >
> > > > > Do we really not have a interrupt/signal from the guc when it has
> > > > > cleared
> > > > > up some space?
> > > > >
> > >
> > > This is not implemented in fw although I think it could be done
> > > through
> > > the guc to host interrupt. I am worry about that if we implement
> > > this,
> > > it will end up with driver handles too many interrupts (maybe same
> > > amount of context switch). However, ideally we don't want to handle
> > > interrupts at all.
> > > > > >  	};
> > > > > >
> > > > > >  	kunmap_atomic(base);
> > > > > > @@ -597,19 +609,17 @@ int i915_guc_submit(struct
> > > > > > i915_guc_client
> > > > > > *client,
> > > > > >  {
> > > > > >  	struct intel_guc *guc = client->guc;
> > > > > >  	enum intel_ring_id ring_id = rq->ring->id;
> > > > > > -	unsigned long flags;
> > > > > >  	int q_ret, b_ret;
> > > > > >
> > > > > >  	/* Need this because of the deferred pin ctx and ring
> > > > > > */
> > > > > >  	/* Shall we move this right after ring is pinned? */
> > > > > >  	lr_context_update(rq);
> > > > > >
> > > > > > -	spin_lock_irqsave(&client->wq_lock, flags);
> > > > > > -
> > > > > >  	q_ret = guc_add_workqueue_item(client, rq);
> > > > > >  	if (q_ret == 0)
> > > > > >  		b_ret = guc_ring_doorbell(client);
> > > > > >
> > > > > > +	spin_lock(&guc->host2guc_lock);
> > > > >
> > > > > So at first I thought there's a race now, but then I looked at
> > > > > what
> > > > > host2guc and wq_lock protect. It seems like the only thing they
> > > > > do is
> > > > > protect against debugfs, all the real protection against
> > > > > inconsistent
> > > > > state is done through dev->struct_mutex.
> > > > >
> > > > > Can't we just rip out all this spinlock business from the guc
> > > > > code?
> > > > > It would be easier than fixing up the races in here.
> > > >
> > > >
> > >
> > > Yes, host2guc lock can be done through dev->struct_mutex. But
> > > definitely
> > > we don't want to interrupt the process when driver program guc work
> > > queue and ring the door bell.
> > > > > -Daniel
> > > > >
> > > > > >  	client->submissions[ring_id] += 1;
> > > > > >  	if (q_ret) {
> > > > > >  		client->q_fail += 1;
> > > > > > @@ -620,9 +630,6 @@ int i915_guc_submit(struct i915_guc_client
> > > > > > *client,
> > > > > >  	} else {
> > > > > >  		client->retcode = 0;
> > > > > >  	}
> > > > > > -	spin_unlock_irqrestore(&client->wq_lock, flags);
> > > > > > -
> > > > > > -	spin_lock(&guc->host2guc_lock);
> > > > > >  	guc->submissions[ring_id] += 1;
> > > > > >  	guc->last_seqno[ring_id] = rq->seqno;
> > > > > >  	spin_unlock(&guc->host2guc_lock);
> > > > > > --
> > > > > > 2.5.0
> > > > > >
> > > > >
> > >
>
Daniel Vetter Nov. 24, 2015, 7:13 p.m. UTC | #7
On Tue, Nov 24, 2015 at 10:36:54AM -0800, Yu Dai wrote:
> 
> 
> On 11/24/2015 10:08 AM, Daniel Vetter wrote:
> >On Tue, Nov 24, 2015 at 07:05:47PM +0200, Imre Deak wrote:
> >> On ti, 2015-11-24 at 09:00 -0800, Yu Dai wrote:
> >> >
> >> > On 11/24/2015 05:26 AM, Imre Deak wrote:
> >> > > On ti, 2015-11-24 at 14:04 +0100, Daniel Vetter wrote:
> >> > > > On Mon, Nov 23, 2015 at 03:02:58PM -0800, yu.dai@intel.com wrote:
> >> > > > > From: Alex Dai <yu.dai@intel.com>
> >> > > > >
> >> > > > > When GuC Work Queue is full, driver will wait GuC for avaliable
> >> > > > > space by delaying 1ms. The wait needs to be out of spinlockirq
> >> > > > > /
> >> > > > > unlock. Otherwise, lockup happens because jiffies won't be
> >> > > > > updated
> >> > > > > dur to irq is disabled.
> >> > > > >
> >> > > > > Issue is found in igt/gem_close_race.
> >> > > > >
> >> > > > > Signed-off-by: Alex Dai <yu.dai@intel.com>
> >> > > > > ---
> >> > > > >  drivers/gpu/drm/i915/i915_guc_submission.c | 27
> >> > > > > +++++++++++++++++-
> >> > > > > ---------
> >> > > > >  1 file changed, 17 insertions(+), 10 deletions(-)
> >> > > > >
> >> > > > > diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
> >> > > > > b/drivers/gpu/drm/i915/i915_guc_submission.c
> >> > > > > index 0a6b007..1418397 100644
> >> > > > > --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> >> > > > > +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> >> > > > > @@ -201,10 +201,13 @@ static int guc_ring_doorbell(struct
> >> > > > > i915_guc_client *gc)
> >> > > > >  	union guc_doorbell_qw *db;
> >> > > > >  	void *base;
> >> > > > >  	int attempt = 2, ret = -EAGAIN;
> >> > > > > +	unsigned long flags;
> >> > > > >
> >> > > > >  	base = kmap_atomic(i915_gem_object_get_page(gc-
> >> > > > > > client_obj, 0));
> >> > > >
> >> > > > We don't need kmap_atomic anymore here now, since it's outside of
> >> > > > the
> >> > > > spinlock.
> >> > > >
> >> > > > >  	desc = base + gc->proc_desc_offset;
> >> > > > >
> >> > > > > +	spin_lock_irqsave(&gc->wq_lock, flags);
> >> > > >
> >> > > > Please don't use the super-generic _irqsave. It's expensive and
> >> > > > results in
> >> > > > fragile code when someone accidentally reuses something in an
> >> > > > interrupt
> >> > > > handler that was never meant to run in that context.
> >> > > >
> >> > > > Instead please use the most specific funtion:
> >> > > > - spin_lock if you know you are in irq context.
> >> > > > - sipn_lock_irq if you know you are not.
> >> > >
> >> > > Right, and simply spin_lock() if the lock is not taken in IRQ
> >> > > context
> >> > > ever.
> >> >
> >> > This is not in IRQ context. So I will use spin_lock_irq instead.
> >>
> >> You can just use spin_lock(). spin_lock_irq() makes only sense if you
> >> take the lock in IRQ context too, which is not the case.
> >
> >Imo just drop both spinlocks, adding locks for debugfs is overkill imo.
> >
> How about using mutex_lock_interruptible(&dev->struct_mutex) instead in
> debugfs, which is to replace host2guc lock.

Yes.

> spinlock during ring the door bell is still needed.

Where/why is that needed? At least on a quick look I didn't notice
anything.
-Daniel
yu.dai@intel.com Nov. 24, 2015, 10:34 p.m. UTC | #8
On 11/24/2015 11:13 AM, Daniel Vetter wrote:
> On Tue, Nov 24, 2015 at 10:36:54AM -0800, Yu Dai wrote:
> >
> >
> > On 11/24/2015 10:08 AM, Daniel Vetter wrote:
> > >On Tue, Nov 24, 2015 at 07:05:47PM +0200, Imre Deak wrote:
> > >> On ti, 2015-11-24 at 09:00 -0800, Yu Dai wrote:
> > >> >
> > >> > On 11/24/2015 05:26 AM, Imre Deak wrote:
> > >> > > On ti, 2015-11-24 at 14:04 +0100, Daniel Vetter wrote:
> > >> > > > On Mon, Nov 23, 2015 at 03:02:58PM -0800, yu.dai@intel.com wrote:
> > >> > > > > From: Alex Dai <yu.dai@intel.com>
> > >> > > > >
> > >> > > > > When GuC Work Queue is full, driver will wait GuC for avaliable
> > >> > > > > space by delaying 1ms. The wait needs to be out of spinlockirq
> > >> > > > > /
> > >> > > > > unlock. Otherwise, lockup happens because jiffies won't be
> > >> > > > > updated
> > >> > > > > dur to irq is disabled.
> > >> > > > >
> > >> > > > > Issue is found in igt/gem_close_race.
> > >> > > > >
> > >> > > > > Signed-off-by: Alex Dai <yu.dai@intel.com>
> > >> > > > > ---
> > >> > > > >  drivers/gpu/drm/i915/i915_guc_submission.c | 27
> > >> > > > > +++++++++++++++++-
> > >> > > > > ---------
> > >> > > > >  1 file changed, 17 insertions(+), 10 deletions(-)
> > >> > > > >
> > >> > > > > diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
> > >> > > > > b/drivers/gpu/drm/i915/i915_guc_submission.c
> > >> > > > > index 0a6b007..1418397 100644
> > >> > > > > --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> > >> > > > > +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> > >> > > > > @@ -201,10 +201,13 @@ static int guc_ring_doorbell(struct
> > >> > > > > i915_guc_client *gc)
> > >> > > > >  	union guc_doorbell_qw *db;
> > >> > > > >  	void *base;
> > >> > > > >  	int attempt = 2, ret = -EAGAIN;
> > >> > > > > +	unsigned long flags;
> > >> > > > >
> > >> > > > >  	base = kmap_atomic(i915_gem_object_get_page(gc-
> > >> > > > > > client_obj, 0));
> > >> > > >
> > >> > > > We don't need kmap_atomic anymore here now, since it's outside of
> > >> > > > the
> > >> > > > spinlock.
> > >> > > >
> > >> > > > >  	desc = base + gc->proc_desc_offset;
> > >> > > > >
> > >> > > > > +	spin_lock_irqsave(&gc->wq_lock, flags);
> > >> > > >
> > >> > > > Please don't use the super-generic _irqsave. It's expensive and
> > >> > > > results in
> > >> > > > fragile code when someone accidentally reuses something in an
> > >> > > > interrupt
> > >> > > > handler that was never meant to run in that context.
> > >> > > >
> > >> > > > Instead please use the most specific funtion:
> > >> > > > - spin_lock if you know you are in irq context.
> > >> > > > - sipn_lock_irq if you know you are not.
> > >> > >
> > >> > > Right, and simply spin_lock() if the lock is not taken in IRQ
> > >> > > context
> > >> > > ever.
> > >> >
> > >> > This is not in IRQ context. So I will use spin_lock_irq instead.
> > >>
> > >> You can just use spin_lock(). spin_lock_irq() makes only sense if you
> > >> take the lock in IRQ context too, which is not the case.
> > >
> > >Imo just drop both spinlocks, adding locks for debugfs is overkill imo.
> > >
> > How about using mutex_lock_interruptible(&dev->struct_mutex) instead in
> > debugfs, which is to replace host2guc lock.
>
> Yes.
>
> > spinlock during ring the door bell is still needed.
>
> Where/why is that needed? At least on a quick look I didn't notice
> anything.
>

Currently there is only one GuC client doing command submission, so it 
appears we don't need the lock. When there are more clients that all 
write to the scratch registers or ring the doorbell, we don't want them 
to interact with each other. Also, if we implement a GuC-to-host interrupt 
(say, to handle the log-buffer-full event), we do need to protect the 
GuC client content. Well, none of that exists today. I can clean these up 
and test it out.

Alex
Daniel Vetter Nov. 25, 2015, 8:45 a.m. UTC | #9
On Tue, Nov 24, 2015 at 02:34:46PM -0800, Yu Dai wrote:
> 
> 
> On 11/24/2015 11:13 AM, Daniel Vetter wrote:
> >On Tue, Nov 24, 2015 at 10:36:54AM -0800, Yu Dai wrote:
> >>
> >>
> >> On 11/24/2015 10:08 AM, Daniel Vetter wrote:
> >> >On Tue, Nov 24, 2015 at 07:05:47PM +0200, Imre Deak wrote:
> >> >> On ti, 2015-11-24 at 09:00 -0800, Yu Dai wrote:
> >> >> >
> >> >> > On 11/24/2015 05:26 AM, Imre Deak wrote:
> >> >> > > On ti, 2015-11-24 at 14:04 +0100, Daniel Vetter wrote:
> >> >> > > > On Mon, Nov 23, 2015 at 03:02:58PM -0800, yu.dai@intel.com wrote:
> >> >> > > > > From: Alex Dai <yu.dai@intel.com>
> >> >> > > > >
> >> >> > > > > When GuC Work Queue is full, driver will wait GuC for avaliable
> >> >> > > > > space by delaying 1ms. The wait needs to be out of spinlockirq
> >> >> > > > > /
> >> >> > > > > unlock. Otherwise, lockup happens because jiffies won't be
> >> >> > > > > updated
> >> >> > > > > dur to irq is disabled.
> >> >> > > > >
> >> >> > > > > Issue is found in igt/gem_close_race.
> >> >> > > > >
> >> >> > > > > Signed-off-by: Alex Dai <yu.dai@intel.com>
> >> >> > > > > ---
> >> >> > > > >  drivers/gpu/drm/i915/i915_guc_submission.c | 27
> >> >> > > > > +++++++++++++++++-
> >> >> > > > > ---------
> >> >> > > > >  1 file changed, 17 insertions(+), 10 deletions(-)
> >> >> > > > >
> >> >> > > > > diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
> >> >> > > > > b/drivers/gpu/drm/i915/i915_guc_submission.c
> >> >> > > > > index 0a6b007..1418397 100644
> >> >> > > > > --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> >> >> > > > > +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> >> >> > > > > @@ -201,10 +201,13 @@ static int guc_ring_doorbell(struct
> >> >> > > > > i915_guc_client *gc)
> >> >> > > > >  	union guc_doorbell_qw *db;
> >> >> > > > >  	void *base;
> >> >> > > > >  	int attempt = 2, ret = -EAGAIN;
> >> >> > > > > +	unsigned long flags;
> >> >> > > > >
> >> >> > > > >  	base = kmap_atomic(i915_gem_object_get_page(gc-
> >> >> > > > > > client_obj, 0));
> >> >> > > >
> >> >> > > > We don't need kmap_atomic anymore here now, since it's outside of
> >> >> > > > the
> >> >> > > > spinlock.
> >> >> > > >
> >> >> > > > >  	desc = base + gc->proc_desc_offset;
> >> >> > > > >
> >> >> > > > > +	spin_lock_irqsave(&gc->wq_lock, flags);
> >> >> > > >
> >> >> > > > Please don't use the super-generic _irqsave. It's expensive and
> >> >> > > > results in
> >> >> > > > fragile code when someone accidentally reuses something in an
> >> >> > > > interrupt
> >> >> > > > handler that was never meant to run in that context.
> >> >> > > >
> >> >> > > > Instead please use the most specific funtion:
> >> >> > > > - spin_lock if you know you are in irq context.
> >> >> > > > - sipn_lock_irq if you know you are not.
> >> >> > >
> >> >> > > Right, and simply spin_lock() if the lock is not taken in IRQ
> >> >> > > context
> >> >> > > ever.
> >> >> >
> >> >> > This is not in IRQ context. So I will use spin_lock_irq instead.
> >> >>
> >> >> You can just use spin_lock(). spin_lock_irq() makes only sense if you
> >> >> take the lock in IRQ context too, which is not the case.
> >> >
> >> >Imo just drop both spinlocks, adding locks for debugfs is overkill imo.
> >> >
> >> How about using mutex_lock_interruptible(&dev->struct_mutex) instead in
> >> debugfs, which is to replace host2guc lock.
> >
> >Yes.
> >
> >> spinlock during ring the door bell is still needed.
> >
> >Where/why is that needed? At least on a quick look I didn't notice
> >anything.
> >
> 
> Currently there is only one guc client to do the commands submission. It
> appears we don't need the lock. When there are more clients and all write to
> the scratch registers or ring the door bell, we don't want them interact
> with each other. Also, if we implement guc to host interrupt (says to handle
> the log buffer full event), we do need to protect the guc client content.
> Well, none presents today. I can clean up these and test out.

Yeah I think it's better to add locking when we need it, since very likely
something will have changed in the codebase by then. Otherwise there's
only complication and confusion. So please remove the spinlocks and use
the usual interruptible mutex locking for debugfs for now.

Thanks, Daniel
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 0a6b007..1418397 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -201,10 +201,13 @@  static int guc_ring_doorbell(struct i915_guc_client *gc)
 	union guc_doorbell_qw *db;
 	void *base;
 	int attempt = 2, ret = -EAGAIN;
+	unsigned long flags;
 
 	base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0));
 	desc = base + gc->proc_desc_offset;
 
+	spin_lock_irqsave(&gc->wq_lock, flags);
+
 	/* Update the tail so it is visible to GuC */
 	desc->tail = gc->wq_tail;
 
@@ -248,7 +251,10 @@  static int guc_ring_doorbell(struct i915_guc_client *gc)
 			db_exc.cookie = 1;
 	}
 
+	spin_unlock_irqrestore(&gc->wq_lock, flags);
+
 	kunmap_atomic(base);
+
 	return ret;
 }
 
@@ -487,16 +493,16 @@  static int guc_get_workqueue_space(struct i915_guc_client *gc, u32 *offset)
 	struct guc_process_desc *desc;
 	void *base;
 	u32 size = sizeof(struct guc_wq_item);
-	int ret = 0, timeout_counter = 200;
+	int ret = -ETIMEDOUT, timeout_counter = 200;
+	unsigned long flags;
 
 	base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, 0));
 	desc = base + gc->proc_desc_offset;
 
 	while (timeout_counter-- > 0) {
-		ret = wait_for_atomic(CIRC_SPACE(gc->wq_tail, desc->head,
-				gc->wq_size) >= size, 1);
+		spin_lock_irqsave(&gc->wq_lock, flags);
 
-		if (!ret) {
+		if (CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size) >= size) {
 			*offset = gc->wq_tail;
 
 			/* advance the tail for next workqueue item */
@@ -505,7 +511,13 @@  static int guc_get_workqueue_space(struct i915_guc_client *gc, u32 *offset)
 
 			/* this will break the loop */
 			timeout_counter = 0;
+			ret = 0;
 		}
+
+		spin_unlock_irqrestore(&gc->wq_lock, flags);
+
+		if (timeout_counter)
+			usleep_range(1000, 2000);
 	};
 
 	kunmap_atomic(base);
@@ -597,19 +609,17 @@  int i915_guc_submit(struct i915_guc_client *client,
 {
 	struct intel_guc *guc = client->guc;
 	enum intel_ring_id ring_id = rq->ring->id;
-	unsigned long flags;
 	int q_ret, b_ret;
 
 	/* Need this because of the deferred pin ctx and ring */
 	/* Shall we move this right after ring is pinned? */
 	lr_context_update(rq);
 
-	spin_lock_irqsave(&client->wq_lock, flags);
-
 	q_ret = guc_add_workqueue_item(client, rq);
 	if (q_ret == 0)
 		b_ret = guc_ring_doorbell(client);
 
+	spin_lock(&guc->host2guc_lock);
 	client->submissions[ring_id] += 1;
 	if (q_ret) {
 		client->q_fail += 1;
@@ -620,9 +630,6 @@  int i915_guc_submit(struct i915_guc_client *client,
 	} else {
 		client->retcode = 0;
 	}
-	spin_unlock_irqrestore(&client->wq_lock, flags);
-
-	spin_lock(&guc->host2guc_lock);
 	guc->submissions[ring_id] += 1;
 	guc->last_seqno[ring_id] = rq->seqno;
 	spin_unlock(&guc->host2guc_lock);