diff mbox

[2/2] drm/i915: Avoid unguarded reads from the request pointer

Message ID 20170206132559.6137-2-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson Feb. 6, 2017, 1:25 p.m. UTC
In commit 86aa7e760a67 ("drm/i915: Assert that the context-switch
completion matches our context") I added a read to the irq tasklet
handler that compared the on-chip status with that of our sw tracking,
using an unguarded read of the request pointer to get the context and
beyond. Whilst we hold a reference to the request, we do not hold
anything on the context and if we are unlucky it may be reaped from a
second thread retiring the request (since it may retire the request as
soon as the breadcrumb is complete, even before we finish processing the
context switch) as we try to read from the context pointer.

Avoid the racy read from underneath the request by storing the expected
result in the execlist_port[].

Fixes: 86aa7e760a67 ("drm/i915: Assert that the context-switch completion matches our context")
Reported-by: Mika Kuoppala <mika.kuoppala@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c        | 7 ++++---
 drivers/gpu/drm/i915/intel_ringbuffer.h | 1 +
 2 files changed, 5 insertions(+), 3 deletions(-)

Comments

Mika Kuoppala Feb. 6, 2017, 1:57 p.m. UTC | #1
Chris Wilson <chris@chris-wilson.co.uk> writes:

> In commit 86aa7e760a67 ("drm/i915: Assert that the context-switch
> completion matches our context") I added a read to the irq tasklet
> handler that compared the on-chip status with that of our sw tracking,
> using an unguarded read of the request pointer to get the context and
> beyond. Whilst we hold a reference to the request, we do not hold
> anything on the context and if we are unlucky it may be reaped from a
> second thread retiring the request (since it may retire the request as
> soon as the breadcrumb is complete, even before we finish processing the
> context switch) as we try to read from the context pointer.
>

Please add warning of the possibility of context vanishing beneath
our feet. Perhaps a good spot is when we store a bug on variable
context_id.

Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>

> Avoid the racy read from underneath the request by storing the expected
> result in the execlist_port[].
>
> Fixes: 86aa7e760a67 ("drm/i915: Assert that the context-switch completion matches our context")
> Reported-by: Mika Kuoppala <mika.kuoppala@intel.com>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> ---
>  drivers/gpu/drm/i915/intel_lrc.c        | 7 ++++---
>  drivers/gpu/drm/i915/intel_ringbuffer.h | 1 +
>  2 files changed, 5 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 754f77c394fb..ba39c2952438 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -350,6 +350,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
>  		execlists_context_status_change(port[0].request,
>  						INTEL_CONTEXT_SCHEDULE_IN);
>  	desc[0] = execlists_update_context(port[0].request);
> +	GEM_BUG_ONLY(port[0].context_id = upper_32_bits(desc[0]));
>  	port[0].count++;
>  
>  	if (port[1].request) {
> @@ -357,6 +358,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
>  		execlists_context_status_change(port[1].request,
>  						INTEL_CONTEXT_SCHEDULE_IN);
>  		desc[1] = execlists_update_context(port[1].request);
> +		GEM_BUG_ONLY(port[1].context_id = upper_32_bits(desc[1]));
>  		port[1].count = 1;
>  	} else {
>  		desc[1] = 0;
> @@ -563,9 +565,8 @@ static void intel_lrc_irq_handler(unsigned long data)
>  				continue;
>  
>  			/* Check the context/desc id for this event matches */
> -			GEM_BUG_ON(readl(buf + 2 * idx + 1) !=
> -				   upper_32_bits(intel_lr_context_descriptor(port[0].request->ctx,
> -									     engine)));
> +			GEM_BUG_ONLY_ON(readl(buf + 2 * idx + 1) !=
> +					port[0].context_id);
>  
>  			GEM_BUG_ON(port[0].count == 0);
>  			if (--port[0].count == 0) {
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 2c6d3655985e..896838ca502c 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -381,6 +381,7 @@ struct intel_engine_cs {
>  	struct execlist_port {
>  		struct drm_i915_gem_request *request;
>  		unsigned int count;
> +		GEM_BUG_ONLY_DECLARE(u32 context_id);
>  	} execlist_port[2];
>  	struct rb_root execlist_queue;
>  	struct rb_node *execlist_first;
> -- 
> 2.11.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Mika Kuoppala Feb. 6, 2017, 2:01 p.m. UTC | #2
Chris Wilson <chris@chris-wilson.co.uk> writes:

> In commit 86aa7e760a67 ("drm/i915: Assert that the context-switch
> completion matches our context") I added a read to the irq tasklet
> handler that compared the on-chip status with that of our sw tracking,
> using an unguarded read of the request pointer to get the context and
> beyond. Whilst we hold a reference to the request, we do not hold
> anything on the context and if we are unlucky it may be reaped from a
> second thread retiring the request (since it may retire the request as
> soon as the breadcrumb is complete, even before we finish processing the
> context switch) as we try to read from the context pointer.
>
> Avoid the racy read from underneath the request by storing the expected
> result in the execlist_port[].
>
> Fixes: 86aa7e760a67 ("drm/i915: Assert that the context-switch
> completion matches our context")

+ Testcase: igt/gem_ctx_create

> Reported-by: Mika Kuoppala <mika.kuoppala@intel.com>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> ---
>  drivers/gpu/drm/i915/intel_lrc.c        | 7 ++++---
>  drivers/gpu/drm/i915/intel_ringbuffer.h | 1 +
>  2 files changed, 5 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 754f77c394fb..ba39c2952438 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -350,6 +350,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
>  		execlists_context_status_change(port[0].request,
>  						INTEL_CONTEXT_SCHEDULE_IN);
>  	desc[0] = execlists_update_context(port[0].request);
> +	GEM_BUG_ONLY(port[0].context_id = upper_32_bits(desc[0]));
>  	port[0].count++;
>  
>  	if (port[1].request) {
> @@ -357,6 +358,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
>  		execlists_context_status_change(port[1].request,
>  						INTEL_CONTEXT_SCHEDULE_IN);
>  		desc[1] = execlists_update_context(port[1].request);
> +		GEM_BUG_ONLY(port[1].context_id = upper_32_bits(desc[1]));
>  		port[1].count = 1;
>  	} else {
>  		desc[1] = 0;
> @@ -563,9 +565,8 @@ static void intel_lrc_irq_handler(unsigned long data)
>  				continue;
>  
>  			/* Check the context/desc id for this event matches */
> -			GEM_BUG_ON(readl(buf + 2 * idx + 1) !=
> -				   upper_32_bits(intel_lr_context_descriptor(port[0].request->ctx,
> -									     engine)));
> +			GEM_BUG_ONLY_ON(readl(buf + 2 * idx + 1) !=
> +					port[0].context_id);
>  
>  			GEM_BUG_ON(port[0].count == 0);
>  			if (--port[0].count == 0) {
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 2c6d3655985e..896838ca502c 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -381,6 +381,7 @@ struct intel_engine_cs {
>  	struct execlist_port {
>  		struct drm_i915_gem_request *request;
>  		unsigned int count;
> +		GEM_BUG_ONLY_DECLARE(u32 context_id);
>  	} execlist_port[2];
>  	struct rb_root execlist_queue;
>  	struct rb_node *execlist_first;
> -- 
> 2.11.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Chris Wilson Feb. 6, 2017, 2:06 p.m. UTC | #3
On Mon, Feb 06, 2017 at 03:57:47PM +0200, Mika Kuoppala wrote:
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > In commit 86aa7e760a67 ("drm/i915: Assert that the context-switch
> > completion matches our context") I added a read to the irq tasklet
> > handler that compared the on-chip status with that of our sw tracking,
> > using an unguarded read of the request pointer to get the context and
> > beyond. Whilst we hold a reference to the request, we do not hold
> > anything on the context and if we are unlucky it may be reaped from a
> > second thread retiring the request (since it may retire the request as
> > soon as the breadcrumb is complete, even before we finish processing the
> > context switch) as we try to read from the context pointer.
> >
> 
> Please add warning of the possibility of context vanishing beneath
> our feet. Perhaps a good spot is when we store a bug on variable
> context_id.

I prefer it in ctx-switch, since that is where we are in a softirq
section trying to operate under the minimal of locking.
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 754f77c394fb..ba39c2952438 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -350,6 +350,7 @@  static void execlists_submit_ports(struct intel_engine_cs *engine)
 		execlists_context_status_change(port[0].request,
 						INTEL_CONTEXT_SCHEDULE_IN);
 	desc[0] = execlists_update_context(port[0].request);
+	GEM_BUG_ONLY(port[0].context_id = upper_32_bits(desc[0]));
 	port[0].count++;
 
 	if (port[1].request) {
@@ -357,6 +358,7 @@  static void execlists_submit_ports(struct intel_engine_cs *engine)
 		execlists_context_status_change(port[1].request,
 						INTEL_CONTEXT_SCHEDULE_IN);
 		desc[1] = execlists_update_context(port[1].request);
+		GEM_BUG_ONLY(port[1].context_id = upper_32_bits(desc[1]));
 		port[1].count = 1;
 	} else {
 		desc[1] = 0;
@@ -563,9 +565,8 @@  static void intel_lrc_irq_handler(unsigned long data)
 				continue;
 
 			/* Check the context/desc id for this event matches */
-			GEM_BUG_ON(readl(buf + 2 * idx + 1) !=
-				   upper_32_bits(intel_lr_context_descriptor(port[0].request->ctx,
-									     engine)));
+			GEM_BUG_ONLY_ON(readl(buf + 2 * idx + 1) !=
+					port[0].context_id);
 
 			GEM_BUG_ON(port[0].count == 0);
 			if (--port[0].count == 0) {
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 2c6d3655985e..896838ca502c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -381,6 +381,7 @@  struct intel_engine_cs {
 	struct execlist_port {
 		struct drm_i915_gem_request *request;
 		unsigned int count;
+		GEM_BUG_ONLY_DECLARE(u32 context_id);
 	} execlist_port[2];
 	struct rb_root execlist_queue;
 	struct rb_node *execlist_first;