diff mbox

drm/i915: Detect invalid pages for SandyBridge

Message ID 1348734477-23369-1-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson Sept. 27, 2012, 8:27 a.m. UTC
As SandyBridge returns garbage when decoding certain addresses through
the GTT (all memory below 1MiB and a very small number of individual
pages) we need to prevent the GPU from utilizing those pages. The
ultimate goal would be to prevent our allocator from handing us those
pages, but that is a longer term project. In the short term, we can
detect when we attempt to bind those pages to the GPU and return an
error to the application rather than hang the GPU and potentially the
system.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c |   40 ++++++++++++++++++++++++++++++++---
 1 file changed, 37 insertions(+), 3 deletions(-)

Comments

Daniel Vetter Sept. 27, 2012, 12:14 p.m. UTC | #1
On Thu, Sep 27, 2012 at 09:27:57AM +0100, Chris Wilson wrote:
> As SandyBridge returns garbage when decoding certain addresses through
> the GTT (all memory below 1MiB and a very small number of individual
> pages) we need to prevent the GPU from utilizing those pages. The
> ultimate goal would be to prevent our allocator from handing us those
> pages, but that is a longer term project. In the short term, we can
> detect when we attempt to bind those pages to the GPU and return an
> error to the application rather than hang the GPU and potentially the
> system.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c |   40 ++++++++++++++++++++++++++++++++---
>  1 file changed, 37 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index d9d3fc7..5c7ccfd 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -306,17 +306,51 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
>  	intel_gtt_chipset_flush();
>  }
>  
> +static bool
> +gen6_valid_addresses(struct drm_i915_gem_object *obj)
> +{
> +	struct scatterlist *sg;
> +	int i;
> +
> +	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
> +		dma_addr_t addr = sg_dma_address(sg);
> +		if (WARN(addr < 0x100000 ||
> +			 addr == 0x20050000 ||
> +			 addr == 0x20110000 ||
> +			 addr == 0x20130000 ||
> +			 addr == 0x20138000 ||
> +			 addr == 0x40004000,
> +			 "object references unaddressable physical pages: addr=%x",
> +			 (u32)addr))
> +			return false;

Iirc the bug is about the physical address, no the remapped one after
dmar. We'd need to check the windows code for that though ...
-Daniel

> +	}
> +
> +	return true;
> +}
> +
>  int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
>  {
> -	if (obj->has_dma_mapping)
> -		return 0;
> +	int ret;
>  
> -	if (!dma_map_sg(&obj->base.dev->pdev->dev,
> +	if (!obj->has_dma_mapping &&
> +	    !dma_map_sg(&obj->base.dev->pdev->dev,
>  			obj->pages->sgl, obj->pages->nents,
>  			PCI_DMA_BIDIRECTIONAL))
>  		return -ENOSPC;
>  
> +	if (IS_GEN6(obj->base.dev) && !gen6_valid_addresses(obj)) {
> +		ret = -EFAULT;
> +		goto unmap;
> +	}
> +
>  	return 0;
> +
> +unmap:
> +	if (!obj->has_dma_mapping)
> +		dma_unmap_sg(&obj->base.dev->pdev->dev,
> +			     obj->pages->sgl, obj->pages->nents,
> +			     PCI_DMA_BIDIRECTIONAL);
> +	return ret;
>  }
>  
>  void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
> -- 
> 1.7.10.4
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Daniel Vetter Sept. 27, 2012, 12:16 p.m. UTC | #2
On Thu, Sep 27, 2012 at 02:14:22PM +0200, Daniel Vetter wrote:
> On Thu, Sep 27, 2012 at 09:27:57AM +0100, Chris Wilson wrote:
> > As SandyBridge returns garbage when decoding certain addresses through
> > the GTT (all memory below 1MiB and a very small number of individual
> > pages) we need to prevent the GPU from utilizing those pages. The
> > ultimate goal would be to prevent our allocator from handing us those
> > pages, but that is a longer term project. In the short term, we can
> > detect when we attempt to bind those pages to the GPU and return an
> > error to the application rather than hang the GPU and potentially the
> > system.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/i915_gem_gtt.c |   40 ++++++++++++++++++++++++++++++++---
> >  1 file changed, 37 insertions(+), 3 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > index d9d3fc7..5c7ccfd 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > @@ -306,17 +306,51 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
> >  	intel_gtt_chipset_flush();
> >  }
> >  
> > +static bool
> > +gen6_valid_addresses(struct drm_i915_gem_object *obj)
> > +{
> > +	struct scatterlist *sg;
> > +	int i;
> > +
> > +	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
> > +		dma_addr_t addr = sg_dma_address(sg);
> > +		if (WARN(addr < 0x100000 ||
> > +			 addr == 0x20050000 ||
> > +			 addr == 0x20110000 ||
> > +			 addr == 0x20130000 ||
> > +			 addr == 0x20138000 ||
> > +			 addr == 0x40004000,
> > +			 "object references unaddressable physical pages: addr=%x",
> > +			 (u32)addr))
> > +			return false;
> 
> Iirc the bug is about the physical address, no the remapped one after
> dmar. We'd need to check the windows code for that though ...

Also maybe don't fail, but only smash some dirt into dmesg - otherwise
we'll score a neat regression report and probably angry users.
-Daniel

> 
> > +	}
> > +
> > +	return true;
> > +}
> > +
> >  int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
> >  {
> > -	if (obj->has_dma_mapping)
> > -		return 0;
> > +	int ret;
> >  
> > -	if (!dma_map_sg(&obj->base.dev->pdev->dev,
> > +	if (!obj->has_dma_mapping &&
> > +	    !dma_map_sg(&obj->base.dev->pdev->dev,
> >  			obj->pages->sgl, obj->pages->nents,
> >  			PCI_DMA_BIDIRECTIONAL))
> >  		return -ENOSPC;
> >  
> > +	if (IS_GEN6(obj->base.dev) && !gen6_valid_addresses(obj)) {
> > +		ret = -EFAULT;
> > +		goto unmap;
> > +	}
> > +
> >  	return 0;
> > +
> > +unmap:
> > +	if (!obj->has_dma_mapping)
> > +		dma_unmap_sg(&obj->base.dev->pdev->dev,
> > +			     obj->pages->sgl, obj->pages->nents,
> > +			     PCI_DMA_BIDIRECTIONAL);
> > +	return ret;
> >  }
> >  
> >  void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
> > -- 
> > 1.7.10.4
> > 
> > _______________________________________________
> > Intel-gfx mailing list
> > Intel-gfx@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> 
> -- 
> Daniel Vetter
> Software Engineer, Intel Corporation
> +41 (0) 79 365 57 48 - http://blog.ffwll.ch
Chris Wilson Sept. 27, 2012, 12:22 p.m. UTC | #3
On Thu, 27 Sep 2012 14:16:11 +0200, Daniel Vetter <daniel@ffwll.ch> wrote:
> On Thu, Sep 27, 2012 at 02:14:22PM +0200, Daniel Vetter wrote:
> > On Thu, Sep 27, 2012 at 09:27:57AM +0100, Chris Wilson wrote:
> > > As SandyBridge returns garbage when decoding certain addresses through
> > > the GTT (all memory below 1MiB and a very small number of individual
> > > pages) we need to prevent the GPU from utilizing those pages. The
> > > ultimate goal would be to prevent our allocator from handing us those
> > > pages, but that is a longer term project. In the short term, we can
> > > detect when we attempt to bind those pages to the GPU and return an
> > > error to the application rather than hang the GPU and potentially the
> > > system.
> > > 
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > ---
> > >  drivers/gpu/drm/i915/i915_gem_gtt.c |   40 ++++++++++++++++++++++++++++++++---
> > >  1 file changed, 37 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > > index d9d3fc7..5c7ccfd 100644
> > > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> > > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> > > @@ -306,17 +306,51 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
> > >  	intel_gtt_chipset_flush();
> > >  }
> > >  
> > > +static bool
> > > +gen6_valid_addresses(struct drm_i915_gem_object *obj)
> > > +{
> > > +	struct scatterlist *sg;
> > > +	int i;
> > > +
> > > +	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
> > > +		dma_addr_t addr = sg_dma_address(sg);
> > > +		if (WARN(addr < 0x100000 ||
> > > +			 addr == 0x20050000 ||
> > > +			 addr == 0x20110000 ||
> > > +			 addr == 0x20130000 ||
> > > +			 addr == 0x20138000 ||
> > > +			 addr == 0x40004000,
> > > +			 "object references unaddressable physical pages: addr=%x",
> > > +			 (u32)addr))
> > > +			return false;
> > 
> > Iirc the bug is about the physical address, no the remapped one after
> > dmar. We'd need to check the windows code for that though ...

Hmm, we pass the remapped addresses to the GTT so I presumed that would
be what the decoder threw up over.

> Also maybe don't fail, but only smash some dirt into dmesg - otherwise
> we'll score a neat regression report and probably angry users.

I wouldn't consider that it was a regression, we go from either render
corruption, GPU hang or full system hang to absent rendering and lots of
noise.

I'd much rather take the latter. All in all though, I really want to see
if this is ever an issue and when.
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index d9d3fc7..5c7ccfd 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -306,17 +306,51 @@  void i915_gem_restore_gtt_mappings(struct drm_device *dev)
 	intel_gtt_chipset_flush();
 }
 
+static bool
+gen6_valid_addresses(struct drm_i915_gem_object *obj)
+{
+	struct scatterlist *sg;
+	int i;
+
+	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
+		dma_addr_t addr = sg_dma_address(sg);
+		if (WARN(addr < 0x100000 ||
+			 addr == 0x20050000 ||
+			 addr == 0x20110000 ||
+			 addr == 0x20130000 ||
+			 addr == 0x20138000 ||
+			 addr == 0x40004000,
+			 "object references unaddressable physical pages: addr=%x",
+			 (u32)addr))
+			return false;
+	}
+
+	return true;
+}
+
 int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
 {
-	if (obj->has_dma_mapping)
-		return 0;
+	int ret;
 
-	if (!dma_map_sg(&obj->base.dev->pdev->dev,
+	if (!obj->has_dma_mapping &&
+	    !dma_map_sg(&obj->base.dev->pdev->dev,
 			obj->pages->sgl, obj->pages->nents,
 			PCI_DMA_BIDIRECTIONAL))
 		return -ENOSPC;
 
+	if (IS_GEN6(obj->base.dev) && !gen6_valid_addresses(obj)) {
+		ret = -EFAULT;
+		goto unmap;
+	}
+
 	return 0;
+
+unmap:
+	if (!obj->has_dma_mapping)
+		dma_unmap_sg(&obj->base.dev->pdev->dev,
+			     obj->pages->sgl, obj->pages->nents,
+			     PCI_DMA_BIDIRECTIONAL);
+	return ret;
 }
 
 void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,