Message ID | 20161012090522.367-4-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Wed, Oct 12, 2016 at 10:05:20AM +0100, Chris Wilson wrote: > Since the GTT provides universal access to any GPU page, we can use it > to reduce our plethora of read methods to just one. It also has the > important characteristic of being exactly what the GPU sees - if there > are incoherency problems, seeing the batch as executed (rather than as > trapped inside the cpu cache) is important. > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Meh, so much for me reading patches in order ;-) Please reorder this one before the previous one and I'll be happy too. > --- > drivers/gpu/drm/i915/i915_gem_gtt.c | 43 ++++++++---- > drivers/gpu/drm/i915/i915_gem_gtt.h | 2 + > drivers/gpu/drm/i915/i915_gpu_error.c | 120 ++++++++++++---------------------- > 3 files changed, 74 insertions(+), 91 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c > index 0bb4232f66bc..2d846aa39ca5 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > @@ -2717,6 +2717,7 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) > */ > struct i915_ggtt *ggtt = &dev_priv->ggtt; > unsigned long hole_start, hole_end; > + struct i915_hw_ppgtt *ppgtt; > struct drm_mm_node *entry; > int ret; > > @@ -2724,6 +2725,15 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) > if (ret) > return ret; > > + /* Reserve a mappable slot for our lockless error capture */ > + ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm, > + &ggtt->error_capture, > + 4096, 0, -1, > + 0, ggtt->mappable_end, > + 0, 0); > + if (ret) > + return ret; > + > /* Clear any non-preallocated blocks */ > drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) { > DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", > @@ -2738,25 +2748,21 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) > true); > > if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) { > - struct i915_hw_ppgtt *ppgtt; > - > ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); > - if (!ppgtt) > - return -ENOMEM; > + if (!ppgtt) { > + ret = -ENOMEM; > + goto err; > + } > > ret = __hw_ppgtt_init(ppgtt, dev_priv); > - if (ret) { > - kfree(ppgtt); > - return ret; > - } > + if (ret) > + goto err_ppgtt; > > - if (ppgtt->base.allocate_va_range) > + if (ppgtt->base.allocate_va_range) { > ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0, > ppgtt->base.total); > - if (ret) { > - ppgtt->base.cleanup(&ppgtt->base); > - kfree(ppgtt); > - return ret; > + if (ret) > + goto err_ppgtt_cleanup; > } > > ppgtt->base.clear_range(&ppgtt->base, > @@ -2770,6 +2776,14 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) > } > > return 0; > + > +err_ppgtt_cleanup: > + ppgtt->base.cleanup(&ppgtt->base); > +err_ppgtt: > + kfree(ppgtt); > +err: > + drm_mm_remove_node(&ggtt->error_capture); > + return ret; > } > > /** > @@ -2788,6 +2802,9 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) > > i915_gem_cleanup_stolen(&dev_priv->drm); > > + if (drm_mm_node_allocated(&ggtt->error_capture)) > + drm_mm_remove_node(&ggtt->error_capture); > + > if (drm_mm_initialized(&ggtt->base.mm)) { > intel_vgt_deballoon(dev_priv); > > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h > index ec78be2f8c77..bd93fb8f99d2 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.h > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h > @@ -450,6 +450,8 @@ struct i915_ggtt { > bool do_idle_maps; > > int mtrr; > + > + struct drm_mm_node error_capture; > }; > > struct i915_hw_ppgtt { > diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c > index 159d6d7e0cee..b3b2e6c1c6c6 100644 > --- a/drivers/gpu/drm/i915/i915_gpu_error.c > +++ b/drivers/gpu/drm/i915/i915_gpu_error.c > @@ -656,7 +656,7 @@ static void i915_error_object_free(struct drm_i915_error_object *obj) > return; > > for (page = 0; page < obj->page_count; page++) > - kfree(obj->pages[page]); > + free_page((unsigned long)obj->pages[page]); > > kfree(obj); > } > @@ -693,98 +693,69 @@ static void i915_error_state_free(struct kref *error_ref) > kfree(error); > } > > +static int compress_page(void *src, struct drm_i915_error_object *dst) Jumping ahead a bit with the name, but imo ok ... > +{ > + unsigned long page; > + > + page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN); > + if (!page) > + return -ENOMEM; > + > + dst->pages[dst->page_count++] = (void *)page; > + > + memcpy((void *)page, src, PAGE_SIZE); > + return 0; > +} > + > static struct drm_i915_error_object * > -i915_error_object_create(struct drm_i915_private *dev_priv, > +i915_error_object_create(struct drm_i915_private *i915, > struct i915_vma *vma) > { > - struct i915_ggtt *ggtt = &dev_priv->ggtt; > - struct drm_i915_gem_object *src; > + struct i915_ggtt *ggtt = &i915->ggtt; > + const u64 slot = ggtt->error_capture.start; > struct drm_i915_error_object *dst; > - int num_pages; > - bool use_ggtt; > - int i = 0; > - u64 reloc_offset; > + unsigned long num_pages; > + struct sgt_iter iter; > + dma_addr_t dma; > > if (!vma) > return NULL; > > - src = vma->obj; > - if (!src->pages) > - return NULL; > - > - num_pages = src->base.size >> PAGE_SHIFT; > - > - dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), GFP_ATOMIC); > + num_pages = min_t(u64, vma->size, vma->obj->base.size) >> PAGE_SHIFT; > + dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), > + GFP_ATOMIC | __GFP_NOWARN); > if (!dst) > return NULL; > > dst->gtt_offset = vma->node.start; > dst->gtt_size = vma->node.size; > + dst->page_count = 0; > > - reloc_offset = dst->gtt_offset; > - use_ggtt = (src->cache_level == I915_CACHE_NONE && > - (vma->flags & I915_VMA_GLOBAL_BIND) && > - reloc_offset + num_pages * PAGE_SIZE <= ggtt->mappable_end); > - > - /* Cannot access stolen address directly, try to use the aperture */ > - if (src->stolen) { > - use_ggtt = true; > - > - if (!(vma->flags & I915_VMA_GLOBAL_BIND)) > - goto unwind; > - > - reloc_offset = vma->node.start; > - if (reloc_offset + num_pages * PAGE_SIZE > ggtt->mappable_end) > - goto unwind; > - } > + for_each_sgt_dma(dma, iter, vma->pages) { > + void __iomem *s; > + int ret; > > - /* Cannot access snooped pages through the aperture */ > - if (use_ggtt && src->cache_level != I915_CACHE_NONE && > - !HAS_LLC(dev_priv)) > - goto unwind; > + ggtt->base.insert_page(&ggtt->base, dma, slot, > + I915_CACHE_NONE, 0); > > - dst->page_count = num_pages; > - while (num_pages--) { > - void *d; > + s = io_mapping_map_atomic_wc(&ggtt->mappable, slot); > + ret = compress_page((void * __force)s, dst); > + io_mapping_unmap_atomic(s); > > - d = kmalloc(PAGE_SIZE, GFP_ATOMIC); > - if (d == NULL) > + if (ret) > goto unwind; > - > - if (use_ggtt) { > - void __iomem *s; > - > - /* Simply ignore tiling or any overlapping fence. > - * It's part of the error state, and this hopefully > - * captures what the GPU read. > - */ > - > - s = io_mapping_map_atomic_wc(&ggtt->mappable, > - reloc_offset); > - memcpy_fromio(d, s, PAGE_SIZE); > - io_mapping_unmap_atomic(s); > - } else { > - struct page *page; > - void *s; > - > - page = i915_gem_object_get_page(src, i); > - > - s = kmap_atomic(page); > - memcpy(d, s, PAGE_SIZE); > - kunmap_atomic(s); > - } > - > - dst->pages[i++] = d; > - reloc_offset += PAGE_SIZE; > } > - > - return dst; > + goto out; > > unwind: > - while (i--) > - kfree(dst->pages[i]); > + while (dst->page_count--) > + free_page((unsigned long)dst->pages[dst->page_count]); > kfree(dst); > - return NULL; > + dst = NULL; > + > +out: > + ggtt->base.clear_range(&ggtt->base, slot, PAGE_SIZE, true); > + return dst; > } > > /* The error capture is special as tries to run underneath the normal > @@ -1445,9 +1416,6 @@ static int capture(void *data) > { > struct drm_i915_error_state *error = data; > > - /* Ensure that what we readback from memory matches what the GPU sees */ > - wbinvd(); > - > i915_capture_gen_state(error->i915, error); > i915_capture_reg_state(error->i915, error); > i915_gem_record_fences(error->i915, error); > @@ -1460,9 +1428,6 @@ static int capture(void *data) > error->overlay = intel_overlay_capture_error_state(error->i915); > error->display = intel_display_capture_error_state(error->i915); > > - /* And make sure we don't leave trash in the CPU cache */ > - wbinvd(); > - > return 0; > } > > @@ -1539,7 +1504,6 @@ void i915_error_state_get(struct drm_device *dev, > if (error_priv->error) > kref_get(&error_priv->error->ref); > spin_unlock_irq(&dev_priv->gpu_error.lock); > - > } Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
On Thu, Oct 13, 2016 at 05:02:50PM +0200, Daniel Vetter wrote: > On Wed, Oct 12, 2016 at 10:05:20AM +0100, Chris Wilson wrote: > > Since the GTT provides universal access to any GPU page, we can use it > > to reduce our plethora of read methods to just one. It also has the > > important characteristic of being exactly what the GPU sees - if there > > are incoherency problems, seeing the batch as executed (rather than as > > trapped inside the cpu cache) is important. > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> > > Meh, so much for me reading patches in order ;-) > > Please reorder this one before the previous one and I'll be happy too. Hmm, I actually thought this was. Shows what I remember about my series. -Chris
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 0bb4232f66bc..2d846aa39ca5 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2717,6 +2717,7 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) */ struct i915_ggtt *ggtt = &dev_priv->ggtt; unsigned long hole_start, hole_end; + struct i915_hw_ppgtt *ppgtt; struct drm_mm_node *entry; int ret; @@ -2724,6 +2725,15 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) if (ret) return ret; + /* Reserve a mappable slot for our lockless error capture */ + ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm, + &ggtt->error_capture, + 4096, 0, -1, + 0, ggtt->mappable_end, + 0, 0); + if (ret) + return ret; + /* Clear any non-preallocated blocks */ drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) { DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n", @@ -2738,25 +2748,21 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) true); if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) { - struct i915_hw_ppgtt *ppgtt; - ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); - if (!ppgtt) - return -ENOMEM; + if (!ppgtt) { + ret = -ENOMEM; + goto err; + } ret = __hw_ppgtt_init(ppgtt, dev_priv); - if (ret) { - kfree(ppgtt); - return ret; - } + if (ret) + goto err_ppgtt; - if (ppgtt->base.allocate_va_range) + if (ppgtt->base.allocate_va_range) { ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0, ppgtt->base.total); - if (ret) { - ppgtt->base.cleanup(&ppgtt->base); - kfree(ppgtt); - return ret; + if (ret) + goto err_ppgtt_cleanup; } ppgtt->base.clear_range(&ppgtt->base, @@ -2770,6 +2776,14 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) } return 0; + +err_ppgtt_cleanup: + ppgtt->base.cleanup(&ppgtt->base); +err_ppgtt: + kfree(ppgtt); +err: + drm_mm_remove_node(&ggtt->error_capture); + return ret; } /** @@ -2788,6 +2802,9 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) i915_gem_cleanup_stolen(&dev_priv->drm); + if (drm_mm_node_allocated(&ggtt->error_capture)) + drm_mm_remove_node(&ggtt->error_capture); + if (drm_mm_initialized(&ggtt->base.mm)) { intel_vgt_deballoon(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index ec78be2f8c77..bd93fb8f99d2 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -450,6 +450,8 @@ struct i915_ggtt { bool do_idle_maps; int mtrr; + + struct drm_mm_node error_capture; }; struct i915_hw_ppgtt { diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 159d6d7e0cee..b3b2e6c1c6c6 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -656,7 +656,7 @@ static void i915_error_object_free(struct drm_i915_error_object *obj) return; for (page = 0; page < obj->page_count; page++) - kfree(obj->pages[page]); + free_page((unsigned long)obj->pages[page]); kfree(obj); } @@ -693,98 +693,69 @@ static void i915_error_state_free(struct kref *error_ref) kfree(error); } +static int compress_page(void *src, struct drm_i915_error_object *dst) +{ + unsigned long page; + + page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN); + if (!page) + return -ENOMEM; + + dst->pages[dst->page_count++] = (void *)page; + + memcpy((void *)page, src, PAGE_SIZE); + return 0; +} + static struct drm_i915_error_object * -i915_error_object_create(struct drm_i915_private *dev_priv, +i915_error_object_create(struct drm_i915_private *i915, struct i915_vma *vma) { - struct i915_ggtt *ggtt = &dev_priv->ggtt; - struct drm_i915_gem_object *src; + struct i915_ggtt *ggtt = &i915->ggtt; + const u64 slot = ggtt->error_capture.start; struct drm_i915_error_object *dst; - int num_pages; - bool use_ggtt; - int i = 0; - u64 reloc_offset; + unsigned long num_pages; + struct sgt_iter iter; + dma_addr_t dma; if (!vma) return NULL; - src = vma->obj; - if (!src->pages) - return NULL; - - num_pages = src->base.size >> PAGE_SHIFT; - - dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), GFP_ATOMIC); + num_pages = min_t(u64, vma->size, vma->obj->base.size) >> PAGE_SHIFT; + dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), + GFP_ATOMIC | __GFP_NOWARN); if (!dst) return NULL; dst->gtt_offset = vma->node.start; dst->gtt_size = vma->node.size; + dst->page_count = 0; - reloc_offset = dst->gtt_offset; - use_ggtt = (src->cache_level == I915_CACHE_NONE && - (vma->flags & I915_VMA_GLOBAL_BIND) && - reloc_offset + num_pages * PAGE_SIZE <= ggtt->mappable_end); - - /* Cannot access stolen address directly, try to use the aperture */ - if (src->stolen) { - use_ggtt = true; - - if (!(vma->flags & I915_VMA_GLOBAL_BIND)) - goto unwind; - - reloc_offset = vma->node.start; - if (reloc_offset + num_pages * PAGE_SIZE > ggtt->mappable_end) - goto unwind; - } + for_each_sgt_dma(dma, iter, vma->pages) { + void __iomem *s; + int ret; - /* Cannot access snooped pages through the aperture */ - if (use_ggtt && src->cache_level != I915_CACHE_NONE && - !HAS_LLC(dev_priv)) - goto unwind; + ggtt->base.insert_page(&ggtt->base, dma, slot, + I915_CACHE_NONE, 0); - dst->page_count = num_pages; - while (num_pages--) { - void *d; + s = io_mapping_map_atomic_wc(&ggtt->mappable, slot); + ret = compress_page((void * __force)s, dst); + io_mapping_unmap_atomic(s); - d = kmalloc(PAGE_SIZE, GFP_ATOMIC); - if (d == NULL) + if (ret) goto unwind; - - if (use_ggtt) { - void __iomem *s; - - /* Simply ignore tiling or any overlapping fence. - * It's part of the error state, and this hopefully - * captures what the GPU read. - */ - - s = io_mapping_map_atomic_wc(&ggtt->mappable, - reloc_offset); - memcpy_fromio(d, s, PAGE_SIZE); - io_mapping_unmap_atomic(s); - } else { - struct page *page; - void *s; - - page = i915_gem_object_get_page(src, i); - - s = kmap_atomic(page); - memcpy(d, s, PAGE_SIZE); - kunmap_atomic(s); - } - - dst->pages[i++] = d; - reloc_offset += PAGE_SIZE; } - - return dst; + goto out; unwind: - while (i--) - kfree(dst->pages[i]); + while (dst->page_count--) + free_page((unsigned long)dst->pages[dst->page_count]); kfree(dst); - return NULL; + dst = NULL; + +out: + ggtt->base.clear_range(&ggtt->base, slot, PAGE_SIZE, true); + return dst; } /* The error capture is special as tries to run underneath the normal @@ -1445,9 +1416,6 @@ static int capture(void *data) { struct drm_i915_error_state *error = data; - /* Ensure that what we readback from memory matches what the GPU sees */ - wbinvd(); - i915_capture_gen_state(error->i915, error); i915_capture_reg_state(error->i915, error); i915_gem_record_fences(error->i915, error); @@ -1460,9 +1428,6 @@ static int capture(void *data) error->overlay = intel_overlay_capture_error_state(error->i915); error->display = intel_display_capture_error_state(error->i915); - /* And make sure we don't leave trash in the CPU cache */ - wbinvd(); - return 0; } @@ -1539,7 +1504,6 @@ void i915_error_state_get(struct drm_device *dev, if (error_priv->error) kref_get(&error_priv->error->ref); spin_unlock_irq(&dev_priv->gpu_error.lock); - } void i915_error_state_put(struct i915_error_state_file_priv *error_priv)