| Message ID | 20180614192404.24534-2-chris@chris-wilson.co.uk (mailing list archive) |
|---|---|
| State | New, archived |
On 14 June 2018 at 20:24, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> From: Jon Bloomfield <jon.bloomfield@intel.com>
>
> Hook up the flags to allow read-only ppGTT mappings for gen8+
>
> v2: Include a selftest to check that writes to a readonly PTE are
> dropped
>
> Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Matthew Auld <matthew.william.auld@gmail.com>
> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> #v1
> Reviewed-by: Matthew Auld <matthew.william.auld@gmail.com> #v1
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c           |  45 ++++--
>  drivers/gpu/drm/i915/i915_gem_gtt.h           |   7 +-
>  drivers/gpu/drm/i915/intel_ringbuffer.c       |  11 +-
>  .../gpu/drm/i915/selftests/i915_gem_context.c | 138 ++++++++++++++++++
>  4 files changed, 182 insertions(+), 19 deletions(-)

<SNIP>

> +static int ro_check(struct drm_i915_gem_object *obj, unsigned int max)
> +{
> +        unsigned int n, m, needs_flush;

<SNIP>

> +                for (m = 0; m < DW_PER_PAGE; m++) {

m < max; or don't pass max?

> +                        if (map[m] != 0xdeadbeef) {
> +                                pr_err("Invalid value (overwritten) at page %d, offset %d: found %08x expected %08x\n",
> +                                       n, m, map[m], 0xdeadbeef);

<SNIP>

> +static int igt_ctx_readonly(void *arg)
> +{

<SNIP>

> +        /* Create a few different contexts (with different mm) and write
> +         * through each ctx/mm using the GPU making sure those writes end
> +         * up in the expected pages of our obj.
> +         */

/*

Reviewed-by: Matthew Auld <matthew.william.auld@gmail.com>
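Matthew's closing remark survives the archive only as a bare "/*"; judging from Joonas's "As noted by Matt" further down the thread, it appears to ask for the multi-line block comment style on the selftest's opening comment. Assuming that reading is right, the requested form would be:

        /*
         * Create a few different contexts (with different mm) and write
         * through each ctx/mm using the GPU making sure those writes end
         * up in the expected pages of our obj.
         */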
Quoting Matthew Auld (2018-06-14 22:32:09)
> On 14 June 2018 at 20:24, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > From: Jon Bloomfield <jon.bloomfield@intel.com>
> >
> > Hook up the flags to allow read-only ppGTT mappings for gen8+
> >
> > v2: Include a selftest to check that writes to a readonly PTE are
> > dropped

<SNIP>

> > +                for (m = 0; m < DW_PER_PAGE; m++) {
>
> m < max; or don't pass max?

max is as far as we try to write, the rest should be filled with
0xdeadbeef from initialisation anyway.

Or I should just pull this into cpu_check() and stop the duplication.
(At first it seemed sensible, then mixed in some writes just to check
the code.)
-Chris
Quoting Chris Wilson (2018-06-14 22:24:01)
> From: Jon Bloomfield <jon.bloomfield@intel.com>
>
> Hook up the flags to allow read-only ppGTT mappings for gen8+
>
> v2: Include a selftest to check that writes to a readonly PTE are
> dropped
>
> Signed-off-by: Jon Bloomfield <jon.bloomfield@intel.com>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Matthew Auld <matthew.william.auld@gmail.com>
> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> #v1
> Reviewed-by: Matthew Auld <matthew.william.auld@gmail.com> #v1

<SNIP>

> +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
> @@ -23,6 +23,7 @@
>   */
>
>  #include "../i915_selftest.h"
> +#include "i915_random.h"
>  #include "igt_flush_test.h"

<SNIP>

> +static int ro_check(struct drm_i915_gem_object *obj, unsigned int max)

<SNIP>

> +                for (m = 0; m < DW_PER_PAGE; m++) {
> +                        if (map[m] != 0xdeadbeef) {

One could have #define MAGIC 0xdeadbeef

> +                                pr_err("Invalid value (overwritten) at page %d, offset %d: found %08x expected %08x\n",
> +                                       n, m, map[m], 0xdeadbeef);

<SNIP>

> +static int igt_ctx_readonly(void *arg)
> +{

<SNIP>

> +        /* Create a few different contexts (with different mm) and write

As noted by Matt.

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
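Putting the two review remarks together (Chris's point that dwords beyond max simply keep their 0xdeadbeef fill, and Joonas's wish for a named magic constant), the merge of ro_check() into cpu_check() that Chris floats could look roughly like the sketch below. It is only a sketch: the name check_object() is invented here, and it assumes the conventions used elsewhere in this selftest, namely that gpu_fill() stores the value m at dword m of every page and that create_test_object() pre-fills the pages with the magic pattern. On a read-only mapping the GPU writes must be dropped, so every dword should still hold that fill.

#define MAGIC 0xdeadbeef /* the initialisation fill, named per Joonas's nit */

static int check_object(struct drm_i915_gem_object *obj,
                        unsigned int max, bool read_only)
{
        unsigned int n, m, needs_flush;
        int err;

        err = i915_gem_obj_prepare_shmem_read(obj, &needs_flush);
        if (err)
                return err;

        for (n = 0; n < real_page_count(obj); n++) {
                u32 *map;

                map = kmap_atomic(i915_gem_object_get_page(obj, n));
                if (needs_flush & CLFLUSH_BEFORE)
                        drm_clflush_virt_range(map, PAGE_SIZE);

                for (m = 0; m < DW_PER_PAGE; m++) {
                        /* GPU writes target dwords [0, max); on a read-only
                         * mapping they are dropped, so the initial fill must
                         * survive everywhere.
                         */
                        u32 expected = (read_only || m >= max) ? MAGIC : m;

                        if (map[m] != expected) {
                                pr_err("Invalid value at page %d, offset %d: found %08x expected %08x\n",
                                       n, m, map[m], expected);
                                err = -EINVAL;
                                break;
                        }
                }

                kunmap_atomic(map);
                if (err)
                        break;
        }

        i915_gem_obj_finish_shmem_access(obj);
        return err;
}

The readback loop in igt_ctx_readonly() could then call check_object(obj, rem, obj->gt_ro) in place of the ro_check()/cpu_check() branch.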
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index c3630abbd260..bfe23e10a127 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -204,7 +204,7 @@ static int ppgtt_bind_vma(struct i915_vma *vma,
                         return err;
         }
 
-        /* Currently applicable only to VLV */
+        /* Applicable to VLV, and gen8+ */
         pte_flags = 0;
         if (vma->obj->gt_ro)
                 pte_flags |= PTE_READ_ONLY;
@@ -961,10 +961,11 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt,
                               struct i915_page_directory_pointer *pdp,
                               struct sgt_dma *iter,
                               struct gen8_insert_pte *idx,
-                              enum i915_cache_level cache_level)
+                              enum i915_cache_level cache_level,
+                              u32 flags)
 {
         struct i915_page_directory *pd;
-        const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, 0);
+        const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
         gen8_pte_t *vaddr;
         bool ret;
 
@@ -1015,14 +1016,14 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt,
 static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
                                    struct i915_vma *vma,
                                    enum i915_cache_level cache_level,
-                                   u32 unused)
+                                   u32 flags)
 {
         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
         struct sgt_dma iter = sgt_dma(vma);
         struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
 
         gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter, &idx,
-                                      cache_level);
+                                      cache_level, flags);
 
         vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
 }
@@ -1030,9 +1031,10 @@ static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
 static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
                                            struct i915_page_directory_pointer **pdps,
                                            struct sgt_dma *iter,
-                                           enum i915_cache_level cache_level)
+                                           enum i915_cache_level cache_level,
+                                           u32 flags)
 {
-        const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, 0);
+        const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags);
         u64 start = vma->node.start;
         dma_addr_t rem = iter->sg->length;
 
@@ -1148,19 +1150,21 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
 static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
                                    struct i915_vma *vma,
                                    enum i915_cache_level cache_level,
-                                   u32 unused)
+                                   u32 flags)
 {
         struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
         struct sgt_dma iter = sgt_dma(vma);
         struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps;
 
         if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) {
-                gen8_ppgtt_insert_huge_entries(vma, pdps, &iter, cache_level);
+                gen8_ppgtt_insert_huge_entries(vma, pdps, &iter, cache_level,
+                                               flags);
         } else {
                 struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
 
                 while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[idx.pml4e++],
-                                                     &iter, &idx, cache_level))
+                                                     &iter, &idx, cache_level,
+                                                     flags))
                         GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4);
 
                 vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
@@ -1573,6 +1577,9 @@ static struct i915_hw_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
                 1ULL << 48 :
                 1ULL << 32;
 
+        /* From bdw, there is support for read-only pages in the PPGTT */
+        ppgtt->vm.has_read_only = true;
+
         /* There are only few exceptions for gen >=6. chv and bxt.
          * And we are not sure about the latter so play safe for now.
          */
@@ -2408,7 +2415,7 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm,
 static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
                                      struct i915_vma *vma,
                                      enum i915_cache_level level,
-                                     u32 unused)
+                                     u32 flags)
 {
         struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
         struct sgt_iter sgt_iter;
@@ -2416,6 +2423,9 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
         const gen8_pte_t pte_encode = gen8_pte_encode(0, level, 0);
         dma_addr_t addr;
 
+        /* The GTT does not support read-only mappings */
+        GEM_BUG_ON(flags & PTE_READ_ONLY);
+
         gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
         gtt_entries += vma->node.start >> PAGE_SHIFT;
         for_each_sgt_dma(addr, sgt_iter, vma->pages)
@@ -2542,13 +2552,14 @@ struct insert_entries {
         struct i915_address_space *vm;
         struct i915_vma *vma;
         enum i915_cache_level level;
+        u32 flags;
 };
 
 static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
 {
         struct insert_entries *arg = _arg;
 
-        gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, 0);
+        gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, arg->flags);
         bxt_vtd_ggtt_wa(arg->vm);
 
         return 0;
@@ -2557,9 +2568,9 @@ static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
 static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
                                              struct i915_vma *vma,
                                              enum i915_cache_level level,
-                                             u32 unused)
+                                             u32 flags)
 {
-        struct insert_entries arg = { vm, vma, level };
+        struct insert_entries arg = { vm, vma, level, flags };
 
         stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
 }
@@ -2650,7 +2661,7 @@ static int ggtt_bind_vma(struct i915_vma *vma,
         struct drm_i915_gem_object *obj = vma->obj;
         u32 pte_flags;
 
-        /* Currently applicable only to VLV */
+        /* Applicable to VLV (gen8+ do not support RO in the GGTT) */
         pte_flags = 0;
         if (obj->gt_ro)
                 pte_flags |= PTE_READ_ONLY;
@@ -3530,6 +3541,10 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
          */
         mutex_lock(&dev_priv->drm.struct_mutex);
         i915_address_space_init(&ggtt->vm, dev_priv, "[global]");
+
+        /* Only VLV supports read-only GGTT mappings */
+        ggtt->vm.has_read_only = IS_VALLEYVIEW(dev_priv);
+
         if (!HAS_LLC(dev_priv) && !USES_PPGTT(dev_priv))
                 ggtt->vm.mm.color_adjust = i915_gtt_color_adjust;
         mutex_unlock(&dev_priv->drm.struct_mutex);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 46c95d188580..67a13a0709fc 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -325,7 +325,12 @@ struct i915_address_space {
         struct list_head unbound_list;
 
         struct pagevec free_pages;
-        bool pt_kmap_wc;
+
+        /* Some systems require uncached updates of the page directories */
+        bool pt_kmap_wc:1;
+
+        /* Some systems support read-only mappings for GGTT and/or PPGTT */
+        bool has_read_only:1;
 
         /* FIXME: Need a more generic return type */
         gen6_pte_t (*pte_encode)(dma_addr_t addr,
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index ef3c76425843..8853a5e6d421 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1097,6 +1097,7 @@ void intel_ring_unpin(struct intel_ring *ring)
 static struct i915_vma *
 intel_ring_create_vma(struct drm_i915_private *dev_priv, int size)
 {
+        struct i915_address_space *vm = &dev_priv->ggtt.vm;
         struct drm_i915_gem_object *obj;
         struct i915_vma *vma;
 
@@ -1106,10 +1107,14 @@ intel_ring_create_vma(struct drm_i915_private *dev_priv, int size)
         if (IS_ERR(obj))
                 return ERR_CAST(obj);
 
-        /* mark ring buffers as read-only from GPU side by default */
-        obj->gt_ro = 1;
+        /*
+         * Mark ring buffers as read-only from GPU side (so no stray overwrites)
+         * if supported by the platform's GGTT.
+         */
+        if (vm->has_read_only)
+                obj->gt_ro = 1;
 
-        vma = i915_vma_instance(obj, &dev_priv->ggtt.vm, NULL);
+        vma = i915_vma_instance(obj, vm, NULL);
         if (IS_ERR(vma))
                 goto err;
 
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
index 836f1af8b833..5bae52068926 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
@@ -23,6 +23,7 @@
  */
 
 #include "../i915_selftest.h"
+#include "i915_random.h"
 #include "igt_flush_test.h"
 
 #include "mock_drm.h"
@@ -266,6 +267,41 @@ static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max)
         return err;
 }
 
+static int ro_check(struct drm_i915_gem_object *obj, unsigned int max)
+{
+        unsigned int n, m, needs_flush;
+        int err;
+
+        err = i915_gem_obj_prepare_shmem_read(obj, &needs_flush);
+        if (err)
+                return err;
+
+        for (n = 0; n < real_page_count(obj); n++) {
+                u32 *map;
+
+                map = kmap_atomic(i915_gem_object_get_page(obj, n));
+                if (needs_flush & CLFLUSH_BEFORE)
+                        drm_clflush_virt_range(map, PAGE_SIZE);
+
+                for (m = 0; m < DW_PER_PAGE; m++) {
+                        if (map[m] != 0xdeadbeef) {
+                                pr_err("Invalid value (overwritten) at page %d, offset %d: found %08x expected %08x\n",
+                                       n, m, map[m], 0xdeadbeef);
+                                err = -EINVAL;
+                                goto out_unmap;
+                        }
+                }
+
+out_unmap:
+                kunmap_atomic(map);
+                if (err)
+                        break;
+        }
+
+        i915_gem_obj_finish_shmem_access(obj);
+        return err;
+}
+
 static int file_add_object(struct drm_file *file,
                            struct drm_i915_gem_object *obj)
 {
@@ -421,6 +457,107 @@ static int igt_ctx_exec(void *arg)
         return err;
 }
 
+static int igt_ctx_readonly(void *arg)
+{
+        struct drm_i915_private *i915 = arg;
+        struct drm_i915_gem_object *obj = NULL;
+        struct drm_file *file;
+        I915_RND_STATE(prng);
+        IGT_TIMEOUT(end_time);
+        LIST_HEAD(objects);
+        struct i915_gem_context *ctx;
+        struct i915_hw_ppgtt *ppgtt;
+        unsigned long ndwords, dw;
+        int err = -ENODEV;
+
+        /* Create a few different contexts (with different mm) and write
+         * through each ctx/mm using the GPU making sure those writes end
+         * up in the expected pages of our obj.
+         */
+
+        file = mock_file(i915);
+        if (IS_ERR(file))
+                return PTR_ERR(file);
+
+        mutex_lock(&i915->drm.struct_mutex);
+
+        ctx = i915_gem_create_context(i915, file->driver_priv);
+        if (IS_ERR(ctx)) {
+                err = PTR_ERR(ctx);
+                goto out_unlock;
+        }
+
+        ppgtt = ctx->ppgtt ?: i915->mm.aliasing_ppgtt;
+        if (!ppgtt || !ppgtt->vm.has_read_only) {
+                err = 0;
+                goto out_unlock;
+        }
+
+        ndwords = 0;
+        dw = 0;
+        while (!time_after(jiffies, end_time)) {
+                struct intel_engine_cs *engine;
+                unsigned int id;
+
+                for_each_engine(engine, i915, id) {
+                        if (!intel_engine_can_store_dword(engine))
+                                continue;
+
+                        if (!obj) {
+                                obj = create_test_object(ctx, file, &objects);
+                                if (IS_ERR(obj)) {
+                                        err = PTR_ERR(obj);
+                                        goto out_unlock;
+                                }
+
+                                obj->gt_ro = prandom_u32_state(&prng);
+                        }
+
+                        intel_runtime_pm_get(i915);
+                        err = gpu_fill(obj, ctx, engine, dw);
+                        intel_runtime_pm_put(i915);
+                        if (err) {
+                                pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n",
+                                       ndwords, dw, max_dwords(obj),
+                                       engine->name, ctx->hw_id,
+                                       yesno(!!ctx->ppgtt), err);
+                                goto out_unlock;
+                        }
+
+                        if (++dw == max_dwords(obj)) {
+                                obj = NULL;
+                                dw = 0;
+                        }
+                        ndwords++;
+                }
+        }
+        pr_info("Submitted %lu dwords (across %u engines)\n",
+                ndwords, INTEL_INFO(i915)->num_rings);
+
+        dw = 0;
+        list_for_each_entry(obj, &objects, st_link) {
+                unsigned int rem =
+                        min_t(unsigned int, ndwords - dw, max_dwords(obj));
+
+                if (obj->gt_ro)
+                        err = ro_check(obj, rem);
+                else
+                        err = cpu_check(obj, rem);
+                if (err)
+                        break;
+
+                dw += rem;
+        }
+
+out_unlock:
+        if (igt_flush_test(i915, I915_WAIT_LOCKED))
+                err = -EIO;
+        mutex_unlock(&i915->drm.struct_mutex);
+
+        mock_file_free(i915, file);
+        return err;
+}
+
 static __maybe_unused const char *
 __engine_name(struct drm_i915_private *i915, unsigned int engines)
 {
@@ -595,6 +732,7 @@ int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv)
         static const struct i915_subtest tests[] = {
                 SUBTEST(igt_switch_to_kernel_context),
                 SUBTEST(igt_ctx_exec),
+                SUBTEST(igt_ctx_readonly),
         };
         bool fake_alias = false;
         int err;
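For context on what the plumbed flags actually do: this diff only threads them down to gen8_pte_encode(), whose PTE_READ_ONLY handling is presumed to arrive earlier in this series (the diff already passes flags into it). A minimal sketch of that consumption, assuming the gen8 PTE layout in which bit 0 marks the page present and bit 1 makes it writable; the names sketch_gen8_pte_encode, GEN8_PAGE_PRESENT and GEN8_PAGE_RW are invented for illustration:

#include <linux/bits.h>

#define GEN8_PAGE_PRESENT        BIT_ULL(0)
#define GEN8_PAGE_RW             BIT_ULL(1)

/* Sketch only: the real encoder also folds in the cache-level PPAT bits. */
static u64 sketch_gen8_pte_encode(u64 addr, u32 flags)
{
        u64 pte = addr | GEN8_PAGE_PRESENT | GEN8_PAGE_RW;

        if (flags & PTE_READ_ONLY)
                pte &= ~GEN8_PAGE_RW; /* GPU writes through this PTE are dropped */

        return pte;
}

This is what the selftest leans on: with the RW bit clear, gpu_fill()'s writes never reach the pages, so ro_check() expects the initialisation pattern to survive intact.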