Message ID | 20210602170716.280491-4-thomas.hellstrom@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/i915: Move system memory to TTM for discrete | expand |
On Wed, 2 Jun 2021 at 18:08, Thomas Hellström <thomas.hellstrom@linux.intel.com> wrote: > > Instead of relying on a static placement, calculate at get_pages() time. > This should work for LMEM regions and system for now. For stolen we need > to take preallocated range into account. That well be added later. > > Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> Can we split this patch out and merge it? We can use this for feeding in the per BO flags. > --- > drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 92 ++++++++++++++++++------- > drivers/gpu/drm/i915/intel_region_ttm.c | 8 ++- > drivers/gpu/drm/i915/intel_region_ttm.h | 2 + > 3 files changed, 75 insertions(+), 27 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c > index c73c51755c20..8e1c01168c6d 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c > @@ -24,6 +24,11 @@ > #define I915_TTM_PRIO_NO_PAGES 1 > #define I915_TTM_PRIO_HAS_PAGES 2 > > +/* > + * Size of struct ttm_place vector in on-stack struct ttm_placement allocs > + */ > +#define I915_TTM_MAX_PLACEMENTS 10 > + > /** > * struct i915_ttm_tt - TTM page vector with additional private information > * @ttm: The base TTM page vector. 
> @@ -42,32 +47,18 @@ struct i915_ttm_tt { > struct sg_table *cached_st; > }; > > -static const struct ttm_place lmem0_sys_placement_flags[] = { > - { > - .fpfn = 0, > - .lpfn = 0, > - .mem_type = I915_PL_LMEM0, > - .flags = 0, > - }, { > - .fpfn = 0, > - .lpfn = 0, > - .mem_type = I915_PL_SYSTEM, > - .flags = 0, > - } > -}; > - > -static struct ttm_placement i915_lmem0_placement = { > - .num_placement = 1, > - .placement = &lmem0_sys_placement_flags[0], > - .num_busy_placement = 1, > - .busy_placement = &lmem0_sys_placement_flags[0], > +static const struct ttm_place sys_placement_flags = { > + .fpfn = 0, > + .lpfn = 0, > + .mem_type = I915_PL_SYSTEM, > + .flags = 0, > }; > > static struct ttm_placement i915_sys_placement = { > .num_placement = 1, > - .placement = &lmem0_sys_placement_flags[1], > + .placement = &sys_placement_flags, > .num_busy_placement = 1, > - .busy_placement = &lmem0_sys_placement_flags[1], > + .busy_placement = &sys_placement_flags, > }; > > static bool gpu_binds_iomem(struct ttm_resource *mem) > @@ -83,6 +74,55 @@ static bool cpu_maps_iomem(struct ttm_resource *mem) > > static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj); > > +static enum ttm_caching > +i915_ttm_select_tt_caching(const struct drm_i915_gem_object *obj) > +{ > + /* > + * Objects only allowed in system get cached cpu-mappings. > + * Other objects get WC mapping for now. Even if in system. 
> + */ > + if (obj->mm.region->type == INTEL_MEMORY_SYSTEM && > + obj->mm.n_placements <= 1) > + return ttm_cached; > + > + return ttm_write_combined; > +} > + > +static void > +i915_ttm_place_from_region(const struct intel_memory_region *mr, > + struct ttm_place *place) > +{ > + memset(place, 0, sizeof(*place)); > + place->mem_type = intel_region_to_ttm_type(mr); > +} > + > +static void > +i915_ttm_placement_from_obj(const struct drm_i915_gem_object *obj, > + struct ttm_place *requested, > + struct ttm_place *busy, > + struct ttm_placement *placement) > +{ > + unsigned int i; > + unsigned int num_allowed = obj->mm.n_placements; Style nit: Christmas tree > + > + placement->num_placement = 1; > + i915_ttm_place_from_region(num_allowed ? obj->mm.placements[0] : > + obj->mm.region, requested); > + > + /* Cache this on object? */ > + placement->num_busy_placement = num_allowed; > + for (i = 0; i < placement->num_busy_placement; ++i) > + i915_ttm_place_from_region(obj->mm.placements[i], busy + i); > + > + if (num_allowed == 0) { > + *busy = *requested; > + placement->num_busy_placement = 1; > + } > + > + placement->placement = requested; > + placement->busy_placement = busy; > +} > + > static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo, > uint32_t page_flags) > { > @@ -100,7 +140,8 @@ static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo, > man->use_tt) > page_flags |= TTM_PAGE_FLAG_ZERO_ALLOC; > > - ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags, ttm_write_combined); > + ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags, > + i915_ttm_select_tt_caching(obj)); > if (ret) { > kfree(i915_tt); > return NULL; > @@ -465,10 +506,13 @@ static int i915_ttm_get_pages(struct drm_i915_gem_object *obj) > .no_wait_gpu = false, > }; > struct sg_table *st; > + struct ttm_place requested, busy[I915_TTM_MAX_PLACEMENTS]; > + struct ttm_placement placement; > int ret; > > /* Move to the requested placement. 
*/ > - ret = ttm_bo_validate(bo, &i915_lmem0_placement, &ctx); > + i915_ttm_placement_from_obj(obj, &requested, busy, &placement); > + ret = ttm_bo_validate(bo, &placement, &ctx); > if (ret) > return ret == -ENOSPC ? -ENXIO : ret; > > @@ -684,7 +728,6 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, > i915_gem_object_make_unshrinkable(obj); > INIT_RADIX_TREE(&obj->ttm.get_io_page.radix, GFP_KERNEL | __GFP_NOWARN); > mutex_init(&obj->ttm.get_io_page.lock); > - > bo_type = (obj->flags & I915_BO_ALLOC_USER) ? ttm_bo_type_device : > ttm_bo_type_kernel; > > @@ -708,7 +751,6 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, > i915_ttm_adjust_domains_after_cpu_move(obj); > i915_ttm_adjust_gem_after_move(obj); > i915_gem_object_unlock(obj); > - > out: > /* i915 wants -ENXIO when out of memory region space. */ > return (ret == -ENOSPC) ? -ENXIO : ret; > diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c > index 0b41a1545570..bc58ea942ef9 100644 > --- a/drivers/gpu/drm/i915/intel_region_ttm.c > +++ b/drivers/gpu/drm/i915/intel_region_ttm.c > @@ -49,12 +49,16 @@ void intel_region_ttm_device_fini(struct drm_i915_private *dev_priv) > * driver-private types for now, reserving TTM_PL_VRAM for stolen > * memory and TTM_PL_TT for GGTT use if decided to implement this. 
> */ > -static int intel_region_to_ttm_type(struct intel_memory_region *mem) > +int intel_region_to_ttm_type(const struct intel_memory_region *mem) > { > int type; > > GEM_BUG_ON(mem->type != INTEL_MEMORY_LOCAL && > - mem->type != INTEL_MEMORY_MOCK); > + mem->type != INTEL_MEMORY_MOCK && > + mem->type != INTEL_MEMORY_SYSTEM); > + > + if (mem->type == INTEL_MEMORY_SYSTEM) > + return TTM_PL_SYSTEM; > > type = mem->instance + TTM_PL_PRIV; > GEM_BUG_ON(type >= TTM_NUM_MEM_TYPES); > diff --git a/drivers/gpu/drm/i915/intel_region_ttm.h b/drivers/gpu/drm/i915/intel_region_ttm.h > index eaa3eccfa252..88960ae6cff6 100644 > --- a/drivers/gpu/drm/i915/intel_region_ttm.h > +++ b/drivers/gpu/drm/i915/intel_region_ttm.h > @@ -27,6 +27,8 @@ struct sg_table *intel_region_ttm_node_to_st(struct intel_memory_region *mem, > void intel_region_ttm_node_free(struct intel_memory_region *mem, > void *node); > > +int intel_region_to_ttm_type(const struct intel_memory_region *mem); > + > struct ttm_device_funcs *i915_ttm_driver(void); > > #ifdef CONFIG_DRM_I915_SELFTEST > -- > 2.31.1 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index c73c51755c20..8e1c01168c6d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -24,6 +24,11 @@ #define I915_TTM_PRIO_NO_PAGES 1 #define I915_TTM_PRIO_HAS_PAGES 2 +/* + * Size of struct ttm_place vector in on-stack struct ttm_placement allocs + */ +#define I915_TTM_MAX_PLACEMENTS 10 + /** * struct i915_ttm_tt - TTM page vector with additional private information * @ttm: The base TTM page vector. @@ -42,32 +47,18 @@ struct i915_ttm_tt { struct sg_table *cached_st; }; -static const struct ttm_place lmem0_sys_placement_flags[] = { - { - .fpfn = 0, - .lpfn = 0, - .mem_type = I915_PL_LMEM0, - .flags = 0, - }, { - .fpfn = 0, - .lpfn = 0, - .mem_type = I915_PL_SYSTEM, - .flags = 0, - } -}; - -static struct ttm_placement i915_lmem0_placement = { - .num_placement = 1, - .placement = &lmem0_sys_placement_flags[0], - .num_busy_placement = 1, - .busy_placement = &lmem0_sys_placement_flags[0], +static const struct ttm_place sys_placement_flags = { + .fpfn = 0, + .lpfn = 0, + .mem_type = I915_PL_SYSTEM, + .flags = 0, }; static struct ttm_placement i915_sys_placement = { .num_placement = 1, - .placement = &lmem0_sys_placement_flags[1], + .placement = &sys_placement_flags, .num_busy_placement = 1, - .busy_placement = &lmem0_sys_placement_flags[1], + .busy_placement = &sys_placement_flags, }; static bool gpu_binds_iomem(struct ttm_resource *mem) @@ -83,6 +74,55 @@ static bool cpu_maps_iomem(struct ttm_resource *mem) static void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj); +static enum ttm_caching +i915_ttm_select_tt_caching(const struct drm_i915_gem_object *obj) +{ + /* + * Objects only allowed in system get cached cpu-mappings. + * Other objects get WC mapping for now. Even if in system. 
+ */ + if (obj->mm.region->type == INTEL_MEMORY_SYSTEM && + obj->mm.n_placements <= 1) + return ttm_cached; + + return ttm_write_combined; +} + +static void +i915_ttm_place_from_region(const struct intel_memory_region *mr, + struct ttm_place *place) +{ + memset(place, 0, sizeof(*place)); + place->mem_type = intel_region_to_ttm_type(mr); +} + +static void +i915_ttm_placement_from_obj(const struct drm_i915_gem_object *obj, + struct ttm_place *requested, + struct ttm_place *busy, + struct ttm_placement *placement) +{ + unsigned int i; + unsigned int num_allowed = obj->mm.n_placements; + + placement->num_placement = 1; + i915_ttm_place_from_region(num_allowed ? obj->mm.placements[0] : + obj->mm.region, requested); + + /* Cache this on object? */ + placement->num_busy_placement = num_allowed; + for (i = 0; i < placement->num_busy_placement; ++i) + i915_ttm_place_from_region(obj->mm.placements[i], busy + i); + + if (num_allowed == 0) { + *busy = *requested; + placement->num_busy_placement = 1; + } + + placement->placement = requested; + placement->busy_placement = busy; +} + static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo, uint32_t page_flags) { @@ -100,7 +140,8 @@ static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo, man->use_tt) page_flags |= TTM_PAGE_FLAG_ZERO_ALLOC; - ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags, ttm_write_combined); + ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags, + i915_ttm_select_tt_caching(obj)); if (ret) { kfree(i915_tt); return NULL; @@ -465,10 +506,13 @@ static int i915_ttm_get_pages(struct drm_i915_gem_object *obj) .no_wait_gpu = false, }; struct sg_table *st; + struct ttm_place requested, busy[I915_TTM_MAX_PLACEMENTS]; + struct ttm_placement placement; int ret; /* Move to the requested placement. 
*/ - ret = ttm_bo_validate(bo, &i915_lmem0_placement, &ctx); + i915_ttm_placement_from_obj(obj, &requested, busy, &placement); + ret = ttm_bo_validate(bo, &placement, &ctx); if (ret) return ret == -ENOSPC ? -ENXIO : ret; @@ -684,7 +728,6 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, i915_gem_object_make_unshrinkable(obj); INIT_RADIX_TREE(&obj->ttm.get_io_page.radix, GFP_KERNEL | __GFP_NOWARN); mutex_init(&obj->ttm.get_io_page.lock); - bo_type = (obj->flags & I915_BO_ALLOC_USER) ? ttm_bo_type_device : ttm_bo_type_kernel; @@ -708,7 +751,6 @@ int __i915_gem_ttm_object_init(struct intel_memory_region *mem, i915_ttm_adjust_domains_after_cpu_move(obj); i915_ttm_adjust_gem_after_move(obj); i915_gem_object_unlock(obj); - out: /* i915 wants -ENXIO when out of memory region space. */ return (ret == -ENOSPC) ? -ENXIO : ret; diff --git a/drivers/gpu/drm/i915/intel_region_ttm.c b/drivers/gpu/drm/i915/intel_region_ttm.c index 0b41a1545570..bc58ea942ef9 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.c +++ b/drivers/gpu/drm/i915/intel_region_ttm.c @@ -49,12 +49,16 @@ void intel_region_ttm_device_fini(struct drm_i915_private *dev_priv) * driver-private types for now, reserving TTM_PL_VRAM for stolen * memory and TTM_PL_TT for GGTT use if decided to implement this. 
*/ -static int intel_region_to_ttm_type(struct intel_memory_region *mem) +int intel_region_to_ttm_type(const struct intel_memory_region *mem) { int type; GEM_BUG_ON(mem->type != INTEL_MEMORY_LOCAL && - mem->type != INTEL_MEMORY_MOCK); + mem->type != INTEL_MEMORY_MOCK && + mem->type != INTEL_MEMORY_SYSTEM); + + if (mem->type == INTEL_MEMORY_SYSTEM) + return TTM_PL_SYSTEM; type = mem->instance + TTM_PL_PRIV; GEM_BUG_ON(type >= TTM_NUM_MEM_TYPES); diff --git a/drivers/gpu/drm/i915/intel_region_ttm.h b/drivers/gpu/drm/i915/intel_region_ttm.h index eaa3eccfa252..88960ae6cff6 100644 --- a/drivers/gpu/drm/i915/intel_region_ttm.h +++ b/drivers/gpu/drm/i915/intel_region_ttm.h @@ -27,6 +27,8 @@ struct sg_table *intel_region_ttm_node_to_st(struct intel_memory_region *mem, void intel_region_ttm_node_free(struct intel_memory_region *mem, void *node); +int intel_region_to_ttm_type(const struct intel_memory_region *mem); + struct ttm_device_funcs *i915_ttm_driver(void); #ifdef CONFIG_DRM_I915_SELFTEST
Instead of relying on a static placement, calculate it at get_pages() time. This should work for LMEM regions and system memory for now. For stolen memory we need to take the preallocated range into account. That will be added later. Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> --- drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 92 ++++++++++++++++++------- drivers/gpu/drm/i915/intel_region_ttm.c | 8 ++- drivers/gpu/drm/i915/intel_region_ttm.h | 2 + 3 files changed, 75 insertions(+), 27 deletions(-)