Message ID | 20240828024901.2582335-25-matthew.brost@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Introduce GPU SVM and Xe SVM implementation | expand |
On Tue, Aug 27, 2024 at 07:48:57PM -0700, Matthew Brost wrote: > Wire xe_bo_move to GPUSVM migration to SRAM with trylocking of mmap > lock. > > Signed-off-by: Matthew Brost <matthew.brost@intel.com> > --- > drivers/gpu/drm/xe/xe_bo.c | 35 +++++++++++++++++++++++++++++++- > drivers/gpu/drm/xe/xe_bo_types.h | 3 +++ > drivers/gpu/drm/xe/xe_svm.c | 2 ++ > drivers/gpu/drm/xe/xe_svm.h | 13 ++++++++++++ > 4 files changed, 52 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c > index ad804b6f9e84..ae71fcbe5380 100644 > --- a/drivers/gpu/drm/xe/xe_bo.c > +++ b/drivers/gpu/drm/xe/xe_bo.c > @@ -25,6 +25,7 @@ > #include "xe_pm.h" > #include "xe_preempt_fence.h" > #include "xe_res_cursor.h" > +#include "xe_svm.h" > #include "xe_trace_bo.h" > #include "xe_ttm_stolen_mgr.h" > #include "xe_vm.h" > @@ -250,6 +251,8 @@ int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, > static void xe_evict_flags(struct ttm_buffer_object *tbo, > struct ttm_placement *placement) > { > + struct xe_bo *bo; > + > if (!xe_bo_is_xe_bo(tbo)) { > /* Don't handle scatter gather BOs */ > if (tbo->type == ttm_bo_type_sg) { > @@ -261,6 +264,12 @@ static void xe_evict_flags(struct ttm_buffer_object *tbo, > return; > } > > + bo = ttm_to_xe_bo(tbo); > + if (bo->flags & XE_BO_FLAG_SYSTEM_ALLOC) { > + *placement = sys_placement; > + return; > + } > + > /* > * For xe, sg bos that are evicted to system just triggers a > * rebind of the sg list upon subsequent validation to XE_PL_TT. > @@ -758,6 +767,17 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, > } > } > > + if (!move_lacks_source && (bo->flags & XE_BO_FLAG_SYSTEM_ALLOC) && > + new_mem->mem_type == XE_PL_SYSTEM) { > + ret = xe_svm_range_evict(bo->range); > + if (!ret) { > + drm_dbg(&xe->drm, "Evict system allocator BO success\n"); > + ttm_bo_move_null(ttm_bo, new_mem); > + } > + > + goto out; > + } > + > if (!move_lacks_source && > ((old_mem_type == XE_PL_SYSTEM && resource_is_vram(new_mem)) || > (mem_type_is_vram(old_mem_type) && > @@ -1096,6 +1116,19 @@ static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo) > } > } > > +static bool xe_bo_eviction_valuable(struct ttm_buffer_object *ttm_bo, > + const struct ttm_place *place) > +{ > + struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); > + > + /* Do not evict SVMs before having a binding */ > + if (bo->flags & XE_BO_FLAG_SYSTEM_ALLOC && > + !xe_svm_range_has_vram_binding(bo->range)) > + return false; > + > + return ttm_bo_eviction_valuable(ttm_bo, place); > +} > + > const struct ttm_device_funcs xe_ttm_funcs = { > .ttm_tt_create = xe_ttm_tt_create, > .ttm_tt_populate = xe_ttm_tt_populate, > @@ -1106,7 +1139,7 @@ const struct ttm_device_funcs xe_ttm_funcs = { > .io_mem_reserve = xe_ttm_io_mem_reserve, > .io_mem_pfn = xe_ttm_io_mem_pfn, > .release_notify = xe_ttm_bo_release_notify, > - .eviction_valuable = ttm_bo_eviction_valuable, > + .eviction_valuable = xe_bo_eviction_valuable, > .delete_mem_notify = xe_ttm_bo_delete_mem_notify, > }; > > diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h > index 2ed558ac2264..4523b033417c 100644 > --- a/drivers/gpu/drm/xe/xe_bo_types.h > +++ b/drivers/gpu/drm/xe/xe_bo_types.h > @@ -16,6 +16,7 @@ > #include "xe_ggtt_types.h" > > struct xe_device; > +struct xe_svm_range; > struct xe_vm; > > #define XE_BO_MAX_PLACEMENTS 3 > @@ -47,6 +48,8 @@ struct xe_bo { > struct ttm_bo_kmap_obj kmap; > /** @pinned_link: link to present / evicted list of pinned BO */ > struct list_head pinned_link; > + /** @range: SVM range for BO */ > + struct xe_svm_range *range; > #ifdef CONFIG_PROC_FS > /** > * @client: @xe_drm_client which created the bo > diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c > index fd8987e0a506..dc9810828c0a 100644 > --- a/drivers/gpu/drm/xe/xe_svm.c > +++ b/drivers/gpu/drm/xe/xe_svm.c > @@ -531,6 +531,8 @@ static struct xe_bo *xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile, > range->base.va.start, ttm_bo_type_device, > XE_BO_FLAG_VRAM_IF_DGFX(tile) | > XE_BO_FLAG_SYSTEM_ALLOC | XE_BO_FLAG_SKIP_CLEAR); > + if (!IS_ERR(bo)) > + bo->range = range; > xe_vm_unlock(vm); > if (IS_ERR(bo)) { > err = PTR_ERR(bo); > diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h > index 3f432483a230..b9cf0e2500da 100644 > --- a/drivers/gpu/drm/xe/xe_svm.h > +++ b/drivers/gpu/drm/xe/xe_svm.h > @@ -46,6 +46,19 @@ static inline bool xe_svm_range_has_dma_mapping(struct xe_svm_range *range) > return range->base.flags.has_dma_mapping; > } > > +static inline bool xe_svm_range_has_vram_binding(struct xe_svm_range *range) > +{ > + return xe_svm_range_in_vram(range) && range->tile_present; > +} > + > +static inline int xe_svm_range_evict(struct xe_svm_range *range) > +{ > + struct drm_gpusvm_ctx ctx = { .trylock_mmap = true, }; So even trying to acquire an mmap lock for eviction is I think a design bug for svm memory ranges. It's a bunch of physical memory, you have no idea how many mm/vma map it and which one you pick as the special one is fairly arbitrary. So dont, eviction should entirely ignore va/mm issues at the top level like the migrate_device_range function does (maybe we need a scatter-gather version of that instead of just a range. That function internally makes sure you're in sync with any vma/vm by: - installing migration ptes everywhere, which does the mmu_notifer dance - locking the pages to prevent other concurrent migration or other fun stuff from happening - then restore ptes to something sensisble when it's all done And it does that by looping over _all_ possible mappings of a page with the rmap_walk infrastructure. The only reason when we need the mmap lock (or vma lock or whatever) is if we need to be coherent with other concurrent mm updates of a specific mm. That should only be the case when migrating to vram, where the gpusvm->mm is the special one, and when migrating to sram due to cpu faults, where the vmf->vma->mm is special (and might at best have a tenous relationship to the gpusvm->mm). But that's the only cases where a specific mm and vma have any relevance to svm vram allocations. -Sima > + > + return drm_gpusvm_migrate_to_sram(range->base.gpusvm, &range->base, > + &ctx); > +} > + > #define xe_svm_notifier_lock(vm__) \ > drm_gpusvm_notifier_lock(&(vm__)->svm.gpusvm) > > -- > 2.34.1 >
On Thu, Aug 29, 2024 at 12:14:53PM +0200, Daniel Vetter wrote: > On Tue, Aug 27, 2024 at 07:48:57PM -0700, Matthew Brost wrote: > > Wire xe_bo_move to GPUSVM migration to SRAM with trylocking of mmap > > lock. > > > > Signed-off-by: Matthew Brost <matthew.brost@intel.com> > > --- > > drivers/gpu/drm/xe/xe_bo.c | 35 +++++++++++++++++++++++++++++++- > > drivers/gpu/drm/xe/xe_bo_types.h | 3 +++ > > drivers/gpu/drm/xe/xe_svm.c | 2 ++ > > drivers/gpu/drm/xe/xe_svm.h | 13 ++++++++++++ > > 4 files changed, 52 insertions(+), 1 deletion(-) > > > > diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c > > index ad804b6f9e84..ae71fcbe5380 100644 > > --- a/drivers/gpu/drm/xe/xe_bo.c > > +++ b/drivers/gpu/drm/xe/xe_bo.c > > @@ -25,6 +25,7 @@ > > #include "xe_pm.h" > > #include "xe_preempt_fence.h" > > #include "xe_res_cursor.h" > > +#include "xe_svm.h" > > #include "xe_trace_bo.h" > > #include "xe_ttm_stolen_mgr.h" > > #include "xe_vm.h" > > @@ -250,6 +251,8 @@ int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, > > static void xe_evict_flags(struct ttm_buffer_object *tbo, > > struct ttm_placement *placement) > > { > > + struct xe_bo *bo; > > + > > if (!xe_bo_is_xe_bo(tbo)) { > > /* Don't handle scatter gather BOs */ > > if (tbo->type == ttm_bo_type_sg) { > > @@ -261,6 +264,12 @@ static void xe_evict_flags(struct ttm_buffer_object *tbo, > > return; > > } > > > > + bo = ttm_to_xe_bo(tbo); > > + if (bo->flags & XE_BO_FLAG_SYSTEM_ALLOC) { > > + *placement = sys_placement; > > + return; > > + } > > + > > /* > > * For xe, sg bos that are evicted to system just triggers a > > * rebind of the sg list upon subsequent validation to XE_PL_TT. > > @@ -758,6 +767,17 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, > > } > > } > > > > + if (!move_lacks_source && (bo->flags & XE_BO_FLAG_SYSTEM_ALLOC) && > > + new_mem->mem_type == XE_PL_SYSTEM) { > > + ret = xe_svm_range_evict(bo->range); > > + if (!ret) { > > + drm_dbg(&xe->drm, "Evict system allocator BO success\n"); > > + ttm_bo_move_null(ttm_bo, new_mem); > > + } > > + > > + goto out; > > + } > > + > > if (!move_lacks_source && > > ((old_mem_type == XE_PL_SYSTEM && resource_is_vram(new_mem)) || > > (mem_type_is_vram(old_mem_type) && > > @@ -1096,6 +1116,19 @@ static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo) > > } > > } > > > > +static bool xe_bo_eviction_valuable(struct ttm_buffer_object *ttm_bo, > > + const struct ttm_place *place) > > +{ > > + struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); > > + > > + /* Do not evict SVMs before having a binding */ > > + if (bo->flags & XE_BO_FLAG_SYSTEM_ALLOC && > > + !xe_svm_range_has_vram_binding(bo->range)) > > + return false; > > + > > + return ttm_bo_eviction_valuable(ttm_bo, place); > > +} > > + > > const struct ttm_device_funcs xe_ttm_funcs = { > > .ttm_tt_create = xe_ttm_tt_create, > > .ttm_tt_populate = xe_ttm_tt_populate, > > @@ -1106,7 +1139,7 @@ const struct ttm_device_funcs xe_ttm_funcs = { > > .io_mem_reserve = xe_ttm_io_mem_reserve, > > .io_mem_pfn = xe_ttm_io_mem_pfn, > > .release_notify = xe_ttm_bo_release_notify, > > - .eviction_valuable = ttm_bo_eviction_valuable, > > + .eviction_valuable = xe_bo_eviction_valuable, > > .delete_mem_notify = xe_ttm_bo_delete_mem_notify, > > }; > > > > diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h > > index 2ed558ac2264..4523b033417c 100644 > > --- a/drivers/gpu/drm/xe/xe_bo_types.h > > +++ b/drivers/gpu/drm/xe/xe_bo_types.h > > @@ -16,6 +16,7 @@ > > #include "xe_ggtt_types.h" > > > > struct xe_device; > > +struct xe_svm_range; > > struct xe_vm; > > > > #define XE_BO_MAX_PLACEMENTS 3 > > @@ -47,6 +48,8 @@ struct xe_bo { > > struct ttm_bo_kmap_obj kmap; > > /** @pinned_link: link to present / evicted list of pinned BO */ > > struct list_head pinned_link; > > + /** @range: SVM range for BO */ > > + struct xe_svm_range *range; > > #ifdef CONFIG_PROC_FS > > /** > > * @client: @xe_drm_client which created the bo > > diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c > > index fd8987e0a506..dc9810828c0a 100644 > > --- a/drivers/gpu/drm/xe/xe_svm.c > > +++ b/drivers/gpu/drm/xe/xe_svm.c > > @@ -531,6 +531,8 @@ static struct xe_bo *xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile, > > range->base.va.start, ttm_bo_type_device, > > XE_BO_FLAG_VRAM_IF_DGFX(tile) | > > XE_BO_FLAG_SYSTEM_ALLOC | XE_BO_FLAG_SKIP_CLEAR); > > + if (!IS_ERR(bo)) > > + bo->range = range; > > xe_vm_unlock(vm); > > if (IS_ERR(bo)) { > > err = PTR_ERR(bo); > > diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h > > index 3f432483a230..b9cf0e2500da 100644 > > --- a/drivers/gpu/drm/xe/xe_svm.h > > +++ b/drivers/gpu/drm/xe/xe_svm.h > > @@ -46,6 +46,19 @@ static inline bool xe_svm_range_has_dma_mapping(struct xe_svm_range *range) > > return range->base.flags.has_dma_mapping; > > } > > > > +static inline bool xe_svm_range_has_vram_binding(struct xe_svm_range *range) > > +{ > > + return xe_svm_range_in_vram(range) && range->tile_present; > > +} > > + > > +static inline int xe_svm_range_evict(struct xe_svm_range *range) > > +{ > > + struct drm_gpusvm_ctx ctx = { .trylock_mmap = true, }; > > So even trying to acquire an mmap lock for eviction is I think a design > bug for svm memory ranges. It's a bunch of physical memory, you have no > idea how many mm/vma map it and which one you pick as the special one is > fairly arbitrary. > Let me drop whole trylock this and just evict via drm_gpusvm_evict_to_sram / migrate_device_vma_range which does not require the mmap. I add this code very recently per Matt Auld suggestion and agree it makes the try locking unnecessary. > So dont, eviction should entirely ignore va/mm issues at the top level > like the migrate_device_range function does (maybe we need a > scatter-gather version of that instead of just a range. > I needed to add migrate_device_vma_range (might be a bad name) as VRAM may be non-continuous pfns when memory pressure exists where as migrate_device_range only supports continuous pfns. > That function internally makes sure you're in sync with any vma/vm by: > - installing migration ptes everywhere, which does the mmu_notifer dance > - locking the pages to prevent other concurrent migration or other fun > stuff from happening > - then restore ptes to something sensisble when it's all done > > And it does that by looping over _all_ possible mappings of a page with > the rmap_walk infrastructure. > > The only reason when we need the mmap lock (or vma lock or whatever) is if > we need to be coherent with other concurrent mm updates of a specific mm. > That should only be the case when migrating to vram, where the gpusvm->mm > is the special one, and when migrating to sram due to cpu faults, where > the vmf->vma->mm is special (and might at best have a tenous relationship > to the gpusvm->mm). But that's the only cases where a specific mm and vma > have any relevance to svm vram allocations. > Thanks for the info. Matt > -Sima > > > + > > + return drm_gpusvm_migrate_to_sram(range->base.gpusvm, &range->base, > > + &ctx); > > +} > > + > > #define xe_svm_notifier_lock(vm__) \ > > drm_gpusvm_notifier_lock(&(vm__)->svm.gpusvm) > > > > -- > > 2.34.1 > > > > -- > Daniel Vetter > Software Engineer, Intel Corporation > http://blog.ffwll.ch
On Thu, Aug 29, 2024 at 03:55:56PM +0000, Matthew Brost wrote: > On Thu, Aug 29, 2024 at 12:14:53PM +0200, Daniel Vetter wrote: > > On Tue, Aug 27, 2024 at 07:48:57PM -0700, Matthew Brost wrote: > > > Wire xe_bo_move to GPUSVM migration to SRAM with trylocking of mmap > > > lock. > > > > > > Signed-off-by: Matthew Brost <matthew.brost@intel.com> > > > --- > > > drivers/gpu/drm/xe/xe_bo.c | 35 +++++++++++++++++++++++++++++++- > > > drivers/gpu/drm/xe/xe_bo_types.h | 3 +++ > > > drivers/gpu/drm/xe/xe_svm.c | 2 ++ > > > drivers/gpu/drm/xe/xe_svm.h | 13 ++++++++++++ > > > 4 files changed, 52 insertions(+), 1 deletion(-) > > > > > > diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c > > > index ad804b6f9e84..ae71fcbe5380 100644 > > > --- a/drivers/gpu/drm/xe/xe_bo.c > > > +++ b/drivers/gpu/drm/xe/xe_bo.c > > > @@ -25,6 +25,7 @@ > > > #include "xe_pm.h" > > > #include "xe_preempt_fence.h" > > > #include "xe_res_cursor.h" > > > +#include "xe_svm.h" > > > #include "xe_trace_bo.h" > > > #include "xe_ttm_stolen_mgr.h" > > > #include "xe_vm.h" > > > @@ -250,6 +251,8 @@ int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, > > > static void xe_evict_flags(struct ttm_buffer_object *tbo, > > > struct ttm_placement *placement) > > > { > > > + struct xe_bo *bo; > > > + > > > if (!xe_bo_is_xe_bo(tbo)) { > > > /* Don't handle scatter gather BOs */ > > > if (tbo->type == ttm_bo_type_sg) { > > > @@ -261,6 +264,12 @@ static void xe_evict_flags(struct ttm_buffer_object *tbo, > > > return; > > > } > > > > > > + bo = ttm_to_xe_bo(tbo); > > > + if (bo->flags & XE_BO_FLAG_SYSTEM_ALLOC) { > > > + *placement = sys_placement; > > > + return; > > > + } > > > + > > > /* > > > * For xe, sg bos that are evicted to system just triggers a > > > * rebind of the sg list upon subsequent validation to XE_PL_TT. > > > @@ -758,6 +767,17 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, > > > } > > > } > > > > > > + if (!move_lacks_source && (bo->flags & XE_BO_FLAG_SYSTEM_ALLOC) && > > > + new_mem->mem_type == XE_PL_SYSTEM) { > > > + ret = xe_svm_range_evict(bo->range); > > > + if (!ret) { > > > + drm_dbg(&xe->drm, "Evict system allocator BO success\n"); > > > + ttm_bo_move_null(ttm_bo, new_mem); > > > + } > > > + > > > + goto out; > > > + } > > > + > > > if (!move_lacks_source && > > > ((old_mem_type == XE_PL_SYSTEM && resource_is_vram(new_mem)) || > > > (mem_type_is_vram(old_mem_type) && > > > @@ -1096,6 +1116,19 @@ static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo) > > > } > > > } > > > > > > +static bool xe_bo_eviction_valuable(struct ttm_buffer_object *ttm_bo, > > > + const struct ttm_place *place) > > > +{ > > > + struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); > > > + > > > + /* Do not evict SVMs before having a binding */ > > > + if (bo->flags & XE_BO_FLAG_SYSTEM_ALLOC && > > > + !xe_svm_range_has_vram_binding(bo->range)) > > > + return false; > > > + > > > + return ttm_bo_eviction_valuable(ttm_bo, place); > > > +} > > > + > > > const struct ttm_device_funcs xe_ttm_funcs = { > > > .ttm_tt_create = xe_ttm_tt_create, > > > .ttm_tt_populate = xe_ttm_tt_populate, > > > @@ -1106,7 +1139,7 @@ const struct ttm_device_funcs xe_ttm_funcs = { > > > .io_mem_reserve = xe_ttm_io_mem_reserve, > > > .io_mem_pfn = xe_ttm_io_mem_pfn, > > > .release_notify = xe_ttm_bo_release_notify, > > > - .eviction_valuable = ttm_bo_eviction_valuable, > > > + .eviction_valuable = xe_bo_eviction_valuable, > > > .delete_mem_notify = xe_ttm_bo_delete_mem_notify, > > > }; > > > > > > diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h > > > index 2ed558ac2264..4523b033417c 100644 > > > --- a/drivers/gpu/drm/xe/xe_bo_types.h > > > +++ b/drivers/gpu/drm/xe/xe_bo_types.h > > > @@ -16,6 +16,7 @@ > > > #include "xe_ggtt_types.h" > > > > > > struct xe_device; > > > +struct xe_svm_range; > > > struct xe_vm; > > > > > > #define XE_BO_MAX_PLACEMENTS 3 > > > @@ -47,6 +48,8 @@ struct xe_bo { > > > struct ttm_bo_kmap_obj kmap; > > > /** @pinned_link: link to present / evicted list of pinned BO */ > > > struct list_head pinned_link; > > > + /** @range: SVM range for BO */ > > > + struct xe_svm_range *range; > > > #ifdef CONFIG_PROC_FS > > > /** > > > * @client: @xe_drm_client which created the bo > > > diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c > > > index fd8987e0a506..dc9810828c0a 100644 > > > --- a/drivers/gpu/drm/xe/xe_svm.c > > > +++ b/drivers/gpu/drm/xe/xe_svm.c > > > @@ -531,6 +531,8 @@ static struct xe_bo *xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile, > > > range->base.va.start, ttm_bo_type_device, > > > XE_BO_FLAG_VRAM_IF_DGFX(tile) | > > > XE_BO_FLAG_SYSTEM_ALLOC | XE_BO_FLAG_SKIP_CLEAR); > > > + if (!IS_ERR(bo)) > > > + bo->range = range; > > > xe_vm_unlock(vm); > > > if (IS_ERR(bo)) { > > > err = PTR_ERR(bo); > > > diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h > > > index 3f432483a230..b9cf0e2500da 100644 > > > --- a/drivers/gpu/drm/xe/xe_svm.h > > > +++ b/drivers/gpu/drm/xe/xe_svm.h > > > @@ -46,6 +46,19 @@ static inline bool xe_svm_range_has_dma_mapping(struct xe_svm_range *range) > > > return range->base.flags.has_dma_mapping; > > > } > > > > > > +static inline bool xe_svm_range_has_vram_binding(struct xe_svm_range *range) > > > +{ > > > + return xe_svm_range_in_vram(range) && range->tile_present; > > > +} > > > + > > > +static inline int xe_svm_range_evict(struct xe_svm_range *range) > > > +{ > > > + struct drm_gpusvm_ctx ctx = { .trylock_mmap = true, }; > > > > So even trying to acquire an mmap lock for eviction is I think a design > > bug for svm memory ranges. It's a bunch of physical memory, you have no > > idea how many mm/vma map it and which one you pick as the special one is > > fairly arbitrary. > > > > Let me drop whole trylock this and just evict via > drm_gpusvm_evict_to_sram / migrate_device_vma_range which does not > require the mmap. I add this code very recently per Matt Auld suggestion > and agree it makes the try locking unnecessary. > > > So dont, eviction should entirely ignore va/mm issues at the top level > > like the migrate_device_range function does (maybe we need a > > scatter-gather version of that instead of just a range. > > > > I needed to add migrate_device_vma_range (might be a bad name) as VRAM > may be non-continuous pfns when memory pressure exists where as > migrate_device_range only supports continuous pfns. Ah, I think that's another fallout of tying vram allocations and management too closely to the gpusvm->mm va layout. Makes sense under the assumptions of your design at least. So I think we can file that under the large discussion item of per page/folio or per-range gpusvm design. -Sima > > That function internally makes sure you're in sync with any vma/vm by: > > - installing migration ptes everywhere, which does the mmu_notifer dance > > - locking the pages to prevent other concurrent migration or other fun > > stuff from happening > > - then restore ptes to something sensisble when it's all done > > > > And it does that by looping over _all_ possible mappings of a page with > > the rmap_walk infrastructure. > > > > The only reason when we need the mmap lock (or vma lock or whatever) is if > > we need to be coherent with other concurrent mm updates of a specific mm. > > That should only be the case when migrating to vram, where the gpusvm->mm > > is the special one, and when migrating to sram due to cpu faults, where > > the vmf->vma->mm is special (and might at best have a tenous relationship > > to the gpusvm->mm). But that's the only cases where a specific mm and vma > > have any relevance to svm vram allocations. > > > > Thanks for the info. > > Matt > > > -Sima > > > > > + > > > + return drm_gpusvm_migrate_to_sram(range->base.gpusvm, &range->base, > > > + &ctx); > > > +} > > > + > > > #define xe_svm_notifier_lock(vm__) \ > > > drm_gpusvm_notifier_lock(&(vm__)->svm.gpusvm) > > > > > > -- > > > 2.34.1 > > > > > > > -- > > Daniel Vetter > > Software Engineer, Intel Corporation > > http://blog.ffwll.ch
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index ad804b6f9e84..ae71fcbe5380 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -25,6 +25,7 @@ #include "xe_pm.h" #include "xe_preempt_fence.h" #include "xe_res_cursor.h" +#include "xe_svm.h" #include "xe_trace_bo.h" #include "xe_ttm_stolen_mgr.h" #include "xe_vm.h" @@ -250,6 +251,8 @@ int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo, static void xe_evict_flags(struct ttm_buffer_object *tbo, struct ttm_placement *placement) { + struct xe_bo *bo; + if (!xe_bo_is_xe_bo(tbo)) { /* Don't handle scatter gather BOs */ if (tbo->type == ttm_bo_type_sg) { @@ -261,6 +264,12 @@ static void xe_evict_flags(struct ttm_buffer_object *tbo, return; } + bo = ttm_to_xe_bo(tbo); + if (bo->flags & XE_BO_FLAG_SYSTEM_ALLOC) { + *placement = sys_placement; + return; + } + /* * For xe, sg bos that are evicted to system just triggers a * rebind of the sg list upon subsequent validation to XE_PL_TT. @@ -758,6 +767,17 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict, } } + if (!move_lacks_source && (bo->flags & XE_BO_FLAG_SYSTEM_ALLOC) && + new_mem->mem_type == XE_PL_SYSTEM) { + ret = xe_svm_range_evict(bo->range); + if (!ret) { + drm_dbg(&xe->drm, "Evict system allocator BO success\n"); + ttm_bo_move_null(ttm_bo, new_mem); + } + + goto out; + } + if (!move_lacks_source && ((old_mem_type == XE_PL_SYSTEM && resource_is_vram(new_mem)) || (mem_type_is_vram(old_mem_type) && @@ -1096,6 +1116,19 @@ static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo) } } +static bool xe_bo_eviction_valuable(struct ttm_buffer_object *ttm_bo, + const struct ttm_place *place) +{ + struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); + + /* Do not evict SVMs before having a binding */ + if (bo->flags & XE_BO_FLAG_SYSTEM_ALLOC && + !xe_svm_range_has_vram_binding(bo->range)) + return false; + + return ttm_bo_eviction_valuable(ttm_bo, place); +} + const struct ttm_device_funcs xe_ttm_funcs = { .ttm_tt_create = xe_ttm_tt_create, .ttm_tt_populate = xe_ttm_tt_populate, @@ -1106,7 +1139,7 @@ const struct ttm_device_funcs xe_ttm_funcs = { .io_mem_reserve = xe_ttm_io_mem_reserve, .io_mem_pfn = xe_ttm_io_mem_pfn, .release_notify = xe_ttm_bo_release_notify, - .eviction_valuable = ttm_bo_eviction_valuable, + .eviction_valuable = xe_bo_eviction_valuable, .delete_mem_notify = xe_ttm_bo_delete_mem_notify, }; diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index 2ed558ac2264..4523b033417c 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -16,6 +16,7 @@ #include "xe_ggtt_types.h" struct xe_device; +struct xe_svm_range; struct xe_vm; #define XE_BO_MAX_PLACEMENTS 3 @@ -47,6 +48,8 @@ struct xe_bo { struct ttm_bo_kmap_obj kmap; /** @pinned_link: link to present / evicted list of pinned BO */ struct list_head pinned_link; + /** @range: SVM range for BO */ + struct xe_svm_range *range; #ifdef CONFIG_PROC_FS /** * @client: @xe_drm_client which created the bo diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c index fd8987e0a506..dc9810828c0a 100644 --- a/drivers/gpu/drm/xe/xe_svm.c +++ b/drivers/gpu/drm/xe/xe_svm.c @@ -531,6 +531,8 @@ static struct xe_bo *xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile, range->base.va.start, ttm_bo_type_device, XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_SYSTEM_ALLOC | XE_BO_FLAG_SKIP_CLEAR); + if (!IS_ERR(bo)) + bo->range = range; xe_vm_unlock(vm); if (IS_ERR(bo)) { err = PTR_ERR(bo); diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h index 3f432483a230..b9cf0e2500da 100644 --- a/drivers/gpu/drm/xe/xe_svm.h +++ b/drivers/gpu/drm/xe/xe_svm.h @@ -46,6 +46,19 @@ static inline bool xe_svm_range_has_dma_mapping(struct xe_svm_range *range) return range->base.flags.has_dma_mapping; } +static inline bool xe_svm_range_has_vram_binding(struct xe_svm_range *range) +{ + return xe_svm_range_in_vram(range) && range->tile_present; +} + +static inline int xe_svm_range_evict(struct xe_svm_range *range) +{ + struct drm_gpusvm_ctx ctx = { .trylock_mmap = true, }; + + return drm_gpusvm_migrate_to_sram(range->base.gpusvm, &range->base, + &ctx); +} + #define xe_svm_notifier_lock(vm__) \ drm_gpusvm_notifier_lock(&(vm__)->svm.gpusvm)
Wire xe_bo_move to GPUSVM migration to SRAM with trylocking of mmap lock. Signed-off-by: Matthew Brost <matthew.brost@intel.com> --- drivers/gpu/drm/xe/xe_bo.c | 35 +++++++++++++++++++++++++++++++- drivers/gpu/drm/xe/xe_bo_types.h | 3 +++ drivers/gpu/drm/xe/xe_svm.c | 2 ++ drivers/gpu/drm/xe/xe_svm.h | 13 ++++++++++++ 4 files changed, 52 insertions(+), 1 deletion(-)