Message ID | 1465991045-2328-4-git-send-email-deathsimple@vodafone.de (mailing list archive) |
---|---|
State | New, archived |
On Wed, Jun 15, 2016 at 7:44 AM, Christian König <deathsimple@vodafone.de> wrote:
> From: Christian König <christian.koenig@amd.com>
>
> Free up the memory immediately, remember the last eviction for each domain and
> make new allocations depend on the last eviction to be completed.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>

Minor typo in the patch title: s/infrastructur/infrastructure/

Alex

> ---
>  drivers/gpu/drm/ttm/ttm_bo.c      | 49 ++++++++++++++++++---
>  drivers/gpu/drm/ttm/ttm_bo_util.c | 92 +++++++++++++++++++++++++++++++++++++++
>  include/drm/ttm/ttm_bo_driver.h   | 24 ++++++++++
>  3 files changed, 160 insertions(+), 5 deletions(-)
Christian,

Thanks for doing this!
A question below:

On 06/15/2016 01:44 PM, Christian König wrote:
> From: Christian König <christian.koenig@amd.com>
>
> Free up the memory immediately, remember the last eviction for each domain and
> make new allocations depend on the last eviction to be completed.
>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> ---
>  drivers/gpu/drm/ttm/ttm_bo.c      | 49 ++++++++++++++++++---
>  drivers/gpu/drm/ttm/ttm_bo_util.c | 92 +++++++++++++++++++++++++++++++++++++++
>  include/drm/ttm/ttm_bo_driver.h   | 24 ++++++++++
>  3 files changed, 160 insertions(+), 5 deletions(-)
>
>
>
>  	/*
>  	 * Protected by @io_reserve_mutex:
> @@ -298,6 +301,11 @@ struct ttm_mem_type_manager {
>  	 */
>
>  	struct list_head lru;
> +
> +	/*
> +	 * Protected by @move_lock.
> +	 */
> +	struct fence *move;
>  };
>

Did you look at protecting the move fence with RCU? I figure where there
actually is a fence it doesn't matter much, but in the fastpath where
move is NULL we'd be able to get rid of a number of locking cycles.

I guess though there might be both licensing implications and
requirements to using kfree_rcu() to free the fence.

/Thomas
On 02.07.2016 10:39, Thomas Hellstrom wrote:
> Christian,
>
> Thanks for doing this!
> A question below:
>
> On 06/15/2016 01:44 PM, Christian König wrote:
>> From: Christian König <christian.koenig@amd.com>
>>
>> Free up the memory immediately, remember the last eviction for each domain and
>> make new allocations depend on the last eviction to be completed.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> ---
>>  drivers/gpu/drm/ttm/ttm_bo.c      | 49 ++++++++++++++++++---
>>  drivers/gpu/drm/ttm/ttm_bo_util.c | 92 +++++++++++++++++++++++++++++++++++++++
>>  include/drm/ttm/ttm_bo_driver.h   | 24 ++++++++++
>>  3 files changed, 160 insertions(+), 5 deletions(-)
>>
>>
>>
>>  	/*
>>  	 * Protected by @io_reserve_mutex:
>> @@ -298,6 +301,11 @@ struct ttm_mem_type_manager {
>>  	 */
>>
>>  	struct list_head lru;
>> +
>> +	/*
>> +	 * Protected by @move_lock.
>> +	 */
>> +	struct fence *move;
>>  };
>>
> Did you look at protecting the move fence with RCU? I figure where there
> actually is a fence it doesn't matter much but in the fastpath where
> move is NULL we'd be able to get rid of a number of locking cycles.

Yeah, thought about that as well.

> I guess though there might be both licensing implications and
> requirements to using kfree_rcu() to free the fence.

The problem wasn't licensing (though it's good that you point that out; I
wasn't aware of that issue), but that the existing fence RCU helper wouldn't
have worked here, and I didn't have time to take a closer look.

Should be relatively easy to switch the read path over, because we already
free the fences RCU protected.

Regards,
Christian.

> /Thomas
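For reference, a minimal sketch of what such a lockless read path could look like, under the assumptions discussed above: that man->move is published with rcu_assign_pointer() under man->move_lock and that fences are freed via kfree_rcu(). The helper name ttm_bo_get_move_fence_rcu() is hypothetical and not part of the patch.

/*
 * Hypothetical sketch only -- not part of the patch.  Assumes man->move is
 * annotated __rcu, written with rcu_assign_pointer() under man->move_lock,
 * and that the fence memory is freed with kfree_rcu(), so a reader may
 * dereference it inside an RCU read-side critical section.
 */
static struct fence *
ttm_bo_get_move_fence_rcu(struct ttm_mem_type_manager *man)
{
	struct fence *fence;

	rcu_read_lock();
	/* Fast path: no pipelined move pending, no locking at all. */
	fence = rcu_dereference(man->move);
	if (fence && !fence_get_rcu(fence))
		fence = NULL;	/* refcount already hit zero; treat as idle */
	rcu_read_unlock();

	return fence;	/* caller drops the reference with fence_put() */
}

The writer side would keep taking move_lock as in the patch; only the reader in ttm_bo_add_move_fence() would change.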
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 28cd535..5d93169 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -788,6 +788,34 @@ void ttm_bo_mem_put(struct ttm_buffer_object *bo, struct ttm_mem_reg *mem)
 EXPORT_SYMBOL(ttm_bo_mem_put);
 
 /**
+ * Add the last move fence to the BO and reserve a new shared slot.
+ */
+static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo,
+				 struct ttm_mem_type_manager *man,
+				 struct ttm_mem_reg *mem)
+{
+	struct fence *fence;
+	int ret;
+
+	spin_lock(&man->move_lock);
+	fence = fence_get(man->move);
+	spin_unlock(&man->move_lock);
+
+	if (fence) {
+		reservation_object_add_shared_fence(bo->resv, fence);
+
+		ret = reservation_object_reserve_shared(bo->resv);
+		if (unlikely(ret))
+			return ret;
+
+		fence_put(bo->moving);
+		bo->moving = fence;
+	}
+
+	return 0;
+}
+
+/**
  * Repeatedly evict memory from the LRU for @mem_type until we create enough
  * space, or we've evicted everything and there isn't enough space.
  */
@@ -813,10 +841,8 @@ static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo,
 		if (unlikely(ret != 0))
 			return ret;
 	} while (1);
-	if (mem->mm_node == NULL)
-		return -ENOMEM;
 	mem->mem_type = mem_type;
-	return 0;
+	return ttm_bo_add_move_fence(bo, man, mem);
 }
 
 static uint32_t ttm_bo_select_caching(struct ttm_mem_type_manager *man,
@@ -886,6 +912,10 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 	bool has_erestartsys = false;
 	int i, ret;
 
+	ret = reservation_object_reserve_shared(bo->resv);
+	if (unlikely(ret))
+		return ret;
+
 	mem->mm_node = NULL;
 	for (i = 0; i < placement->num_placement; ++i) {
 		const struct ttm_place *place = &placement->placement[i];
@@ -919,9 +949,15 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 		ret = (*man->func->get_node)(man, bo, place, mem);
 		if (unlikely(ret))
 			return ret;
-
-		if (mem->mm_node)
+
+		if (mem->mm_node) {
+			ret = ttm_bo_add_move_fence(bo, man, mem);
+			if (unlikely(ret)) {
+				(*man->func->put_node)(man, mem);
+				return ret;
+			}
 			break;
+		}
 	}
 
 	if ((type_ok && (mem_type == TTM_PL_SYSTEM)) || mem->mm_node) {
@@ -1290,6 +1326,7 @@ int ttm_bo_clean_mm(struct ttm_bo_device *bdev, unsigned mem_type)
 			  mem_type);
 		return ret;
 	}
+	fence_put(man->move);
 
 	man->use_type = false;
 	man->has_type = false;
@@ -1335,6 +1372,7 @@ int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type,
 	man->io_reserve_fastpath = true;
 	man->use_io_reserve_lru = false;
 	mutex_init(&man->io_reserve_mutex);
+	spin_lock_init(&man->move_lock);
 	INIT_LIST_HEAD(&man->io_reserve_lru);
 
 	ret = bdev->driver->init_mem_type(bdev, type, man);
@@ -1353,6 +1391,7 @@ int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type,
 	man->size = p_size;
 
 	INIT_LIST_HEAD(&man->lru);
+	man->move = NULL;
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 9ea8d02..0c389a5 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -696,3 +696,95 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
 	return 0;
 }
 EXPORT_SYMBOL(ttm_bo_move_accel_cleanup);
+
+int ttm_bo_pipeline_move(struct ttm_buffer_object *bo,
+			 struct fence *fence, bool evict,
+			 struct ttm_mem_reg *new_mem)
+{
+	struct ttm_bo_device *bdev = bo->bdev;
+	struct ttm_mem_reg *old_mem = &bo->mem;
+
+	struct ttm_mem_type_manager *from = &bdev->man[old_mem->mem_type];
+	struct ttm_mem_type_manager *to = &bdev->man[new_mem->mem_type];
+
+	int ret;
+
+	reservation_object_add_excl_fence(bo->resv, fence);
+
+	if (!evict) {
+		struct ttm_buffer_object *ghost_obj;
+
+		/**
+		 * This should help pipeline ordinary buffer moves.
+		 *
+		 * Hang old buffer memory on a new buffer object,
+		 * and leave it to be released when the GPU
+		 * operation has completed.
+		 */
+
+		fence_put(bo->moving);
+		bo->moving = fence_get(fence);
+
+		ret = ttm_buffer_object_transfer(bo, &ghost_obj);
+		if (ret)
+			return ret;
+
+		reservation_object_add_excl_fence(ghost_obj->resv, fence);
+
+		/**
+		 * If we're not moving to fixed memory, the TTM object
+		 * needs to stay alive. Otherwhise hang it on the ghost
+		 * bo to be unbound and destroyed.
+		 */
+
+		if (!(to->flags & TTM_MEMTYPE_FLAG_FIXED))
+			ghost_obj->ttm = NULL;
+		else
+			bo->ttm = NULL;
+
+		ttm_bo_unreserve(ghost_obj);
+		ttm_bo_unref(&ghost_obj);
+
+	} else if (from->flags & TTM_MEMTYPE_FLAG_FIXED) {
+
+		/**
+		 * BO doesn't have a TTM we need to bind/unbind. Just remember
+		 * this eviction and free up the allocation
+		 */
+
+		spin_lock(&from->move_lock);
+		if (!from->move || fence_is_later(from->move, fence)) {
+			fence_put(from->move);
+			from->move = fence_get(fence);
+		}
+		spin_unlock(&from->move_lock);
+
+		ttm_bo_free_old_node(bo);
+
+		fence_put(bo->moving);
+		bo->moving = fence_get(fence);
+
+	} else {
+		/**
+		 * Last resort, wait for the move to be completed.
+		 *
+		 * Should never happen in pratice.
+		 */
+
+		ret = ttm_bo_wait(bo, false, false);
+		if (ret)
+			return ret;
+
+		if (to->flags & TTM_MEMTYPE_FLAG_FIXED) {
+			ttm_tt_destroy(bo->ttm);
+			bo->ttm = NULL;
+		}
+		ttm_bo_free_old_node(bo);
+	}
+
+	*old_mem = *new_mem;
+	new_mem->mm_node = NULL;
+
+	return 0;
+}
+EXPORT_SYMBOL(ttm_bo_pipeline_move);
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 44dea22..e2ebe66 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -258,8 +258,10 @@ struct ttm_mem_type_manager_func {
  * reserved by the TTM vm system.
  * @io_reserve_lru: Optional lru list for unreserving io mem regions.
  * @io_reserve_fastpath: Only use bdev::driver::io_mem_reserve to obtain
+ * @move_lock: lock for move fence
  * static information. bdev::driver::io_mem_free is never used.
  * @lru: The lru list for this memory type.
+ * @move: The fence of the last pipelined move operation.
  *
  * This structure is used to identify and manage memory types for a device.
  * It's set up by the ttm_bo_driver::init_mem_type method.
@@ -286,6 +288,7 @@ struct ttm_mem_type_manager {
 	struct mutex io_reserve_mutex;
 	bool use_io_reserve_lru;
 	bool io_reserve_fastpath;
+	spinlock_t move_lock;
 
 	/*
 	 * Protected by @io_reserve_mutex:
@@ -298,6 +301,11 @@ struct ttm_mem_type_manager {
 	 */
 
 	struct list_head lru;
+
+	/*
+	 * Protected by @move_lock.
+	 */
+	struct fence *move;
 };
 
 /**
@@ -1014,6 +1022,22 @@ extern void ttm_bo_free_old_node(struct ttm_buffer_object *bo);
 extern int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
 				     struct fence *fence, bool evict,
 				     struct ttm_mem_reg *new_mem);
+
+/**
+ * ttm_bo_pipeline_move.
+ *
+ * @bo: A pointer to a struct ttm_buffer_object.
+ * @fence: A fence object that signals when moving is complete.
+ * @evict: This is an evict move. Don't return until the buffer is idle.
+ * @new_mem: struct ttm_mem_reg indicating where to move.
+ *
+ * Function for pipelining accelerated moves. Either free the memory
+ * immediately or hang it on a temporary buffer object.
+ */
+int ttm_bo_pipeline_move(struct ttm_buffer_object *bo,
+			 struct fence *fence, bool evict,
+			 struct ttm_mem_reg *new_mem);
+
 /**
  * ttm_io_prot
  *
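To show how the new ttm_bo_pipeline_move() export is meant to be consumed, here is a hedged driver-side sketch. The callback signature is simplified and my_driver_copy_buffer() is an assumed helper (not part of TTM or this patch) that emits the GPU copy and returns its fence; drivers such as amdgpu wire this up in their own accelerated-move paths.

/*
 * Illustrative sketch only: a driver's accelerated-move path handing the
 * copy fence to TTM so the move is pipelined instead of waited on.
 * my_driver_copy_buffer() is an assumed helper returning the copy fence.
 */
static int my_driver_move_blit(struct ttm_buffer_object *bo, bool evict,
			       struct ttm_mem_reg *new_mem)
{
	struct fence *fence;
	int ret;

	/* Queue the GPU copy from bo->mem to new_mem and get its fence. */
	fence = my_driver_copy_buffer(bo, &bo->mem, new_mem);
	if (IS_ERR(fence))
		return PTR_ERR(fence);

	/*
	 * TTM frees (or hangs on a ghost BO) the old allocation right away
	 * and remembers @fence as the last eviction for the source domain,
	 * so new allocations there depend on the copy instead of the driver
	 * stalling here.
	 */
	ret = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
	fence_put(fence);
	return ret;
}

ttm_bo_pipeline_move() takes its own references on the fence, so the driver drops its reference immediately after the call.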