drm/radeon: fix TOPDOWN handling for bo_create

Message ID 1426088652-32727-1-git-send-email-alexander.deucher@amd.com (mailing list archive)
State New, archived

Commit Message

Alex Deucher March 11, 2015, 3:44 p.m. UTC
radeon_bo_create() calls radeon_ttm_placement_from_domain()
before ttm_bo_init() is called.  radeon_ttm_placement_from_domain()
uses the ttm bo size to determine when to select top down
allocation, but since the ttm bo is not initialized yet at that
point, the check is always false.  Pass the size explicitly
instead.

Noticed-by: Oded Gabbay <oded.gabbay@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/radeon/radeon.h        |  3 ++-
 drivers/gpu/drm/radeon/radeon_gem.c    |  2 +-
 drivers/gpu/drm/radeon/radeon_mn.c     |  2 +-
 drivers/gpu/drm/radeon/radeon_object.c | 17 ++++++++++-------
 drivers/gpu/drm/radeon/radeon_ttm.c    | 12 ++++++++----
 5 files changed, 22 insertions(+), 14 deletions(-)
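
For context, a minimal sketch of the problem and the fix (simplified from the driver code in the patch below; names follow the patch, surrounding code omitted):

	/* Before: radeon_ttm_placement_from_domain() read the size from the
	 * ttm bo itself.  During radeon_bo_create() ttm_bo_init() has not
	 * run yet, so rbo->tbo.mem.size is still 0 and the test never fires.
	 */
	if (rbo->tbo.mem.size > 512 * 1024) {	/* always false at create time */
		for (i = 0; i < c; i++)
			rbo->placements[i].flags |= TTM_PL_FLAG_TOPDOWN;
	}

	/* After: callers pass the size they already know, so the heuristic
	 * also works for a bo whose ttm object is not initialized yet.
	 */
	radeon_ttm_placement_from_domain(bo, domain, size);              /* in radeon_bo_create() */
	radeon_ttm_placement_from_domain(bo, domain, bo->tbo.mem.size);  /* for initialized BOs   */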

Comments

Christian König March 11, 2015, 6:21 p.m. UTC | #1
On 11.03.2015 16:44, Alex Deucher wrote:
> radeon_bo_create() calls radeon_ttm_placement_from_domain()
> before ttm_bo_init() is called.  radeon_ttm_placement_from_domain()
> uses the ttm bo size to determine when to select top down
> allocation, but since the ttm bo is not initialized yet at that
> point, the check is always false.  Pass the size explicitly
> instead.
>
> Noticed-by: Oded Gabbay <oded.gabbay@amd.com>
> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> Cc: stable@vger.kernel.org

And I was already wondering why the heck the BOs always ping-ponged 
in memory after creation.

Patch is Reviewed-by: Christian König <christian.koenig@amd.com>

Regards,
Christian.

Patch

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 5587603..726e89f 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -2970,7 +2970,8 @@  extern void radeon_surface_init(struct radeon_device *rdev);
 extern int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data);
 extern void radeon_legacy_set_clock_gating(struct radeon_device *rdev, int enable);
 extern void radeon_atom_set_clock_gating(struct radeon_device *rdev, int enable);
-extern void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain);
+extern void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain,
+					     u64 size);
 extern bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo);
 extern int radeon_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
 				     uint32_t flags);
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index ac3c131..d613d0c 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -337,7 +337,7 @@  int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data,
 			goto release_object;
 		}
 
-		radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_GTT);
+		radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_GTT, bo->tbo.mem.size);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
 		radeon_bo_unreserve(bo);
 		up_read(&current->mm->mmap_sem);
diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c
index a69bd44..e51f09b 100644
--- a/drivers/gpu/drm/radeon/radeon_mn.c
+++ b/drivers/gpu/drm/radeon/radeon_mn.c
@@ -141,7 +141,7 @@  static void radeon_mn_invalidate_range_start(struct mmu_notifier *mn,
 				DRM_ERROR("(%d) failed to wait for user bo\n", r);
 		}
 
-		radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_CPU);
+		radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_CPU, bo->tbo.mem.size);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
 		if (r)
 			DRM_ERROR("(%d) failed to validate user bo\n", r);
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 43e0994..07f8fd5 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -93,7 +93,8 @@  bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo)
 	return false;
 }
 
-void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
+void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain,
+				      u64 size)
 {
 	u32 c = 0, i;
 
@@ -179,7 +180,7 @@  void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
 	 * improve fragmentation quality.
 	 * 512kb was measured as the most optimal number.
 	 */
-	if (rbo->tbo.mem.size > 512 * 1024) {
+	if (size > 512 * 1024) {
 		for (i = 0; i < c; i++) {
 			rbo->placements[i].flags |= TTM_PL_FLAG_TOPDOWN;
 		}
@@ -252,7 +253,7 @@  int radeon_bo_create(struct radeon_device *rdev,
 	bo->flags &= ~RADEON_GEM_GTT_WC;
 #endif
 
-	radeon_ttm_placement_from_domain(bo, domain);
+	radeon_ttm_placement_from_domain(bo, domain, size);
 	/* Kernel allocation are uninterruptible */
 	down_read(&rdev->pm.mclk_lock);
 	r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type,
@@ -350,7 +351,7 @@  int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
 
 		return 0;
 	}
-	radeon_ttm_placement_from_domain(bo, domain);
+	radeon_ttm_placement_from_domain(bo, domain, bo->tbo.mem.size);
 	for (i = 0; i < bo->placement.num_placement; i++) {
 		/* force to pin into visible video ram */
 		if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
@@ -557,7 +558,7 @@  int radeon_bo_list_validate(struct radeon_device *rdev,
 			}
 
 		retry:
-			radeon_ttm_placement_from_domain(bo, domain);
+			radeon_ttm_placement_from_domain(bo, domain, bo->tbo.mem.size);
 			if (ring == R600_RING_TYPE_UVD_INDEX)
 				radeon_uvd_force_into_uvd_segment(bo, allowed);
 
@@ -800,7 +801,8 @@  int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 		return 0;
 
 	/* hurrah the memory is not visible ! */
-	radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM);
+	radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM,
+					 rbo->tbo.mem.size);
 	lpfn =	rdev->mc.visible_vram_size >> PAGE_SHIFT;
 	for (i = 0; i < rbo->placement.num_placement; i++) {
 		/* Force into visible VRAM */
@@ -810,7 +812,8 @@  int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 	}
 	r = ttm_bo_validate(bo, &rbo->placement, false, false);
 	if (unlikely(r == -ENOMEM)) {
-		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT);
+		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT,
+						 rbo->tbo.mem.size);
 		return ttm_bo_validate(bo, &rbo->placement, false, false);
 	} else if (unlikely(r != 0)) {
 		return r;
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index d02aa1d..ce8ed2d 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -197,7 +197,8 @@  static void radeon_evict_flags(struct ttm_buffer_object *bo,
 	switch (bo->mem.mem_type) {
 	case TTM_PL_VRAM:
 		if (rbo->rdev->ring[radeon_copy_ring_index(rbo->rdev)].ready == false)
-			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU);
+			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU,
+							 rbo->tbo.mem.size);
 		else if (rbo->rdev->mc.visible_vram_size < rbo->rdev->mc.real_vram_size &&
 			 bo->mem.start < (rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT)) {
 			unsigned fpfn = rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
@@ -209,7 +210,8 @@  static void radeon_evict_flags(struct ttm_buffer_object *bo,
 			 * BOs to be evicted from VRAM
 			 */
 			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM |
-							 RADEON_GEM_DOMAIN_GTT);
+							 RADEON_GEM_DOMAIN_GTT,
+							 rbo->tbo.mem.size);
 			rbo->placement.num_busy_placement = 0;
 			for (i = 0; i < rbo->placement.num_placement; i++) {
 				if (rbo->placements[i].flags & TTM_PL_FLAG_VRAM) {
@@ -222,11 +224,13 @@  static void radeon_evict_flags(struct ttm_buffer_object *bo,
 				}
 			}
 		} else
-			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT);
+			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT,
+							 rbo->tbo.mem.size);
 		break;
 	case TTM_PL_TT:
 	default:
-		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU);
+		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU,
+						 rbo->tbo.mem.size);
 	}
 	*placement = rbo->placement;
 }