diff mbox

[v4,01/38] drm/i915: Use the MRU stack search after evicting

Message ID 20161222083641.2691-2-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson Dec. 22, 2016, 8:36 a.m. UTC
When we evict from the GTT to make room for an object, the hole we
create is put onto the MRU stack inside the drm_mm range manager. On the
next search pass, we can speed up a PIN_HIGH allocation by referencing
that stack for the new hole.

v2: Pull together the 3 identical implementations (ahem, a couple were
outdated) into a common routine for allocating a node and evicting as
necessary.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/aperture_gm.c | 33 +++++-----------
 drivers/gpu/drm/i915/i915_gem_gtt.c    | 72 ++++++++++++++++++++++++----------
 drivers/gpu/drm/i915/i915_gem_gtt.h    |  5 +++
 drivers/gpu/drm/i915/i915_vma.c        | 40 ++-----------------
 4 files changed, 70 insertions(+), 80 deletions(-)

Comments

Daniel Vetter Dec. 27, 2016, 11:30 a.m. UTC | #1
On Thu, Dec 22, 2016 at 08:36:04AM +0000, Chris Wilson wrote:
> When we evict from the GTT to make room for an object, the hole we
> create is put onto the MRU stack inside the drm_mm range manager. On the
> next search pass, we can speed up a PIN_HIGH allocation by referencing
> that stack for the new hole.
> 
> v2: Pull together the 3 identical implements (ahem, a couple were
> outdated) into a common routine for allocating a node and evicting as
> necessary.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Since it doesn't apply directly to drm-misc I'm leaving this one out for
now. I guess best to merge through drm-intel?
-Daniel

> ---
>  drivers/gpu/drm/i915/gvt/aperture_gm.c | 33 +++++-----------
>  drivers/gpu/drm/i915/i915_gem_gtt.c    | 72 ++++++++++++++++++++++++----------
>  drivers/gpu/drm/i915/i915_gem_gtt.h    |  5 +++
>  drivers/gpu/drm/i915/i915_vma.c        | 40 ++-----------------
>  4 files changed, 70 insertions(+), 80 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c
> index 7d33b607bc89..1bb7a5b80d47 100644
> --- a/drivers/gpu/drm/i915/gvt/aperture_gm.c
> +++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c
> @@ -48,47 +48,34 @@ static int alloc_gm(struct intel_vgpu *vgpu, bool high_gm)
>  {
>  	struct intel_gvt *gvt = vgpu->gvt;
>  	struct drm_i915_private *dev_priv = gvt->dev_priv;
> -	u32 alloc_flag, search_flag;
> +	unsigned int flags;
>  	u64 start, end, size;
>  	struct drm_mm_node *node;
> -	int retried = 0;
>  	int ret;
>  
>  	if (high_gm) {
> -		search_flag = DRM_MM_SEARCH_BELOW;
> -		alloc_flag = DRM_MM_CREATE_TOP;
>  		node = &vgpu->gm.high_gm_node;
>  		size = vgpu_hidden_sz(vgpu);
>  		start = gvt_hidden_gmadr_base(gvt);
>  		end = gvt_hidden_gmadr_end(gvt);
> +		flags = PIN_HIGH;
>  	} else {
> -		search_flag = DRM_MM_SEARCH_DEFAULT;
> -		alloc_flag = DRM_MM_CREATE_DEFAULT;
>  		node = &vgpu->gm.low_gm_node;
>  		size = vgpu_aperture_sz(vgpu);
>  		start = gvt_aperture_gmadr_base(gvt);
>  		end = gvt_aperture_gmadr_end(gvt);
> +		flags = PIN_MAPPABLE;
>  	}
>  
>  	mutex_lock(&dev_priv->drm.struct_mutex);
> -search_again:
> -	ret = drm_mm_insert_node_in_range_generic(&dev_priv->ggtt.base.mm,
> -						  node, size, 4096,
> -						  I915_COLOR_UNEVICTABLE,
> -						  start, end, search_flag,
> -						  alloc_flag);
> -	if (ret) {
> -		ret = i915_gem_evict_something(&dev_priv->ggtt.base,
> -					       size, 4096,
> -					       I915_COLOR_UNEVICTABLE,
> -					       start, end, 0);
> -		if (ret == 0 && ++retried < 3)
> -			goto search_again;
> -
> -		gvt_err("fail to alloc %s gm space from host, retried %d\n",
> -				high_gm ? "high" : "low", retried);
> -	}
> +	ret = i915_gem_gtt_insert(&dev_priv->ggtt.base, node,
> +				  size, 4096, I915_COLOR_UNEVICTABLE,
> +				  start, end, flags);
>  	mutex_unlock(&dev_priv->drm.struct_mutex);
> +	if (ret)
> +		gvt_err("fail to alloc %s gm space from host\n",
> +			high_gm ? "high" : "low");
> +
>  	return ret;
>  }
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 6af9311f72f5..c8f1675852a7 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -2044,7 +2044,6 @@ static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
>  	struct i915_address_space *vm = &ppgtt->base;
>  	struct drm_i915_private *dev_priv = ppgtt->base.i915;
>  	struct i915_ggtt *ggtt = &dev_priv->ggtt;
> -	bool retried = false;
>  	int ret;
>  
>  	/* PPGTT PDEs reside in the GGTT and consists of 512 entries. The
> @@ -2057,29 +2056,14 @@ static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
>  	if (ret)
>  		return ret;
>  
> -alloc:
> -	ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm, &ppgtt->node,
> -						  GEN6_PD_SIZE, GEN6_PD_ALIGN,
> -						  I915_COLOR_UNEVICTABLE,
> -						  0, ggtt->base.total,
> -						  DRM_MM_TOPDOWN);
> -	if (ret == -ENOSPC && !retried) {
> -		ret = i915_gem_evict_something(&ggtt->base,
> -					       GEN6_PD_SIZE, GEN6_PD_ALIGN,
> -					       I915_COLOR_UNEVICTABLE,
> -					       0, ggtt->base.total,
> -					       0);
> -		if (ret)
> -			goto err_out;
> -
> -		retried = true;
> -		goto alloc;
> -	}
> -
> +	ret = i915_gem_gtt_insert(&ggtt->base, &ppgtt->node,
> +				  GEN6_PD_SIZE, GEN6_PD_ALIGN,
> +				  I915_COLOR_UNEVICTABLE,
> +				  0, ggtt->base.total,
> +				  PIN_HIGH);
>  	if (ret)
>  		goto err_out;
>  
> -
>  	if (ppgtt->node.start < ggtt->mappable_end)
>  		DRM_DEBUG("Forced to use aperture for PDEs\n");
>  
> @@ -3553,3 +3537,49 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma)
>  	return ret;
>  }
>  
> +int i915_gem_gtt_insert(struct i915_address_space *vm,
> +			struct drm_mm_node *node,
> +			u64 size, u64 alignment, unsigned long color,
> +			u64 start, u64 end, unsigned int flags)
> +{
> +	u32 search_flag, alloc_flag;
> +	int err;
> +
> +	lockdep_assert_held(&vm->i915->drm.struct_mutex);
> +
> +	if (flags & PIN_HIGH) {
> +		search_flag = DRM_MM_SEARCH_BELOW;
> +		alloc_flag = DRM_MM_CREATE_TOP;
> +	} else {
> +		search_flag = DRM_MM_SEARCH_DEFAULT;
> +		alloc_flag = DRM_MM_CREATE_DEFAULT;
> +	}
> +
> +	/* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
> +	 * so we know that we always have a minimum alignment of 4096.
> +	 * The drm_mm range manager is optimised to return results
> +	 * with zero alignment, so where possible use the optimal
> +	 * path.
> +	 */
> +	GEM_BUG_ON(size & 4095);
> +	if (alignment <= 4096)
> +		alignment = 0;
> +
> +	err = drm_mm_insert_node_in_range_generic(&vm->mm, node,
> +						  size, alignment, color,
> +						  start, end,
> +						  search_flag, alloc_flag);
> +	if (err != -ENOSPC)
> +		return err;
> +
> +	err = i915_gem_evict_something(vm, size, alignment, color,
> +				       start, end, flags);
> +	if (err)
> +		return err;
> +
> +	search_flag = DRM_MM_SEARCH_DEFAULT;
> +	return drm_mm_insert_node_in_range_generic(&vm->mm, node,
> +						   size, alignment, color,
> +						   start, end,
> +						   search_flag, alloc_flag);
> +}
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 0055b8567a43..4c7bef07e38a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -528,6 +528,11 @@ int __must_check i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
>  void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
>  			       struct sg_table *pages);
>  
> +int i915_gem_gtt_insert(struct i915_address_space *vm,
> +			struct drm_mm_node *node,
> +			u64 size, u64 alignment, unsigned long color,
> +			u64 start, u64 end, unsigned int flags);
> +
>  /* Flags used by pin/bind&friends. */
>  #define PIN_NONBLOCK		BIT(0)
>  #define PIN_MAPPABLE		BIT(1)
> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
> index fd75d5704287..608008d2d999 100644
> --- a/drivers/gpu/drm/i915/i915_vma.c
> +++ b/drivers/gpu/drm/i915/i915_vma.c
> @@ -415,43 +415,11 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
>  				goto err_unpin;
>  		}
>  	} else {
> -		u32 search_flag, alloc_flag;
> -
> -		if (flags & PIN_HIGH) {
> -			search_flag = DRM_MM_SEARCH_BELOW;
> -			alloc_flag = DRM_MM_CREATE_TOP;
> -		} else {
> -			search_flag = DRM_MM_SEARCH_DEFAULT;
> -			alloc_flag = DRM_MM_CREATE_DEFAULT;
> -		}
> -
> -		/* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
> -		 * so we know that we always have a minimum alignment of 4096.
> -		 * The drm_mm range manager is optimised to return results
> -		 * with zero alignment, so where possible use the optimal
> -		 * path.
> -		 */
> -		if (alignment <= 4096)
> -			alignment = 0;
> -
> -search_free:
> -		ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm,
> -							  &vma->node,
> -							  size, alignment,
> -							  obj->cache_level,
> -							  start, end,
> -							  search_flag,
> -							  alloc_flag);
> -		if (ret) {
> -			ret = i915_gem_evict_something(vma->vm, size, alignment,
> -						       obj->cache_level,
> -						       start, end,
> -						       flags);
> -			if (ret == 0)
> -				goto search_free;
> -
> +		ret = i915_gem_gtt_insert(vma->vm, &vma->node,
> +					  size, alignment, obj->cache_level,
> +					  start, end, flags);
> +		if (ret)
>  			goto err_unpin;
> -		}
>  
>  		GEM_BUG_ON(vma->node.start < start);
>  		GEM_BUG_ON(vma->node.start + vma->node.size > end);
> -- 
> 2.11.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c
index 7d33b607bc89..1bb7a5b80d47 100644
--- a/drivers/gpu/drm/i915/gvt/aperture_gm.c
+++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c
@@ -48,47 +48,34 @@  static int alloc_gm(struct intel_vgpu *vgpu, bool high_gm)
 {
 	struct intel_gvt *gvt = vgpu->gvt;
 	struct drm_i915_private *dev_priv = gvt->dev_priv;
-	u32 alloc_flag, search_flag;
+	unsigned int flags;
 	u64 start, end, size;
 	struct drm_mm_node *node;
-	int retried = 0;
 	int ret;
 
 	if (high_gm) {
-		search_flag = DRM_MM_SEARCH_BELOW;
-		alloc_flag = DRM_MM_CREATE_TOP;
 		node = &vgpu->gm.high_gm_node;
 		size = vgpu_hidden_sz(vgpu);
 		start = gvt_hidden_gmadr_base(gvt);
 		end = gvt_hidden_gmadr_end(gvt);
+		flags = PIN_HIGH;
 	} else {
-		search_flag = DRM_MM_SEARCH_DEFAULT;
-		alloc_flag = DRM_MM_CREATE_DEFAULT;
 		node = &vgpu->gm.low_gm_node;
 		size = vgpu_aperture_sz(vgpu);
 		start = gvt_aperture_gmadr_base(gvt);
 		end = gvt_aperture_gmadr_end(gvt);
+		flags = PIN_MAPPABLE;
 	}
 
 	mutex_lock(&dev_priv->drm.struct_mutex);
-search_again:
-	ret = drm_mm_insert_node_in_range_generic(&dev_priv->ggtt.base.mm,
-						  node, size, 4096,
-						  I915_COLOR_UNEVICTABLE,
-						  start, end, search_flag,
-						  alloc_flag);
-	if (ret) {
-		ret = i915_gem_evict_something(&dev_priv->ggtt.base,
-					       size, 4096,
-					       I915_COLOR_UNEVICTABLE,
-					       start, end, 0);
-		if (ret == 0 && ++retried < 3)
-			goto search_again;
-
-		gvt_err("fail to alloc %s gm space from host, retried %d\n",
-				high_gm ? "high" : "low", retried);
-	}
+	ret = i915_gem_gtt_insert(&dev_priv->ggtt.base, node,
+				  size, 4096, I915_COLOR_UNEVICTABLE,
+				  start, end, flags);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
+	if (ret)
+		gvt_err("fail to alloc %s gm space from host\n",
+			high_gm ? "high" : "low");
+
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 6af9311f72f5..c8f1675852a7 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2044,7 +2044,6 @@  static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
 	struct i915_address_space *vm = &ppgtt->base;
 	struct drm_i915_private *dev_priv = ppgtt->base.i915;
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
-	bool retried = false;
 	int ret;
 
 	/* PPGTT PDEs reside in the GGTT and consists of 512 entries. The
@@ -2057,29 +2056,14 @@  static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
 	if (ret)
 		return ret;
 
-alloc:
-	ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm, &ppgtt->node,
-						  GEN6_PD_SIZE, GEN6_PD_ALIGN,
-						  I915_COLOR_UNEVICTABLE,
-						  0, ggtt->base.total,
-						  DRM_MM_TOPDOWN);
-	if (ret == -ENOSPC && !retried) {
-		ret = i915_gem_evict_something(&ggtt->base,
-					       GEN6_PD_SIZE, GEN6_PD_ALIGN,
-					       I915_COLOR_UNEVICTABLE,
-					       0, ggtt->base.total,
-					       0);
-		if (ret)
-			goto err_out;
-
-		retried = true;
-		goto alloc;
-	}
-
+	ret = i915_gem_gtt_insert(&ggtt->base, &ppgtt->node,
+				  GEN6_PD_SIZE, GEN6_PD_ALIGN,
+				  I915_COLOR_UNEVICTABLE,
+				  0, ggtt->base.total,
+				  PIN_HIGH);
 	if (ret)
 		goto err_out;
 
-
 	if (ppgtt->node.start < ggtt->mappable_end)
 		DRM_DEBUG("Forced to use aperture for PDEs\n");
 
@@ -3553,3 +3537,49 @@  i915_get_ggtt_vma_pages(struct i915_vma *vma)
 	return ret;
 }
 
+int i915_gem_gtt_insert(struct i915_address_space *vm,
+			struct drm_mm_node *node,
+			u64 size, u64 alignment, unsigned long color,
+			u64 start, u64 end, unsigned int flags)
+{
+	u32 search_flag, alloc_flag;
+	int err;
+
+	lockdep_assert_held(&vm->i915->drm.struct_mutex);
+
+	if (flags & PIN_HIGH) {
+		search_flag = DRM_MM_SEARCH_BELOW;
+		alloc_flag = DRM_MM_CREATE_TOP;
+	} else {
+		search_flag = DRM_MM_SEARCH_DEFAULT;
+		alloc_flag = DRM_MM_CREATE_DEFAULT;
+	}
+
+	/* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
+	 * so we know that we always have a minimum alignment of 4096.
+	 * The drm_mm range manager is optimised to return results
+	 * with zero alignment, so where possible use the optimal
+	 * path.
+	 */
+	GEM_BUG_ON(size & 4095);
+	if (alignment <= 4096)
+		alignment = 0;
+
+	err = drm_mm_insert_node_in_range_generic(&vm->mm, node,
+						  size, alignment, color,
+						  start, end,
+						  search_flag, alloc_flag);
+	if (err != -ENOSPC)
+		return err;
+
+	err = i915_gem_evict_something(vm, size, alignment, color,
+				       start, end, flags);
+	if (err)
+		return err;
+
+	search_flag = DRM_MM_SEARCH_DEFAULT;
+	return drm_mm_insert_node_in_range_generic(&vm->mm, node,
+						   size, alignment, color,
+						   start, end,
+						   search_flag, alloc_flag);
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 0055b8567a43..4c7bef07e38a 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -528,6 +528,11 @@  int __must_check i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
 void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
 			       struct sg_table *pages);
 
+int i915_gem_gtt_insert(struct i915_address_space *vm,
+			struct drm_mm_node *node,
+			u64 size, u64 alignment, unsigned long color,
+			u64 start, u64 end, unsigned int flags);
+
 /* Flags used by pin/bind&friends. */
 #define PIN_NONBLOCK		BIT(0)
 #define PIN_MAPPABLE		BIT(1)
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index fd75d5704287..608008d2d999 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -415,43 +415,11 @@  i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
 				goto err_unpin;
 		}
 	} else {
-		u32 search_flag, alloc_flag;
-
-		if (flags & PIN_HIGH) {
-			search_flag = DRM_MM_SEARCH_BELOW;
-			alloc_flag = DRM_MM_CREATE_TOP;
-		} else {
-			search_flag = DRM_MM_SEARCH_DEFAULT;
-			alloc_flag = DRM_MM_CREATE_DEFAULT;
-		}
-
-		/* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
-		 * so we know that we always have a minimum alignment of 4096.
-		 * The drm_mm range manager is optimised to return results
-		 * with zero alignment, so where possible use the optimal
-		 * path.
-		 */
-		if (alignment <= 4096)
-			alignment = 0;
-
-search_free:
-		ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm,
-							  &vma->node,
-							  size, alignment,
-							  obj->cache_level,
-							  start, end,
-							  search_flag,
-							  alloc_flag);
-		if (ret) {
-			ret = i915_gem_evict_something(vma->vm, size, alignment,
-						       obj->cache_level,
-						       start, end,
-						       flags);
-			if (ret == 0)
-				goto search_free;
-
+		ret = i915_gem_gtt_insert(vma->vm, &vma->node,
+					  size, alignment, obj->cache_level,
+					  start, end, flags);
+		if (ret)
 			goto err_unpin;
-		}
 
 		GEM_BUG_ON(vma->node.start < start);
 		GEM_BUG_ON(vma->node.start + vma->node.size > end);