[11/11] drm/i915: Allow userspace to request an object at a specific offset

Message ID 1357642399-7678-12-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State New, archived

Commit Message

Chris Wilson Jan. 8, 2013, 10:53 a.m. UTC
Certain workarounds and workloads require objects at specific or at
least known offsets. Privileged users could pin an object into the GTT,
but that has obvious limitations for the general case. Instead, the user
can construct a batch assuming a particular layout for an object and
request that the kernel try its utmost to provide the object at that
location. This has the advantage that not only can it fail, but also
such allocations are transitory - although contention should be rare and
the object should persist at the same location between batches. The benefit
for userspace is that it can then avoid all relocations referencing this
object as it resides at a known location - this becomes even more useful
with per-process GTT spaces where there will be virtually no contention
between applications.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h            |    8 ++
 drivers/gpu/drm/i915/i915_gem.c            |   10 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  139 +++++++++++++++++++++++++++-
 include/uapi/drm/i915_drm.h                |    3 +-
 4 files changed, 151 insertions(+), 9 deletions(-)
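
For context, a minimal userspace sketch of how the new flag would be used
(not part of the patch; it assumes the EXEC_OBJECT_PINNED definition from
this series is visible in the installed i915_drm.h, and the handle, batch
length and 64KiB offset are purely illustrative):

	#include <stdint.h>
	#include <string.h>
	#include <errno.h>
	#include <xf86drm.h>
	#include <i915_drm.h>	/* via libdrm's include path */

	static int exec_at_fixed_offset(int fd, uint32_t batch, uint32_t batch_len)
	{
		struct drm_i915_gem_exec_object2 obj;
		struct drm_i915_gem_execbuffer2 execbuf;

		memset(&obj, 0, sizeof(obj));
		obj.handle = batch;
		obj.offset = 64 << 10;		/* requested GTT address */
		obj.flags = EXEC_OBJECT_PINNED;	/* ask for exactly this placement */
		obj.relocation_count = 0;	/* no relocations if the offset sticks */

		memset(&execbuf, 0, sizeof(execbuf));
		execbuf.buffers_ptr = (uintptr_t)&obj;
		execbuf.buffer_count = 1;
		execbuf.batch_len = batch_len;

		if (drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf) == 0)
			return 0;

		/* -EINVAL/-EBUSY: the requested offset could not be provided;
		 * the caller would rebuild the batch with relocations instead. */
		return -errno;
	}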

Comments

Imre Deak Jan. 16, 2013, 10:26 a.m. UTC | #1
On Tue, 2013-01-08 at 10:53 +0000, Chris Wilson wrote:
> Certain workarounds and workloads require objects at specific or at
> least known offsets. Privileged users could pin an object into the GTT,
> but that has obvious limitations for the general case. Instead, the user
> can construct a batch assuming a particular layout for an object and
> request that the kernel try its utmost to provide the object at that
> location. This has the advantage that not only can it fail, but also
> such allocations are transitory - although contention should be rare and
> the object should persist at the same location between batches. The benefit
> for userspace is that it can then avoid all relocations referencing this
> object as it resides at a known location - this becomes even more useful
> with per-process GTT spaces where there will be virtually no contention
> between applications.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_drv.h            |    8 ++
>  drivers/gpu/drm/i915/i915_gem.c            |   10 +-
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |  139 +++++++++++++++++++++++++++-
>  include/uapi/drm/i915_drm.h                |    3 +-
>  4 files changed, 151 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 97e2049..7da4953 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1429,6 +1429,14 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
>  						  size_t size);
>  void i915_gem_free_object(struct drm_gem_object *obj);
>  
> +uint32_t i915_gem_get_gtt_alignment(struct drm_device *dev,
> +				    uint32_t size, int tiling_mode);
> +uint32_t i915_gem_get_gtt_size(struct drm_device *dev,
> +			       uint32_t size, int tiling_mode);
> +bool i915_gem_valid_gtt_space(struct drm_device *dev,
> +			      struct drm_mm_node *gtt_space,
> +			      unsigned long cache_level);
> +
>  int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj,
>  				     uint32_t alignment,
>  				     bool map_and_fenceable,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index b7661e1..f1a23bb 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1435,7 +1435,7 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
>  	obj->fault_mappable = false;
>  }
>  
> -static uint32_t
> +uint32_t
>  i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
>  {
>  	uint32_t gtt_size;
> @@ -1463,7 +1463,7 @@ i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
>   * Return the required GTT alignment for an object, taking into account
>   * potential fence register mapping.
>   */
> -static uint32_t
> +uint32_t
>  i915_gem_get_gtt_alignment(struct drm_device *dev,
>  			   uint32_t size,
>  			   int tiling_mode)
> @@ -2833,9 +2833,9 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
>  	return 0;
>  }
>  
> -static bool i915_gem_valid_gtt_space(struct drm_device *dev,
> -				     struct drm_mm_node *gtt_space,
> -				     unsigned long cache_level)
> +bool i915_gem_valid_gtt_space(struct drm_device *dev,
> +			      struct drm_mm_node *gtt_space,
> +			      unsigned long cache_level)
>  {
>  	struct drm_mm_node *other;
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index f6bd92c..bb8b0d6 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -403,6 +403,126 @@ i915_gem_execbuffer_relocate(struct drm_device *dev,
>  	return ret;
>  }
>  
> +static struct drm_mm_node *
> +get_pinned_block(struct drm_i915_gem_object *obj, u32 size)
> +{
> +	struct drm_device *dev = obj->base.dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct drm_mm_node *gtt;
> +
> +	gtt = drm_mm_create_block(&dev_priv->mm.gtt_space,
> +				  obj->exec_entry->offset,
> +				  size,
> +				  false);
> +	if (gtt == NULL)
> +		return NULL;
> +
> +	if (!i915_gem_valid_gtt_space(dev, gtt, obj->cache_level)) {
> +		drm_mm_put_block(gtt);
> +		return NULL;
> +	}
> +
> +	gtt->color = obj->cache_level;
> +	return gtt;
> +}
> +
> +static int
> +__i915_gem_evict_range(struct drm_device *dev, u32 start, u32 end, u32 color)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct drm_i915_gem_object *obj, *next;
> +
> +	list_for_each_entry_safe(obj, next, &dev_priv->mm.bound_list, gtt_list) {
> +		u32 node_start = obj->gtt_space->start;
> +		u32 node_end = obj->gtt_space->start + obj->gtt_space->size;
> +
> +		if (!HAS_LLC(dev)) {
> +			if (node_end <= start && obj->tiling_mode != color)
> +				node_end += 4096;
> +			if (node_start >= end && obj->tiling_mode != color)
> +				node_start -= 4096;
> +		}
> +
> +		if (node_end > start && node_start < end) {
> +			int ret = i915_gem_object_unbind(obj);
> +			if (ret)
> +				return ret;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +static int
> +i915_gem_execbuffer_pinned_object(struct drm_i915_gem_object *obj)
> +{
> +	struct drm_device *dev = obj->base.dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
> +	struct drm_mm_node *gtt;
> +	bool fenceable;
> +	u32 size;
> +	int ret;
> +
> +	if (entry->alignment && entry->offset & (entry->alignment - 1))
> +		return -EINVAL;
> +
> +	if (obj->gtt_offset == entry->offset)
> +		return 0;
> +
> +	if (entry->offset & (i915_gem_get_gtt_alignment(dev, obj->base.size, obj->tiling_mode) - 1)) {
> +		fenceable = false;
> +		if (entry->offset & (i915_gem_get_unfenced_gtt_alignment(dev, obj->base.size, obj->tiling_mode) - 1))
> +			return -EINVAL;
> +	}
> +
> +	i915_gem_object_pin_pages(obj);
> +
> +	ret = i915_gem_object_unbind(obj);
> +	if (ret)
> +		goto unpin_pages;
> +
> +	size = i915_gem_get_gtt_size(dev, obj->base.size, obj->tiling_mode);
> +	gtt = get_pinned_block(obj, size);
> +	if (gtt == NULL) {
> +		ret = __i915_gem_evict_range(dev,
> +					     entry->offset,
> +					     entry->offset + size,
> +					     obj->tiling_mode);

A typo, as discussed on IRC: tiling_mode should be cache_level. The same
goes for __i915_gem_evict_range(). Otherwise on the series:

Reviewed-by: Imre Deak <imre.deak@intel.com>

--Imre
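
For clarity, the correction Imre is asking for would read roughly as below
(a sketch against the posted patch, not a committed change): the eviction
colour must be the object's cache_level, which is what
i915_gem_valid_gtt_space() colours and compares nodes with, rather than its
tiling_mode:

 		if (!HAS_LLC(dev)) {
-			if (node_end <= start && obj->tiling_mode != color)
+			if (node_end <= start && obj->cache_level != color)
 				node_end += 4096;
-			if (node_start >= end && obj->tiling_mode != color)
+			if (node_start >= end && obj->cache_level != color)
 				node_start -= 4096;
 		}

...and at the call site in i915_gem_execbuffer_pinned_object():

 		ret = __i915_gem_evict_range(dev,
 					     entry->offset,
 					     entry->offset + size,
-					     obj->tiling_mode);
+					     obj->cache_level);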
Daniel Vetter Jan. 16, 2013, 4:55 p.m. UTC | #2
On Tue, Jan 08, 2013 at 10:53:19AM +0000, Chris Wilson wrote:
> Certain workarounds and workloads require objects at specific or at
> least known offsets. Privileged users could pin an object into the GTT,
> but that has obvious limitations for the general case. Instead, the user
> can construct a batch assuming a particular layout for an object and
> request that the kernel try its utmost to provide the object at that
> location. This has the advantage that not only can it fail, but also
> such allocations are transitory - although contention should be rare and
> the object should persist at the same location between batches. The benefit
> for userspace is that it can then avoid all relocations referencing this
> object as it resides at a known location - this becomes even more useful
> with per-process GTT spaces where there will be virtually no contention
> between applications.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

I'm unsure about the API implications of this one, and slightly afraid of
userspace managing to put itself into an ugly corner. Also, I'm not sure
whether this is the interface we want for real ppgtt; a simple (real) pin
interface which reserves the ppgtt address (but doesn't necessarily force
the backing storage to be pinned) feels saner.

So until I see more torches & pitchforks from my apartment, I'd like to
defer this until we have real ppgtt available ...
-Daniel
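
(Purely to illustrate the alternative Daniel sketches above - none of this
exists in the driver - a "reserve the address, don't pin the pages" uapi
might look something like the hypothetical struct below, with every name
invented here for illustration only:)

	struct drm_i915_gem_reserve_offset {
		__u32 handle;	/* GEM object to place */
		__u32 ctx_id;	/* per-process address space to reserve in */
		__u64 offset;	/* requested ppgtt virtual address */
		__u64 flags;
	};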

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 97e2049..7da4953 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1429,6 +1429,14 @@  struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 						  size_t size);
 void i915_gem_free_object(struct drm_gem_object *obj);
 
+uint32_t i915_gem_get_gtt_alignment(struct drm_device *dev,
+				    uint32_t size, int tiling_mode);
+uint32_t i915_gem_get_gtt_size(struct drm_device *dev,
+			       uint32_t size, int tiling_mode);
+bool i915_gem_valid_gtt_space(struct drm_device *dev,
+			      struct drm_mm_node *gtt_space,
+			      unsigned long cache_level);
+
 int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj,
 				     uint32_t alignment,
 				     bool map_and_fenceable,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b7661e1..f1a23bb 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1435,7 +1435,7 @@  i915_gem_release_mmap(struct drm_i915_gem_object *obj)
 	obj->fault_mappable = false;
 }
 
-static uint32_t
+uint32_t
 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
 {
 	uint32_t gtt_size;
@@ -1463,7 +1463,7 @@  i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
  * Return the required GTT alignment for an object, taking into account
  * potential fence register mapping.
  */
-static uint32_t
+uint32_t
 i915_gem_get_gtt_alignment(struct drm_device *dev,
 			   uint32_t size,
 			   int tiling_mode)
@@ -2833,9 +2833,9 @@  i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
 	return 0;
 }
 
-static bool i915_gem_valid_gtt_space(struct drm_device *dev,
-				     struct drm_mm_node *gtt_space,
-				     unsigned long cache_level)
+bool i915_gem_valid_gtt_space(struct drm_device *dev,
+			      struct drm_mm_node *gtt_space,
+			      unsigned long cache_level)
 {
 	struct drm_mm_node *other;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index f6bd92c..bb8b0d6 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -403,6 +403,126 @@  i915_gem_execbuffer_relocate(struct drm_device *dev,
 	return ret;
 }
 
+static struct drm_mm_node *
+get_pinned_block(struct drm_i915_gem_object *obj, u32 size)
+{
+	struct drm_device *dev = obj->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_mm_node *gtt;
+
+	gtt = drm_mm_create_block(&dev_priv->mm.gtt_space,
+				  obj->exec_entry->offset,
+				  size,
+				  false);
+	if (gtt == NULL)
+		return NULL;
+
+	if (!i915_gem_valid_gtt_space(dev, gtt, obj->cache_level)) {
+		drm_mm_put_block(gtt);
+		return NULL;
+	}
+
+	gtt->color = obj->cache_level;
+	return gtt;
+}
+
+static int
+__i915_gem_evict_range(struct drm_device *dev, u32 start, u32 end, u32 color)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_object *obj, *next;
+
+	list_for_each_entry_safe(obj, next, &dev_priv->mm.bound_list, gtt_list) {
+		u32 node_start = obj->gtt_space->start;
+		u32 node_end = obj->gtt_space->start + obj->gtt_space->size;
+
+		if (!HAS_LLC(dev)) {
+			if (node_end <= start && obj->tiling_mode != color)
+				node_end += 4096;
+			if (node_start >= end && obj->tiling_mode != color)
+				node_start -= 4096;
+		}
+
+		if (node_end > start && node_start < end) {
+			int ret = i915_gem_object_unbind(obj);
+			if (ret)
+				return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int
+i915_gem_execbuffer_pinned_object(struct drm_i915_gem_object *obj)
+{
+	struct drm_device *dev = obj->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
+	struct drm_mm_node *gtt;
+	bool fenceable;
+	u32 size;
+	int ret;
+
+	if (entry->alignment && entry->offset & (entry->alignment - 1))
+		return -EINVAL;
+
+	if (obj->gtt_offset == entry->offset)
+		return 0;
+
+	if (entry->offset & (i915_gem_get_gtt_alignment(dev, obj->base.size, obj->tiling_mode) - 1)) {
+		fenceable = false;
+		if (entry->offset & (i915_gem_get_unfenced_gtt_alignment(dev, obj->base.size, obj->tiling_mode) - 1))
+			return -EINVAL;
+	}
+
+	i915_gem_object_pin_pages(obj);
+
+	ret = i915_gem_object_unbind(obj);
+	if (ret)
+		goto unpin_pages;
+
+	size = i915_gem_get_gtt_size(dev, obj->base.size, obj->tiling_mode);
+	gtt = get_pinned_block(obj, size);
+	if (gtt == NULL) {
+		ret = __i915_gem_evict_range(dev,
+					     entry->offset,
+					     entry->offset + size,
+					     obj->tiling_mode);
+		if (ret)
+			goto unpin_pages;
+
+		gtt = get_pinned_block(obj, size);
+	}
+	if (gtt == NULL) {
+		ret = -EBUSY;
+		goto unpin_pages;
+	}
+
+	ret = i915_gem_gtt_prepare_object(obj);
+	if (ret) {
+		drm_mm_put_block(gtt);
+		goto unpin_pages;
+	}
+
+	list_move_tail(&obj->gtt_list, &dev_priv->mm.bound_list);
+	list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
+
+	obj->gtt_space = gtt;
+	obj->gtt_offset += gtt->start;
+
+	obj->map_and_fenceable =
+		fenceable && obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end;
+	trace_i915_gem_object_bind(obj, false);
+
+	if (!dev_priv->mm.aliasing_ppgtt)
+		i915_gem_gtt_bind_object(obj, obj->cache_level);
+
+unpin_pages:
+	i915_gem_object_unpin_pages(obj);
+	return ret;
+}
+
 static int
 i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
 				   struct intel_ring_buffer *ring,
@@ -414,6 +534,12 @@  i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
 	bool need_fence;
 	int ret;
 
+	if (entry->flags & EXEC_OBJECT_PINNED) {
+		ret = i915_gem_execbuffer_pinned_object(obj);
+		if (ret)
+			return ret;
+	}
+
 	need_fence =
 		has_fenced_gpu_access &&
 		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
@@ -425,6 +551,10 @@  i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
 
 	entry->flags |= __EXEC_OBJECT_HAS_PIN;
 
+	if (entry->flags & EXEC_OBJECT_PINNED &&
+	    obj->gtt_offset != entry->offset)
+		return -EINVAL;
+
 	if (has_fenced_gpu_access) {
 		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
 			ret = i915_gem_object_get_fence(obj);
@@ -489,11 +619,12 @@  i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 			    bool *need_relocs)
 {
 	struct drm_i915_gem_object *obj;
-	struct list_head ordered_objects;
+	struct list_head ordered_objects, pinned_objects;
 	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
 	int retry;
 
 	INIT_LIST_HEAD(&ordered_objects);
+	INIT_LIST_HEAD(&pinned_objects);
 	while (!list_empty(objects)) {
 		struct drm_i915_gem_exec_object2 *entry;
 		bool need_fence;
@@ -507,8 +638,9 @@  i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 			has_fenced_gpu_access &&
 			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
 			obj->tiling_mode != I915_TILING_NONE;
-
-		if (need_fence)
+		if (entry->flags & EXEC_OBJECT_PINNED)
+			list_move(&obj->exec_list, &pinned_objects);
+		else if (need_fence)
 			list_move(&obj->exec_list, &ordered_objects);
 		else
 			list_move_tail(&obj->exec_list, &ordered_objects);
@@ -517,6 +649,7 @@  i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 		obj->base.pending_write_domain = 0;
 		obj->pending_fenced_gpu_access = false;
 	}
+	list_splice(&pinned_objects, &ordered_objects);
 	list_splice(&ordered_objects, objects);
 
 	/* Attempt to pin all of the buffers into the GTT.
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 07d5941..e71552d 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -632,7 +632,8 @@  struct drm_i915_gem_exec_object2 {
 #define EXEC_OBJECT_NEEDS_FENCE (1<<0)
 #define EXEC_OBJECT_NEEDS_GTT	(1<<1)
 #define EXEC_OBJECT_WRITE	(1<<2)
-#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1)
+#define EXEC_OBJECT_PINNED	(1<<3)
+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1)
 	__u64 flags;
 
 	__u64 rsvd1;