diff mbox

drm/i915: Extend GET_APERTURE ioctl to report available map space

Message ID 1410784424-927-1-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson Sept. 15, 2014, 12:33 p.m. UTC
When constructing a batchbuffer, it is sometimes crucial to know the
largest hole into which we can fit a fenceable buffer (for example when
handling very large objects on gen2 and gen3). This depends on the
fragmentation of pinned buffers inside the aperture, a question only the
kernel can easily answer.

This patch extends the current DRM_I915_GEM_GET_APERTURE ioctl to
include a couple of new fields in its reply to userspace - the total
amount of space available in the mappable region of the aperture and
also the single largest block available.

This is not quite what userspace wants to answer the question of whether
this batch will fit as fences are also required to meet severe alignment
constraints within the batch. For this purpose, a third conservative
estimate of largest fence available is also provided. For when userspace
needs more than one batch, we also provide the cumulative space
available for fences such that it has some additional guidance to how
much space it could allocate to fences. Conservatism still wins.

The patch also adds a debugfs file for convenient testing and reporting.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c |  28 +++++++++
 drivers/gpu/drm/i915/i915_gem.c     | 111 ++++++++++++++++++++++++++++++++++--
 include/uapi/drm/i915_drm.h         |  20 +++++++
 3 files changed, 155 insertions(+), 4 deletions(-)

Comments

Konstantin Belousov Sept. 15, 2014, 1:52 p.m. UTC | #1
On Mon, Sep 15, 2014 at 01:33:44PM +0100, Chris Wilson wrote:
> When constructing a batchbuffer, it is sometimes crucial to know the
> largest hole into which we can fit a fenceable buffer (for example when
> handling very large objects on gen2 and gen3). This depends on the
> fragmentation of pinned buffers inside the aperture, a question only the
> kernel can easily answer.
> 
> This patch extends the current DRM_I915_GEM_GET_APERTURE ioctl to
> include a couple of new fields in its reply to userspace - the total
> amount of space available in the mappable region of the aperture and
> also the single largest block available.
> 
> This is not quite what userspace wants to answer the question of whether
> this batch will fit as fences are also required to meet severe alignment
> constraints within the batch. For this purpose, a third conservative
> estimate of largest fence available is also provided. For when userspace
> needs more than one batch, we also provide the culmulative space
> available for fences such that it has some additional guidance to how
> much space it could allocate to fences. Conservatism still wins.
> 
> The patch also adds a debugfs file for convenient testing and reporting.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c |  28 +++++++++
>  drivers/gpu/drm/i915/i915_gem.c     | 111 ++++++++++++++++++++++++++++++++++--
>  include/uapi/drm/i915_drm.h         |  20 +++++++
>  3 files changed, 155 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 9e63ccbea52e..41d92f29aef1 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -534,6 +534,33 @@ static int obj_rank_by_ggtt(void *priv, struct list_head *A, struct list_head *B
>  	return i915_gem_obj_ggtt_offset(a) - i915_gem_obj_ggtt_offset(b);
>  }
>  
> +static int i915_gem_aperture_info(struct seq_file *m, void *data)
> +{
> +	struct drm_info_node *node = m->private;
> +	struct drm_device *dev = node->minor->dev;
> +	struct drm_i915_gem_get_aperture arg;
> +	int ret;
> +
> +	ret = i915_gem_get_aperture_ioctl(dev, &arg, NULL);
> +	if (ret)
> +		return ret;
> +
> +	seq_printf(m, "Total size of the GTT: %llu bytes\n",
> +		   arg.aper_size);
> +	seq_printf(m, "Available space in the GTT: %llu bytes\n",
> +		   arg.aper_available_size);
> +	seq_printf(m, "Available space in the mappable aperture: %u bytes\n",
> +		   arg.map_available_size);
> +	seq_printf(m, "Single largest space in the mappable aperture: %u bytes\n",
> +		   arg.map_largest_size);
> +	seq_printf(m, "Available space for fences: %u bytes\n",
> +		   arg.fence_available_size);
> +	seq_printf(m, "Single largest fence available: %u bytes\n",
> +		   arg.fence_largest_size);
> +
> +	return 0;
> +}
> +
>  static int i915_gem_gtt_info(struct seq_file *m, void *data)
>  {
>  	struct drm_info_node *node = m->private;
> @@ -4198,6 +4225,7 @@ static int i915_debugfs_create(struct dentry *root,
>  static const struct drm_info_list i915_debugfs_list[] = {
>  	{"i915_capabilities", i915_capabilities, 0},
>  	{"i915_gem_objects", i915_gem_object_info, 0},
> +	{"i915_gem_aperture", i915_gem_aperture_info, 0},
>  	{"i915_gem_gtt", i915_gem_gtt_info, 0},
>  	{"i915_gem_pinned", i915_gem_gtt_info, 0, (void *) PINNED_LIST},
>  	{"i915_gem_active", i915_gem_object_list_info, 0, (void *) ACTIVE_LIST},
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 4b9de297b967..4b75086a1dc9 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -31,6 +31,7 @@
>  #include "i915_drv.h"
>  #include "i915_trace.h"
>  #include "intel_drv.h"
> +#include <linux/list_sort.h>
>  #include <linux/oom.h>
>  #include <linux/shmem_fs.h>
>  #include <linux/slab.h>
> @@ -260,6 +261,49 @@ i915_gem_init_ioctl(struct drm_device *dev, void *data,
>  	return 0;
>  }
>  
> +static int obj_rank_by_ggtt(void *priv,
> +			    struct list_head *A,
> +			    struct list_head *B)
> +{
> +	struct drm_i915_gem_object *a = list_entry(A,typeof(*a), obj_exec_link);
> +	struct drm_i915_gem_object *b = list_entry(B,typeof(*b), obj_exec_link);
> +
> +	return i915_gem_obj_ggtt_offset(a) - i915_gem_obj_ggtt_offset(b);
> +}
> +
> +static u32 __fence_size(struct drm_i915_private *dev_priv, u32 start, u32 end)
> +{
> +	u32 size = end - start;
> +	u32 fence_size;
> +
> +	if (INTEL_INFO(dev_priv)->gen < 4) {
> +		u32 fence_max;
> +		u32 fence_next;
> +
> +		if (IS_GEN3(dev_priv)) {
> +			fence_max = I830_FENCE_MAX_SIZE_VAL << 20;
> +			fence_next = 1024*1024;
> +		} else {
> +			fence_max = I830_FENCE_MAX_SIZE_VAL << 19;
> +			fence_next = 512*1024;
> +		}
> +
> +		fence_max = min(fence_max, size);
> +		fence_size = 0;
> +		while (fence_next <= fence_max) {
> +			u32 base = ALIGN(start, fence_next);
> +			if (base + fence_next > end)
> +				break;
> +
> +			fence_size = fence_next;
> +			fence_next <<= 1;
> +		}
> +	} else
> +		fence_size = size;
> +
> +	return fence_size;
> +}
> +
>  int
>  i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
>  			    struct drm_file *file)
> @@ -267,17 +311,76 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
>  	struct drm_i915_private *dev_priv = dev->dev_private;
>  	struct drm_i915_gem_get_aperture *args = data;
>  	struct drm_i915_gem_object *obj;
> -	size_t pinned;
> +	struct list_head map_list;
> +	const u32 map_limit = dev_priv->gtt.mappable_end;
> +	size_t pinned, map_space, map_largest, fence_space, fence_largest;
> +	u32 last, size;
> +
> +	INIT_LIST_HEAD(&map_list);
>  
>  	pinned = 0;
> +	map_space = map_largest = 0;
> +	fence_space = fence_largest = 0;
> +
>  	mutex_lock(&dev->struct_mutex);
> -	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
> -		if (i915_gem_obj_is_pinned(obj))
> -			pinned += i915_gem_obj_ggtt_size(obj);
> +	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
> +		struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
> +
> +		if (vma == NULL || !vma->pin_count)
> +			continue;
> +
> +		pinned += vma->node.size;
> +
> +		if (vma->node.start < map_limit)
> +			list_add(&obj->obj_exec_link, &map_list);
> +	}
> +
> +	last = ~0;
> +	list_sort(NULL, &map_list, obj_rank_by_ggtt);
> +	while (!list_empty(&map_list)) {
> +		struct i915_vma *vma;
> +
> +		obj = list_first_entry(&map_list, typeof(*obj), obj_exec_link);
> +		list_del_init(&obj->obj_exec_link);
> +
> +		vma = i915_gem_obj_to_ggtt(obj);
> +		if (last == ~0)
> +			goto skip_first;
> +
> +		size = vma->node.start - last;
> +		if (size > map_largest)
> +			map_largest = size;
> +		map_space += size;
> +
> +		size = __fence_size(dev_priv, last, vma->node.start);
> +		if (size > fence_largest)
> +			fence_largest = size;
> +		fence_space += size;
> +
> +skip_first:
> +		last = vma->node.start + vma->node.size;
> +	}
> +	if (last == ~0U)
> +		last = 0;
> +	if (last < map_limit) {
> +		size = map_limit - last;
> +		if (size > map_largest)
> +			map_largest = size;
> +		map_space += size;
> +
> +		size = __fence_size(dev_priv, last, map_limit);
> +		if (size > fence_largest)
> +			fence_largest = size;
> +		fence_space += size;
> +	}
>  	mutex_unlock(&dev->struct_mutex);
>  
>  	args->aper_size = dev_priv->gtt.base.total;
>  	args->aper_available_size = args->aper_size - pinned;
> +	args->map_available_size = map_space;
> +	args->map_largest_size = map_largest;
> +	args->fence_available_size = fence_space;
> +	args->fence_largest_size = fence_largest;
>  
>  	return 0;
>  }
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 422513ba9e59..35308691cf8d 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -941,6 +941,26 @@ struct drm_i915_gem_get_aperture {
>  	 * bytes
>  	 */
>  	__u64 aper_available_size;
> +
> +	/**
> +	 * Available space in the mappable region of the aperture, in bytes
> +	 */
> +	__u32 map_available_size;
> +
> +	/**
> +	 * Single largest available region inside the mappable region, in bytes.
> +	 */
> +	__u32 map_largest_size;
> +
> +	/**
> +	 * Culmulative space available for fences, in bytes
> +	 */
> +	__u32 fence_available_size;
> +
> +	/**
> +	 * Single largest fenceable region, in bytes.
> +	 */
> +	__u32 fence_largest_size;
>  };
So what will happen when old usermode program (with short old structure)
calls the ioctl ?  I believe the memory which happens to be located
after the structure is corrupted, or am I missing some magic there ?

I.e., the question is why this patch does not break the ABI.

>  
>  struct drm_i915_get_pipe_from_crtc_id {
> -- 
> 2.1.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Chris Wilson Sept. 15, 2014, 3:29 p.m. UTC | #2
On Mon, Sep 15, 2014 at 04:52:27PM +0300, Konstantin Belousov wrote:
> So what will happen when old usermode program (with short old structure)
> calls the ioctl ?  I believe the memory which happens to be located
> after the structure is corrupted, or am I missing some magic there ?
> 
> I.e., the question is why this patch does not break the ABI.

The ioctl is buffered in drm_ioctl. Space large enough for the kernel
structure is allocated from the heap/stack and the incoming user
structure (if required) is copied into the kernel struct and zero
extended. After the ioctl, if the struct is an out parameter, what fits
into the userspace struct is copied back from the kernel struct. This
has the dual benefit of allowing us to extend structures so long as we
take care that incoming zeroes from old userspace retain existing
behaviour, and vice versa with new userspace and old kernels, and also
moves the copy_from_user/copy_to_user dance for the majority of cases
into a single place (at the cost of giving up some microoptimisations).
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 9e63ccbea52e..41d92f29aef1 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -534,6 +534,33 @@  static int obj_rank_by_ggtt(void *priv, struct list_head *A, struct list_head *B
 	return i915_gem_obj_ggtt_offset(a) - i915_gem_obj_ggtt_offset(b);
 }
 
+static int i915_gem_aperture_info(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct drm_i915_gem_get_aperture arg;
+	int ret;
+
+	ret = i915_gem_get_aperture_ioctl(dev, &arg, NULL);
+	if (ret)
+		return ret;
+
+	seq_printf(m, "Total size of the GTT: %llu bytes\n",
+		   arg.aper_size);
+	seq_printf(m, "Available space in the GTT: %llu bytes\n",
+		   arg.aper_available_size);
+	seq_printf(m, "Available space in the mappable aperture: %u bytes\n",
+		   arg.map_available_size);
+	seq_printf(m, "Single largest space in the mappable aperture: %u bytes\n",
+		   arg.map_largest_size);
+	seq_printf(m, "Available space for fences: %u bytes\n",
+		   arg.fence_available_size);
+	seq_printf(m, "Single largest fence available: %u bytes\n",
+		   arg.fence_largest_size);
+
+	return 0;
+}
+
 static int i915_gem_gtt_info(struct seq_file *m, void *data)
 {
 	struct drm_info_node *node = m->private;
@@ -4198,6 +4225,7 @@  static int i915_debugfs_create(struct dentry *root,
 static const struct drm_info_list i915_debugfs_list[] = {
 	{"i915_capabilities", i915_capabilities, 0},
 	{"i915_gem_objects", i915_gem_object_info, 0},
+	{"i915_gem_aperture", i915_gem_aperture_info, 0},
 	{"i915_gem_gtt", i915_gem_gtt_info, 0},
 	{"i915_gem_pinned", i915_gem_gtt_info, 0, (void *) PINNED_LIST},
 	{"i915_gem_active", i915_gem_object_list_info, 0, (void *) ACTIVE_LIST},
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4b9de297b967..4b75086a1dc9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -31,6 +31,7 @@ 
 #include "i915_drv.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
+#include <linux/list_sort.h>
 #include <linux/oom.h>
 #include <linux/shmem_fs.h>
 #include <linux/slab.h>
@@ -260,6 +261,49 @@  i915_gem_init_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
+static int obj_rank_by_ggtt(void *priv,
+			    struct list_head *A,
+			    struct list_head *B)
+{
+	struct drm_i915_gem_object *a = list_entry(A,typeof(*a), obj_exec_link);
+	struct drm_i915_gem_object *b = list_entry(B,typeof(*b), obj_exec_link);
+
+	return i915_gem_obj_ggtt_offset(a) - i915_gem_obj_ggtt_offset(b);
+}
+
+static u32 __fence_size(struct drm_i915_private *dev_priv, u32 start, u32 end)
+{
+	u32 size = end - start;
+	u32 fence_size;
+
+	if (INTEL_INFO(dev_priv)->gen < 4) {
+		u32 fence_max;
+		u32 fence_next;
+
+		if (IS_GEN3(dev_priv)) {
+			fence_max = I830_FENCE_MAX_SIZE_VAL << 20;
+			fence_next = 1024*1024;
+		} else {
+			fence_max = I830_FENCE_MAX_SIZE_VAL << 19;
+			fence_next = 512*1024;
+		}
+
+		fence_max = min(fence_max, size);
+		fence_size = 0;
+		while (fence_next <= fence_max) {
+			u32 base = ALIGN(start, fence_next);
+			if (base + fence_next > end)
+				break;
+
+			fence_size = fence_next;
+			fence_next <<= 1;
+		}
+	} else
+		fence_size = size;
+
+	return fence_size;
+}
+
 int
 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *file)
@@ -267,17 +311,76 @@  i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_get_aperture *args = data;
 	struct drm_i915_gem_object *obj;
-	size_t pinned;
+	struct list_head map_list;
+	const u32 map_limit = dev_priv->gtt.mappable_end;
+	size_t pinned, map_space, map_largest, fence_space, fence_largest;
+	u32 last, size;
+
+	INIT_LIST_HEAD(&map_list);
 
 	pinned = 0;
+	map_space = map_largest = 0;
+	fence_space = fence_largest = 0;
+
 	mutex_lock(&dev->struct_mutex);
-	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
-		if (i915_gem_obj_is_pinned(obj))
-			pinned += i915_gem_obj_ggtt_size(obj);
+	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
+		struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
+
+		if (vma == NULL || !vma->pin_count)
+			continue;
+
+		pinned += vma->node.size;
+
+		if (vma->node.start < map_limit)
+			list_add(&obj->obj_exec_link, &map_list);
+	}
+
+	last = ~0;
+	list_sort(NULL, &map_list, obj_rank_by_ggtt);
+	while (!list_empty(&map_list)) {
+		struct i915_vma *vma;
+
+		obj = list_first_entry(&map_list, typeof(*obj), obj_exec_link);
+		list_del_init(&obj->obj_exec_link);
+
+		vma = i915_gem_obj_to_ggtt(obj);
+		if (last == ~0)
+			goto skip_first;
+
+		size = vma->node.start - last;
+		if (size > map_largest)
+			map_largest = size;
+		map_space += size;
+
+		size = __fence_size(dev_priv, last, vma->node.start);
+		if (size > fence_largest)
+			fence_largest = size;
+		fence_space += size;
+
+skip_first:
+		last = vma->node.start + vma->node.size;
+	}
+	if (last == ~0U)
+		last = 0;
+	if (last < map_limit) {
+		size = map_limit - last;
+		if (size > map_largest)
+			map_largest = size;
+		map_space += size;
+
+		size = __fence_size(dev_priv, last, map_limit);
+		if (size > fence_largest)
+			fence_largest = size;
+		fence_space += size;
+	}
 	mutex_unlock(&dev->struct_mutex);
 
 	args->aper_size = dev_priv->gtt.base.total;
 	args->aper_available_size = args->aper_size - pinned;
+	args->map_available_size = map_space;
+	args->map_largest_size = map_largest;
+	args->fence_available_size = fence_space;
+	args->fence_largest_size = fence_largest;
 
 	return 0;
 }
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 422513ba9e59..35308691cf8d 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -941,6 +941,26 @@  struct drm_i915_gem_get_aperture {
 	 * bytes
 	 */
 	__u64 aper_available_size;
+
+	/**
+	 * Available space in the mappable region of the aperture, in bytes
+	 */
+	__u32 map_available_size;
+
+	/**
+	 * Single largest available region inside the mappable region, in bytes.
+	 */
+	__u32 map_largest_size;
+
+	/**
+	 * Cumulative space available for fences, in bytes
+	 */
+	__u32 fence_available_size;
+
+	/**
+	 * Single largest fenceable region, in bytes.
+	 */
+	__u32 fence_largest_size;
 };
 
 struct drm_i915_get_pipe_from_crtc_id {