[11/12] drm/i915: Remove request retirement before each batch

Message ID 1448023432-10726-11-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State New, archived

Commit Message

Chris Wilson Nov. 20, 2015, 12:43 p.m. UTC
This reimplements the denial-of-service protection against igt from

commit 227f782e4667fc622810bce8be8ccdeee45f89c2
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Thu May 15 10:41:42 2014 +0100

    drm/i915: Retire requests before creating a new one

and transfers the stall from before each batch into a new close handler.
The issue is that the stall increases latency between batches, which in
some cases (especially coupled with execlists) is detrimental to keeping
the GPU well fed. We also make the observation that retiring requests
can itself free objects (and requests) and therefore makes a good first
step when shrinking. However, we do still wish to retire before forcing
the allocation of a new batch pool object (prior to an execbuffer), with
the optimisation that we only need to do so if the oldest available
batch pool object is still active.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.c            |  1 +
 drivers/gpu/drm/i915/i915_drv.h            |  3 ++-
 drivers/gpu/drm/i915/i915_gem.c            | 17 ++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem_batch_pool.c | 25 +++++++++++++++++--------
 drivers/gpu/drm/i915/i915_gem_batch_pool.h |  7 +++++--
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  2 --
 drivers/gpu/drm/i915/i915_gem_shrinker.c   |  2 ++
 drivers/gpu/drm/i915/intel_lrc.c           |  2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c    | 28 ++++++++++++++--------------
 9 files changed, 58 insertions(+), 29 deletions(-)
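
In outline, the protection now comes in two pieces: a GEM close hook
that opportunistically flushes an object that is still active when
userspace drops its handle, and a batch pool lookup that only pays for
retirement when the oldest cached object is still busy on this engine.
A minimal sketch of the close hook, mirroring the i915_gem.c hunk below
(wider context elided):

/* Called when userspace closes its handle to the object. If the object
 * is still active, flush its outstanding requests now so that
 * retirement is not deferred until the next allocation stalls.
 * Best effort only: if struct_mutex is contended, skip the flush
 * rather than block the close path.
 */
void i915_gem_close_object(struct drm_gem_object *gem,
			   struct drm_file *file)
{
	struct drm_i915_gem_object *obj = to_intel_bo(gem);

	if (obj->active && mutex_trylock(&obj->base.dev->struct_mutex)) {
		(void)i915_gem_object_flush_active(obj);
		mutex_unlock(&obj->base.dev->struct_mutex);
	}
}

On the batch pool side, an active object is only reused once its last
read request on this engine has completed, so the common execbuffer
path no longer retires before every batch.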

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 10c4cc2cece9..ffd99d27fac7 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1663,6 +1663,7 @@  static struct drm_driver driver = {
 	.debugfs_init = i915_debugfs_init,
 	.debugfs_cleanup = i915_debugfs_cleanup,
 #endif
+	.gem_close_object = i915_gem_close_object,
 	.gem_free_object = i915_gem_free_object,
 	.gem_vm_ops = &i915_gem_vm_ops,
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e86e1188deb0..3fe933e4e664 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2767,6 +2767,8 @@  struct drm_i915_gem_object *i915_gem_object_create_from_data(
 		struct drm_device *dev, const void *data, size_t size);
 void i915_init_vm(struct drm_i915_private *dev_priv,
 		  struct i915_address_space *vm);
+void i915_gem_close_object(struct drm_gem_object *obj,
+			   struct drm_file *file);
 void i915_gem_free_object(struct drm_gem_object *obj);
 void i915_gem_vma_destroy(struct i915_vma *vma);
 
@@ -2889,7 +2891,6 @@  struct drm_i915_gem_request *
 i915_gem_find_active_request(struct intel_engine_cs *ring);
 
 bool i915_gem_retire_requests(struct drm_device *dev);
-void i915_gem_retire_requests_ring(struct intel_engine_cs *ring);
 int __must_check i915_gem_check_wedge(struct i915_gpu_error *error,
 				      bool interruptible);
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 390cb9c15bad..d3609e111647 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2261,6 +2261,10 @@  i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 	for (i = 0; i < page_count; i++) {
 		page = shmem_read_mapping_page_gfp(mapping, i, gfp);
 		if (IS_ERR(page)) {
+			i915_gem_retire_requests(dev_priv->dev);
+			page = shmem_read_mapping_page_gfp(mapping, i, gfp);
+		}
+		if (IS_ERR(page)) {
 			i915_gem_shrink(dev_priv,
 					page_count,
 					I915_SHRINK_BOUND |
@@ -2880,7 +2884,7 @@  void i915_gem_reset(struct drm_device *dev)
 /**
  * This function clears the request list as sequence numbers are passed.
  */
-void
+static void
 i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
 {
 	WARN_ON(i915_verify_lists(ring->dev));
@@ -3037,6 +3041,17 @@  retire:
 	return 0;
 }
 
+void i915_gem_close_object(struct drm_gem_object *gem,
+			   struct drm_file *file)
+{
+	struct drm_i915_gem_object *obj = to_intel_bo(gem);
+
+	if (obj->active && mutex_trylock(&obj->base.dev->struct_mutex)) {
+		(void)i915_gem_object_flush_active(obj);
+		mutex_unlock(&obj->base.dev->struct_mutex);
+	}
+}
+
 /**
  * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
  * @DRM_IOCTL_ARGS: standard ioctl arguments
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
index 7bf2f3f2968e..b8ec7131e064 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c
@@ -41,15 +41,15 @@ 
 
 /**
  * i915_gem_batch_pool_init() - initialize a batch buffer pool
- * @dev: the drm device
+ * @engine: the associated request submission engine
  * @pool: the batch buffer pool
  */
-void i915_gem_batch_pool_init(struct drm_device *dev,
+void i915_gem_batch_pool_init(struct intel_engine_cs *engine,
 			      struct i915_gem_batch_pool *pool)
 {
 	int n;
 
-	pool->dev = dev;
+	pool->engine = engine;
 
 	for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++)
 		INIT_LIST_HEAD(&pool->cache_list[n]);
@@ -65,7 +65,7 @@  void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool)
 {
 	int n;
 
-	WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex));
+	lockdep_assert_held(&pool->engine->dev->struct_mutex);
 
 	for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
 		while (!list_empty(&pool->cache_list[n])) {
@@ -102,7 +102,7 @@  i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
 	struct list_head *list;
 	int n;
 
-	WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex));
+	lockdep_assert_held(&pool->engine->dev->struct_mutex);
 
 	/* Compute a power-of-two bucket, but throw everything greater than
 	 * 16KiB into the same bucket: i.e. the buckets hold objects of
@@ -115,8 +115,17 @@  i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
 
 	list_for_each_entry_safe(tmp, next, list, batch_pool_link) {
 		/* The batches are strictly LRU ordered */
-		if (tmp->active)
-			break;
+		if (tmp->active) {
+			struct drm_i915_gem_request *rq;
+
+			/* XXX To be simplified later! */
+			rq = tmp->last_read_req[pool->engine->id];
+			if (!i915_gem_request_completed(rq, true))
+				break;
+
+			BUG_ON(tmp->active & ~intel_ring_flag(pool->engine));
+			BUG_ON(tmp->last_write_req);
+		}
 
 		/* While we're looping, do some clean up */
 		if (tmp->madv == __I915_MADV_PURGED) {
@@ -134,7 +143,7 @@  i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
 	if (obj == NULL) {
 		int ret;
 
-		obj = i915_gem_alloc_object(pool->dev, size);
+		obj = i915_gem_alloc_object(pool->engine->dev, size);
 		if (obj == NULL)
 			return ERR_PTR(-ENOMEM);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.h b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
index 848e90703eed..7fd4df0a29fe 100644
--- a/drivers/gpu/drm/i915/i915_gem_batch_pool.h
+++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.h
@@ -27,13 +27,16 @@ 
 
 #include "i915_drv.h"
 
+struct drm_device;
+struct intel_engine_cs;
+
 struct i915_gem_batch_pool {
-	struct drm_device *dev;
+	struct intel_engine_cs *engine;
 	struct list_head cache_list[4];
 };
 
 /* i915_gem_batch_pool.c */
-void i915_gem_batch_pool_init(struct drm_device *dev,
+void i915_gem_batch_pool_init(struct intel_engine_cs *engine,
 			      struct i915_gem_batch_pool *pool);
 void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool);
 struct drm_i915_gem_object*
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 46907f5da4f2..43856bdbb330 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -712,8 +712,6 @@  i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
 	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
 	int retry;
 
-	i915_gem_retire_requests_ring(ring);
-
 	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
 
 	INIT_LIST_HEAD(&ordered_vmas);
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index b2ccb7346899..0c7d654195a3 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -216,6 +216,8 @@  i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
 	if (!i915_gem_shrinker_lock(dev, &unlock))
 		return 0;
 
+	i915_gem_retire_requests(dev);
+
 	count = 0;
 	list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list)
 		if (obj->pages_pin_count == 0)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 222ae8383f48..61aa6dd294c2 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1654,7 +1654,7 @@  static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin
 	ring->dev = dev;
 	INIT_LIST_HEAD(&ring->active_list);
 	INIT_LIST_HEAD(&ring->request_list);
-	i915_gem_batch_pool_init(dev, &ring->batch_pool);
+	i915_gem_batch_pool_init(ring, &ring->batch_pool);
 	init_waitqueue_head(&ring->irq_queue);
 
 	INIT_LIST_HEAD(&ring->execlist_queue);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index f6b7e209cc3c..f33db3495e35 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2045,23 +2045,23 @@  int intel_alloc_ringbuffer_obj(struct drm_device *dev,
 static int intel_init_ring_buffer(struct drm_device *dev,
 				  struct intel_engine_cs *engine)
 {
-	struct intel_ringbuffer *ringbuf;
+	struct intel_ringbuffer *ring;
 	int ret;
 
 	WARN_ON(engine->buffer);
 
-	ringbuf = kzalloc(sizeof(*ringbuf), GFP_KERNEL);
-	if (!ringbuf)
+	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+	if (!ring)
 		return -ENOMEM;
-	engine->buffer = ringbuf;
+	engine->buffer = ring;
 
 	engine->dev = dev;
 	INIT_LIST_HEAD(&engine->active_list);
 	INIT_LIST_HEAD(&engine->request_list);
 	INIT_LIST_HEAD(&engine->execlist_queue);
-	i915_gem_batch_pool_init(dev, &engine->batch_pool);
-	ringbuf->size = 32 * PAGE_SIZE;
-	ringbuf->engine = engine;
+	i915_gem_batch_pool_init(engine, &engine->batch_pool);
+	ring->size = 32 * PAGE_SIZE;
+	ring->engine = engine;
 	memset(engine->semaphore.sync_seqno, 0, sizeof(engine->semaphore.sync_seqno));
 
 	init_waitqueue_head(&engine->irq_queue);
@@ -2077,20 +2077,20 @@  static int intel_init_ring_buffer(struct drm_device *dev,
 			goto error;
 	}
 
-	WARN_ON(ringbuf->obj);
+	WARN_ON(ring->obj);
 
-	ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
+	ret = intel_alloc_ringbuffer_obj(dev, ring);
 	if (ret) {
 		DRM_ERROR("Failed to allocate ringbuffer %s: %d\n",
 				engine->name, ret);
 		goto error;
 	}
 
-	ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf);
+	ret = intel_pin_and_map_ringbuffer_obj(dev, ring);
 	if (ret) {
 		DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n",
 				engine->name, ret);
-		intel_destroy_ringbuffer_obj(ringbuf);
+		intel_destroy_ringbuffer_obj(ring);
 		goto error;
 	}
 
@@ -2098,9 +2098,9 @@  static int intel_init_ring_buffer(struct drm_device *dev,
 	 * the TAIL pointer points to within the last 2 cachelines
 	 * of the buffer.
 	 */
-	ringbuf->effective_size = ringbuf->size;
+	ring->effective_size = ring->size;
 	if (IS_I830(dev) || IS_845G(dev))
-		ringbuf->effective_size -= 2 * CACHELINE_BYTES;
+		ring->effective_size -= 2 * CACHELINE_BYTES;
 
 	ret = i915_cmd_parser_init_ring(engine);
 	if (ret)
@@ -2109,7 +2109,7 @@  static int intel_init_ring_buffer(struct drm_device *dev,
 	return 0;
 
 error:
-	kfree(ringbuf);
+	kfree(ring);
 	engine->buffer = NULL;
 	return ret;
 }