@@ -1663,6 +1663,7 @@ static struct drm_driver driver = {
.debugfs_init = i915_debugfs_init,
.debugfs_cleanup = i915_debugfs_cleanup,
#endif
+ .gem_close_object = i915_gem_close_object,
.gem_free_object = i915_gem_free_object,
.gem_vm_ops = &i915_gem_vm_ops,
@@ -2767,6 +2767,8 @@ struct drm_i915_gem_object *i915_gem_object_create_from_data(
struct drm_device *dev, const void *data, size_t size);
void i915_init_vm(struct drm_i915_private *dev_priv,
struct i915_address_space *vm);
+void i915_gem_close_object(struct drm_gem_object *obj,
+ struct drm_file *file);
void i915_gem_free_object(struct drm_gem_object *obj);
void i915_gem_vma_destroy(struct i915_vma *vma);
@@ -2889,7 +2891,6 @@ struct drm_i915_gem_request *
i915_gem_find_active_request(struct intel_engine_cs *ring);
bool i915_gem_retire_requests(struct drm_device *dev);
-void i915_gem_retire_requests_ring(struct intel_engine_cs *ring);
int __must_check i915_gem_check_wedge(struct i915_gpu_error *error,
bool interruptible);
@@ -2261,6 +2261,10 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
for (i = 0; i < page_count; i++) {
page = shmem_read_mapping_page_gfp(mapping, i, gfp);
if (IS_ERR(page)) {
+ i915_gem_retire_requests(dev_priv->dev);
+ page = shmem_read_mapping_page_gfp(mapping, i, gfp);
+ }
+ if (IS_ERR(page)) {
i915_gem_shrink(dev_priv,
page_count,
I915_SHRINK_BOUND |
@@ -2880,7 +2884,7 @@ void i915_gem_reset(struct drm_device *dev)
/**
* This function clears the request list as sequence numbers are passed.
*/
-void
+static void
i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
{
WARN_ON(i915_verify_lists(ring->dev));
@@ -3037,6 +3041,17 @@ retire:
return 0;
}
+void i915_gem_close_object(struct drm_gem_object *gem,
+ struct drm_file *file)
+{
+ struct drm_i915_gem_object *obj = to_intel_bo(gem);
+
+ if (obj->active && mutex_trylock(&obj->base.dev->struct_mutex)) {
+ (void)i915_gem_object_flush_active(obj);
+ mutex_unlock(&obj->base.dev->struct_mutex);
+ }
+}
+
/**
* i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
* @DRM_IOCTL_ARGS: standard ioctl arguments
@@ -41,15 +41,15 @@
/**
* i915_gem_batch_pool_init() - initialize a batch buffer pool
- * @dev: the drm device
+ * @engine: the associated request submission engine
* @pool: the batch buffer pool
*/
-void i915_gem_batch_pool_init(struct drm_device *dev,
+void i915_gem_batch_pool_init(struct intel_engine_cs *engine,
struct i915_gem_batch_pool *pool)
{
int n;
- pool->dev = dev;
+ pool->engine = engine;
for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++)
INIT_LIST_HEAD(&pool->cache_list[n]);
@@ -65,7 +65,7 @@ void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool)
{
int n;
- WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex));
+ lockdep_assert_held(&pool->engine->dev->struct_mutex);
for (n = 0; n < ARRAY_SIZE(pool->cache_list); n++) {
while (!list_empty(&pool->cache_list[n])) {
@@ -102,7 +102,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
struct list_head *list;
int n;
- WARN_ON(!mutex_is_locked(&pool->dev->struct_mutex));
+ lockdep_assert_held(&pool->engine->dev->struct_mutex);
/* Compute a power-of-two bucket, but throw everything greater than
* 16KiB into the same bucket: i.e. the buckets hold objects of
@@ -115,8 +115,17 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
list_for_each_entry_safe(tmp, next, list, batch_pool_link) {
/* The batches are strictly LRU ordered */
- if (tmp->active)
- break;
+ if (tmp->active) {
+ struct drm_i915_gem_request *rq;
+
+ /* XXX To be simplified later! */
+ rq = tmp->last_read_req[pool->engine->id];
+ if (!i915_gem_request_completed(rq, true))
+ break;
+
+ BUG_ON(tmp->active & ~intel_ring_flag(pool->engine));
+ BUG_ON(tmp->last_write_req);
+ }
/* While we're looping, do some clean up */
if (tmp->madv == __I915_MADV_PURGED) {
@@ -134,7 +143,7 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
if (obj == NULL) {
int ret;
- obj = i915_gem_alloc_object(pool->dev, size);
+ obj = i915_gem_alloc_object(pool->engine->dev, size);
if (obj == NULL)
return ERR_PTR(-ENOMEM);
@@ -27,13 +27,16 @@
#include "i915_drv.h"
+struct drm_device;
+struct intel_engine_cs;
+
struct i915_gem_batch_pool {
- struct drm_device *dev;
+ struct intel_engine_cs *engine;
struct list_head cache_list[4];
};
/* i915_gem_batch_pool.c */
-void i915_gem_batch_pool_init(struct drm_device *dev,
+void i915_gem_batch_pool_init(struct intel_engine_cs *engine,
struct i915_gem_batch_pool *pool);
void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool);
struct drm_i915_gem_object*
@@ -712,8 +712,6 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
int retry;
- i915_gem_retire_requests_ring(ring);
-
vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
INIT_LIST_HEAD(&ordered_vmas);
@@ -216,6 +216,8 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
if (!i915_gem_shrinker_lock(dev, &unlock))
return 0;
+ i915_gem_retire_requests(dev);
+
count = 0;
list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list)
if (obj->pages_pin_count == 0)
@@ -1654,7 +1654,7 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin
ring->dev = dev;
INIT_LIST_HEAD(&ring->active_list);
INIT_LIST_HEAD(&ring->request_list);
- i915_gem_batch_pool_init(dev, &ring->batch_pool);
+ i915_gem_batch_pool_init(ring, &ring->batch_pool);
init_waitqueue_head(&ring->irq_queue);
INIT_LIST_HEAD(&ring->execlist_queue);
@@ -2045,23 +2045,23 @@ int intel_alloc_ringbuffer_obj(struct drm_device *dev,
static int intel_init_ring_buffer(struct drm_device *dev,
struct intel_engine_cs *engine)
{
- struct intel_ringbuffer *ringbuf;
+ struct intel_ringbuffer *ring;
int ret;
WARN_ON(engine->buffer);
- ringbuf = kzalloc(sizeof(*ringbuf), GFP_KERNEL);
- if (!ringbuf)
+ ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+ if (!ring)
return -ENOMEM;
- engine->buffer = ringbuf;
+ engine->buffer = ring;
engine->dev = dev;
INIT_LIST_HEAD(&engine->active_list);
INIT_LIST_HEAD(&engine->request_list);
INIT_LIST_HEAD(&engine->execlist_queue);
- i915_gem_batch_pool_init(dev, &engine->batch_pool);
- ringbuf->size = 32 * PAGE_SIZE;
- ringbuf->engine = engine;
+ i915_gem_batch_pool_init(engine, &engine->batch_pool);
+ ring->size = 32 * PAGE_SIZE;
+ ring->engine = engine;
memset(engine->semaphore.sync_seqno, 0, sizeof(engine->semaphore.sync_seqno));
init_waitqueue_head(&engine->irq_queue);
@@ -2077,20 +2077,20 @@ static int intel_init_ring_buffer(struct drm_device *dev,
goto error;
}
- WARN_ON(ringbuf->obj);
+ WARN_ON(ring->obj);
- ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
+ ret = intel_alloc_ringbuffer_obj(dev, ring);
if (ret) {
DRM_ERROR("Failed to allocate ringbuffer %s: %d\n",
engine->name, ret);
goto error;
}
- ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf);
+ ret = intel_pin_and_map_ringbuffer_obj(dev, ring);
if (ret) {
DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n",
engine->name, ret);
- intel_destroy_ringbuffer_obj(ringbuf);
+ intel_destroy_ringbuffer_obj(ring);
goto error;
}
@@ -2098,9 +2098,9 @@ static int intel_init_ring_buffer(struct drm_device *dev,
* the TAIL pointer points to within the last 2 cachelines
* of the buffer.
*/
- ringbuf->effective_size = ringbuf->size;
+ ring->effective_size = ring->size;
if (IS_I830(dev) || IS_845G(dev))
- ringbuf->effective_size -= 2 * CACHELINE_BYTES;
+ ring->effective_size -= 2 * CACHELINE_BYTES;
ret = i915_cmd_parser_init_ring(engine);
if (ret)
@@ -2109,7 +2109,7 @@ static int intel_init_ring_buffer(struct drm_device *dev,
return 0;
error:
- kfree(ringbuf);
+ kfree(ring);
engine->buffer = NULL;
return ret;
}
This reimplements the denial-of-service protection against igt from

commit 227f782e4667fc622810bce8be8ccdeee45f89c2
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Thu May 15 10:41:42 2014 +0100

    drm/i915: Retire requests before creating a new one

and transfers the stall from before each batch into a new close handler.
The issue is that the stall increases latency between batches, which in
some cases (especially coupled with execlists) is detrimental to keeping
the GPU well fed. We also make the observation that retiring requests can
itself free objects (and requests) and therefore makes a good first step
when shrinking. However, we do wish to retire before forcing an allocation
of a new batch pool object (prior to an execbuffer), but we make the
optimisation that we only need to do so if the oldest available batch pool
object is active.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.c            |  1 +
 drivers/gpu/drm/i915/i915_drv.h            |  3 ++-
 drivers/gpu/drm/i915/i915_gem.c            | 17 ++++++++++++++++-
 drivers/gpu/drm/i915/i915_gem_batch_pool.c | 25 +++++++++++++++++--------
 drivers/gpu/drm/i915/i915_gem_batch_pool.h |  7 +++++--
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  2 --
 drivers/gpu/drm/i915/i915_gem_shrinker.c   |  2 ++
 drivers/gpu/drm/i915/intel_lrc.c           |  2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.c    | 28 ++++++++++++++--------------
 9 files changed, 58 insertions(+), 29 deletions(-)
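
For readers less familiar with the batch pool, below is a minimal,
self-contained userspace sketch (not driver code) of the reuse policy the
i915_gem_batch_pool.c hunk adopts: walk the bucket's LRU list and recycle
an object even if it is still marked active, provided its last read
request has already completed; only a genuinely busy object stops the
scan, since everything behind it in the list is at least as busy. All
names here (pool_get, bucket_for, request_completed, the toy seqno-based
request) are invented for illustration and do not exist in the driver.

/* pool_sketch.c - toy model of the batch pool reuse policy (illustration only) */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct request { unsigned long seqno; };	/* toy GPU request */

struct pool_obj {
	size_t size;
	bool active;				/* still referenced by a request? */
	struct request *last_read_req;
	struct pool_obj *next;			/* LRU order: oldest first */
};

struct batch_pool {
	unsigned long completed_seqno;		/* what the toy "GPU" has retired */
	struct pool_obj *cache_list[4];		/* power-of-two size buckets */
};

static bool request_completed(const struct batch_pool *p, const struct request *rq)
{
	return rq == NULL || rq->seqno <= p->completed_seqno;
}

static int bucket_for(size_t size)
{
	/* 4KiB, 8KiB, 16KiB and "everything larger" buckets */
	if (size <= 4096)  return 0;
	if (size <= 8192)  return 1;
	if (size <= 16384) return 2;
	return 3;
}

/*
 * Reuse the oldest object in the bucket. An active object no longer ends
 * the scan outright: if its last read request has completed it can be
 * recycled without stalling; only a still-busy object stops us, because
 * everything after it in the LRU list is at least as busy.
 */
static struct pool_obj *pool_get(struct batch_pool *p, size_t size)
{
	struct pool_obj *obj;

	for (obj = p->cache_list[bucket_for(size)]; obj; obj = obj->next) {
		if (obj->active && !request_completed(p, obj->last_read_req))
			break;
		if (obj->size >= size) {
			obj->active = true;
			return obj;
		}
	}
	return NULL;	/* caller would allocate a fresh object here */
}

int main(void)
{
	struct request rq = { .seqno = 5 };
	struct pool_obj busy = { .size = 4096, .active = true, .last_read_req = &rq };
	struct batch_pool pool = { .completed_seqno = 4, .cache_list = { &busy } };

	/* seqno 5 has not completed (GPU at 4): the busy object is not reused */
	printf("reuse while busy: %p\n", (void *)pool_get(&pool, 4096));

	/* once the GPU passes seqno 5, the same object becomes reusable */
	pool.completed_seqno = 5;
	printf("reuse after completion: %p\n", (void *)pool_get(&pool, 4096));
	return 0;
}

The sketch leans on the same property the real pool does: the cache list
is strictly LRU ordered, so it is enough to test the first busy object;
the driver additionally asserts (via the BUG_ONs in the hunk) that such
an object was only read by this engine and never written.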