@@ -476,7 +476,11 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
i915_gem_obj_finish_shmem_access(bb->obj);
bb->accessing = false;
- i915_vma_move_to_active(bb->vma, workload->req, 0);
+ ret = i915_vma_move_to_active(bb->vma,
+ workload->req,
+ 0);
+ if (ret)
+ goto err;
}
}
return 0;
@@ -3122,9 +3122,9 @@ i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj)
}
int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
-void i915_vma_move_to_active(struct i915_vma *vma,
- struct i915_request *rq,
- unsigned int flags);
+int __must_check i915_vma_move_to_active(struct i915_vma *vma,
+ struct i915_request *rq,
+ unsigned int flags);
int i915_gem_dumb_create(struct drm_file *file_priv,
struct drm_device *dev,
struct drm_mode_create_dumb *args);
@@ -3074,25 +3074,6 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
return err;
}
-static void skip_request(struct i915_request *request)
-{
- void *vaddr = request->ring->vaddr;
- u32 head;
-
- /* As this request likely depends on state from the lost
- * context, clear out all the user operations leaving the
- * breadcrumb at the end (so we get the fence notifications).
- */
- head = request->head;
- if (request->postfix < head) {
- memset(vaddr + head, 0, request->ring->size - head);
- head = 0;
- }
- memset(vaddr + head, 0, request->postfix - head);
-
- dma_fence_set_error(&request->fence, -EIO);
-}
-
static void engine_skip_context(struct i915_request *request)
{
struct intel_engine_cs *engine = request->engine;
@@ -3107,10 +3088,10 @@ static void engine_skip_context(struct i915_request *request)
list_for_each_entry_continue(request, &engine->timeline.requests, link)
if (request->gem_context == hung_ctx)
- skip_request(request);
+ i915_request_skip(request);
list_for_each_entry(request, &timeline->requests, link)
- skip_request(request);
+ i915_request_skip(request);
spin_unlock(&timeline->lock);
spin_unlock_irqrestore(&engine->timeline.lock, flags);
@@ -3153,7 +3134,7 @@ i915_gem_reset_request(struct intel_engine_cs *engine,
if (stalled) {
i915_gem_context_mark_guilty(request->gem_context);
- skip_request(request);
+ i915_request_skip(request);
/* If this context is now banned, skip all pending requests. */
if (i915_gem_context_is_banned(request->gem_context))
@@ -1155,12 +1155,16 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
goto err_request;
GEM_BUG_ON(!reservation_object_test_signaled_rcu(batch->resv, true));
- i915_vma_move_to_active(batch, rq, 0);
- i915_vma_unpin(batch);
+ err = i915_vma_move_to_active(batch, rq, 0);
+ if (err)
+ goto skip_request;
- i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+ err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+ if (err)
+ goto skip_request;
rq->batch = batch;
+ i915_vma_unpin(batch);
cache->rq = rq;
cache->rq_cmd = cmd;
@@ -1169,6 +1173,8 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
/* Return with batch mapping (cmd) still pinned */
return 0;
+skip_request:
+ i915_request_skip(rq);
err_request:
i915_request_add(rq);
err_unpin:
@@ -1808,7 +1814,11 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
unsigned int flags = eb->flags[i];
struct i915_vma *vma = eb->vma[i];
- i915_vma_move_to_active(vma, eb->request, flags);
+ err = i915_vma_move_to_active(vma, eb->request, flags);
+ if (unlikely(err)) {
+ i915_request_skip(eb->request);
+ return err;
+ }
__eb_unreserve_vma(vma, flags);
vma->exec_flags = NULL;
@@ -1867,9 +1877,9 @@ static void export_fence(struct i915_vma *vma,
reservation_object_unlock(resv);
}
-void i915_vma_move_to_active(struct i915_vma *vma,
- struct i915_request *rq,
- unsigned int flags)
+int i915_vma_move_to_active(struct i915_vma *vma,
+ struct i915_request *rq,
+ unsigned int flags)
{
struct drm_i915_gem_object *obj = vma->obj;
const unsigned int idx = rq->engine->id;
@@ -1906,6 +1916,7 @@ void i915_vma_move_to_active(struct i915_vma *vma,
i915_gem_active_set(&vma->last_fence, rq);
export_fence(vma, rq, flags);
+ return 0;
}
static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
@@ -222,7 +222,7 @@ int i915_gem_render_state_emit(struct i915_request *rq)
goto err_unpin;
}
- i915_vma_move_to_active(so.vma, rq, 0);
+ err = i915_vma_move_to_active(so.vma, rq, 0);
err_unpin:
i915_vma_unpin(so.vma);
err_vma:
@@ -962,6 +962,25 @@ i915_request_await_object(struct i915_request *to,
return ret;
}
+void i915_request_skip(struct i915_request *request)
+{
+ void *vaddr = request->ring->vaddr;
+ u32 head;
+
+ /* As this request likely depends on state from the lost
+ * context, clear out all the user operations leaving the
+ * breadcrumb at the end (so we get the fence notifications).
+ */
+ head = request->head;
+ if (request->postfix < head) {
+ memset(vaddr + head, 0, request->ring->size - head);
+ head = 0;
+ }
+ memset(vaddr + head, 0, request->postfix - head);
+
+ dma_fence_set_error(&request->fence, -EIO);
+}
+
/*
* NB: This function is not allowed to fail. Doing so would mean the the
* request is not being tracked for completion but the work itself is
@@ -258,6 +258,8 @@ void i915_request_add(struct i915_request *rq);
void __i915_request_submit(struct i915_request *request);
void i915_request_submit(struct i915_request *request);
+void i915_request_skip(struct i915_request *request);
+
void __i915_request_unsubmit(struct i915_request *request);
void i915_request_unsubmit(struct i915_request *request);
@@ -985,7 +985,10 @@ static int gpu_write(struct i915_vma *vma,
goto err_request;
}
- i915_vma_move_to_active(batch, rq, 0);
+ err = i915_vma_move_to_active(batch, rq, 0);
+ if (err)
+ goto err_request;
+
i915_gem_object_set_active_reference(batch->obj);
i915_vma_unpin(batch);
i915_vma_close(batch);
@@ -996,7 +999,9 @@ static int gpu_write(struct i915_vma *vma,
if (err)
goto err_request;
- i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+ err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+ if (err)
+ i915_request_skip(rq);
err_request:
i915_request_add(rq);
@@ -222,12 +222,12 @@ static int gpu_set(struct drm_i915_gem_object *obj,
}
intel_ring_advance(rq, cs);
- i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+ err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
i915_vma_unpin(vma);
i915_request_add(rq);
- return 0;
+ return err;
}
static bool always_valid(struct drm_i915_private *i915)
@@ -170,18 +170,26 @@ static int gpu_fill(struct drm_i915_gem_object *obj,
if (err)
goto err_request;
- i915_vma_move_to_active(batch, rq, 0);
+ err = i915_vma_move_to_active(batch, rq, 0);
+ if (err)
+ goto skip_request;
+
+ err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+ if (err)
+ goto skip_request;
+
i915_gem_object_set_active_reference(batch->obj);
i915_vma_unpin(batch);
i915_vma_close(batch);
- i915_vma_move_to_active(vma, rq, 0);
i915_vma_unpin(vma);
i915_request_add(rq);
return 0;
+skip_request:
+ i915_request_skip(rq);
err_request:
i915_request_add(rq);
err_batch:
@@ -454,12 +454,13 @@ static int make_obj_busy(struct drm_i915_gem_object *obj)
return PTR_ERR(rq);
}
- i915_vma_move_to_active(vma, rq, 0);
+ err = i915_vma_move_to_active(vma, rq, 0);
i915_request_add(rq);
- i915_gem_object_set_active_reference(obj);
+ __i915_gem_object_release_unless_active(obj);
i915_vma_unpin(vma);
- return 0;
+
+ return err;
}
static bool assert_mmap_offset(struct drm_i915_private *i915,
@@ -678,7 +678,9 @@ static int live_all_engines(void *arg)
i915_gem_object_set_active_reference(batch->obj);
}
- i915_vma_move_to_active(batch, request[id], 0);
+ err = i915_vma_move_to_active(batch, request[id], 0);
+ GEM_BUG_ON(err);
+
i915_request_get(request[id]);
i915_request_add(request[id]);
}
@@ -788,7 +790,9 @@ static int live_sequential_engines(void *arg)
GEM_BUG_ON(err);
request[id]->batch = batch;
- i915_vma_move_to_active(batch, request[id], 0);
+ err = i915_vma_move_to_active(batch, request[id], 0);
+ GEM_BUG_ON(err);
+
i915_gem_object_set_active_reference(batch->obj);
i915_vma_get(batch);
@@ -130,13 +130,19 @@ static int emit_recurse_batch(struct hang *h,
if (err)
goto unpin_vma;
- i915_vma_move_to_active(vma, rq, 0);
+ err = i915_vma_move_to_active(vma, rq, 0);
+ if (err)
+ goto unpin_hws;
+
if (!i915_gem_object_has_active_reference(vma->obj)) {
i915_gem_object_get(vma->obj);
i915_gem_object_set_active_reference(vma->obj);
}
- i915_vma_move_to_active(hws, rq, 0);
+ err = i915_vma_move_to_active(hws, rq, 0);
+ if (err)
+ goto unpin_hws;
+
if (!i915_gem_object_has_active_reference(hws->obj)) {
i915_gem_object_get(hws->obj);
i915_gem_object_set_active_reference(hws->obj);
@@ -205,6 +211,7 @@ static int emit_recurse_batch(struct hang *h,
err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags);
+unpin_hws:
i915_vma_unpin(hws);
unpin_vma:
i915_vma_unpin(vma);
@@ -104,13 +104,19 @@ static int emit_recurse_batch(struct spinner *spin,
if (err)
goto unpin_vma;
- i915_vma_move_to_active(vma, rq, 0);
+ err = i915_vma_move_to_active(vma, rq, 0);
+ if (err)
+ goto unpin_hws;
+
if (!i915_gem_object_has_active_reference(vma->obj)) {
i915_gem_object_get(vma->obj);
i915_gem_object_set_active_reference(vma->obj);
}
- i915_vma_move_to_active(hws, rq, 0);
+ err = i915_vma_move_to_active(hws, rq, 0);
+ if (err)
+ goto unpin_hws;
+
if (!i915_gem_object_has_active_reference(hws->obj)) {
i915_gem_object_get(hws->obj);
i915_gem_object_set_active_reference(hws->obj);
@@ -134,6 +140,7 @@ static int emit_recurse_batch(struct spinner *spin,
err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, 0);
+unpin_hws:
i915_vma_unpin(hws);
unpin_vma:
i915_vma_unpin(vma);
@@ -49,6 +49,10 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
goto err_pin;
}
+ err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+ if (err)
+ goto err_req;
+
srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
if (INTEL_GEN(ctx->i915) >= 8)
srm++;
@@ -67,7 +71,6 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
}
intel_ring_advance(rq, cs);
- i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
reservation_object_lock(vma->resv, NULL);
reservation_object_add_excl_fence(vma->resv, &rq->fence);
reservation_object_unlock(vma->resv);
Handling such a late error in request construction is tricky, but to accommodate future patches which may allocate here, we potentially could err. To handle the error after already adjusting global state to track the new request, we must finish and submit the request. But we don't want to use the request as not everything is being tracked by it, so we opt to cancel the commands inside the request. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- drivers/gpu/drm/i915/gvt/scheduler.c | 6 ++++- drivers/gpu/drm/i915/i915_drv.h | 6 ++--- drivers/gpu/drm/i915/i915_gem.c | 25 +++---------------- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 25 +++++++++++++------ drivers/gpu/drm/i915/i915_gem_render_state.c | 2 +- drivers/gpu/drm/i915/i915_request.c | 19 ++++++++++++++ drivers/gpu/drm/i915/i915_request.h | 2 ++ drivers/gpu/drm/i915/selftests/huge_pages.c | 9 +++++-- .../drm/i915/selftests/i915_gem_coherency.c | 4 +-- .../gpu/drm/i915/selftests/i915_gem_context.c | 12 +++++++-- .../gpu/drm/i915/selftests/i915_gem_object.c | 7 +++--- drivers/gpu/drm/i915/selftests/i915_request.c | 8 ++++-- .../gpu/drm/i915/selftests/intel_hangcheck.c | 11 ++++++-- drivers/gpu/drm/i915/selftests/intel_lrc.c | 11 ++++++-- .../drm/i915/selftests/intel_workarounds.c | 5 +++- 15 files changed, 102 insertions(+), 50 deletions(-)