@@ -2978,8 +2978,6 @@ i915_gem_find_active_request(struct intel_engine_cs *ring);
bool i915_gem_retire_requests(struct drm_device *dev);
void i915_gem_retire_requests_ring(struct intel_engine_cs *ring);
-int __must_check i915_gem_check_wedge(struct i915_gpu_error *error,
- bool interruptible);
static inline u32 i915_reset_counter(struct i915_gpu_error *error)
{
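
The helpers used throughout this patch decode the sampled counter directly. Their definitions are not part of this excerpt; a plausible sketch, assuming the low bit flags a reset in flight and the top bit a terminal wedge (as in i915_drv.h of this era):

#define I915_RESET_IN_PROGRESS_FLAG	1
#define I915_WEDGED			(1u << 31)

/* Reset currently in flight? (counter stays odd while recovery runs) */
static inline bool __i915_reset_in_progress(u32 reset)
{
	return unlikely(reset & I915_RESET_IN_PROGRESS_FLAG);
}

/* Recovery failed and the GPU has been declared terminally wedged? */
static inline bool __i915_terminally_wedged(u32 reset)
{
	return unlikely(reset & I915_WEDGED);
}
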
@@ -206,11 +206,10 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
BUG_ON(obj->madv == __I915_MADV_PURGED);
ret = i915_gem_object_set_to_cpu_domain(obj, true);
- if (ret) {
+ if (WARN_ON(ret)) {
/* In the event of a disaster, abandon all caches and
* hope for the best.
*/
- WARN_ON(ret != -EIO);
obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
}
@@ -1104,15 +1103,13 @@ put_rpm:
return ret;
}
-int
-i915_gem_check_wedge(struct i915_gpu_error *error,
- bool interruptible)
+static int
+i915_gem_check_wedge(unsigned reset_counter, bool interruptible)
{
- if (i915_reset_in_progress_or_wedged(error)) {
- /* Recovery complete, but the reset failed ... */
- if (i915_terminally_wedged(error))
- return -EIO;
+ if (__i915_terminally_wedged(reset_counter))
+ return -EIO;
+ if (__i915_reset_in_progress(reset_counter)) {
/* Non-interruptible callers can't handle -EAGAIN, hence return
* -EIO unconditionally for these. */
if (!interruptible)
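
Note the pattern this signature change enables at the call sites below: the counter is sampled once, used for the wedge check, and the same value then stamps the request, so the check and the later wait agree on a single reset epoch. A minimal caller sketch (the surrounding code is assumed, not shown in this hunk):

	unsigned reset_counter = i915_reset_counter(&dev_priv->gpu_error);
	int ret;

	ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible);
	if (ret)
		return ret;

	/* Stamp the request; __i915_wait_request() compares this later. */
	req->reset_counter = reset_counter;
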
@@ -1283,13 +1280,14 @@ int __i915_wait_request(struct drm_i915_gem_request *req,
prepare_to_wait(&ring->irq_queue, &wait, state);
/* We need to check whether any gpu reset happened in between
- * the caller grabbing the seqno and now ... */
+ * the request being submitted and now. If a reset has occurred,
+ * the request is effectively complete (we are either in the
+ * process of discarding the rendering, or have discarded it and
+ * completely reset the GPU). The results of the request are lost
+ * and we are free to continue on with the original operation.
+ */
if (req->reset_counter != i915_reset_counter(&dev_priv->gpu_error)) {
- /* ... but upgrade the -EAGAIN to an -EIO if the gpu
- * is truely gone. */
- ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
- if (ret == 0)
- ret = -EAGAIN;
+ ret = 0;
break;
}
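
With this change __i915_wait_request() no longer leaks -EAGAIN or -EIO to its callers: a stale req->reset_counter means the rendering has been (or is being) discarded, so the wait reports success. A condensed sketch of the resulting loop, assuming only the reset check changed:

	for (;;) {
		prepare_to_wait(&ring->irq_queue, &wait, state);

		/* A reset since submission implies the request is complete. */
		if (req->reset_counter != i915_reset_counter(&dev_priv->gpu_error)) {
			ret = 0;
			break;
		}

		if (i915_gem_request_completed(req, false)) {
			ret = 0;
			break;
		}

		if (signal_pending_state(state, current)) {
			ret = -ERESTARTSYS;
			break;
		}

		io_schedule();
	}
	finish_wait(&ring->irq_queue, &wait);
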
@@ -2162,11 +2160,10 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
BUG_ON(obj->madv == __I915_MADV_PURGED);
ret = i915_gem_object_set_to_cpu_domain(obj, true);
- if (ret) {
+ if (WARN_ON(ret)) {
/* In the event of a disaster, abandon all caches and
* hope for the best.
*/
- WARN_ON(ret != -EIO);
i915_gem_clflush_object(obj, true);
obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
}
@@ -2686,8 +2683,11 @@ int i915_gem_request_alloc(struct intel_engine_cs *ring,
*req_out = NULL;
- ret = i915_gem_check_wedge(&dev_priv->gpu_error,
- dev_priv->mm.interruptible);
+ /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report
+ * -EIO if the GPU is already wedged, or -EAGAIN to drop the
+ * struct_mutex and restart.
+ */
+ ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible);
if (ret)
return ret;
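
The -EAGAIN leg of that ABI is completed in userspace: libdrm's drmIoctl() restarts the ioctl, by which time the reset handler has taken and released the struct_mutex we dropped. A libdrm-style sketch of that restart loop (the helper name is illustrative):

#include <errno.h>
#include <sys/ioctl.h>

/* Restart the ioctl while the kernel reports EINTR/EAGAIN, e.g.
 * while a GPU reset holds off new work (mirrors libdrm's drmIoctl). */
static int gem_ioctl_restart(int fd, unsigned long request, void *arg)
{
	int ret;

	do {
		ret = ioctl(fd, request, arg);
	} while (ret == -1 && (errno == EINTR || errno == EAGAIN));

	return ret;
}
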
@@ -4088,9 +4088,9 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
if (ret)
return ret;
- ret = i915_gem_check_wedge(&dev_priv->gpu_error, false);
- if (ret)
- return ret;
+ /* ABI: return -EIO if already wedged */
+ if (i915_terminally_wedged(&dev_priv->gpu_error))
+ return -EIO;
spin_lock(&file_priv->mm.lock);
list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
@@ -81,10 +81,8 @@ static void __cancel_userptr__worker(struct work_struct *work)
was_interruptible = dev_priv->mm.interruptible;
dev_priv->mm.interruptible = false;
- list_for_each_entry_safe(vma, tmp, &obj->vma_list, vma_link) {
- int ret = i915_vma_unbind(vma);
- WARN_ON(ret && ret != -EIO);
- }
+ list_for_each_entry_safe(vma, tmp, &obj->vma_list, vma_link)
+ WARN_ON(i915_vma_unbind(vma));
WARN_ON(i915_gem_object_put_pages(obj));
dev_priv->mm.interruptible = was_interruptible;
@@ -13516,11 +13516,9 @@ static int intel_atomic_prepare_commit(struct drm_device *dev,
ret = __i915_wait_request(intel_plane_state->wait_req,
true, NULL, NULL);
-
- /* Swallow -EIO errors to allow updates during hw lockup. */
- if (ret == -EIO)
- ret = 0;
if (ret) {
+ /* Any hang should be swallowed by the wait */
+ WARN_ON(ret == -EIO);
mutex_lock(&dev->struct_mutex);
drm_atomic_helper_cleanup_planes(dev, state);
mutex_unlock(&dev->struct_mutex);
@@ -13889,10 +13887,11 @@ intel_prepare_plane_fb(struct drm_plane *plane,
*/
if (needs_modeset(crtc_state))
ret = i915_gem_object_wait_rendering(old_obj, true);
-
- /* Swallow -EIO errors to allow updates during hw lockup. */
- if (ret && ret != -EIO)
+ if (ret) {
+ /* GPU hangs should have been swallowed by the wait */
+ WARN_ON(ret == -EIO);
return ret;
+ }
}
/* For framebuffer backed by dmabuf, wait for fence */
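
Both display hunks invert the old swallowing into an assertion: since __i915_wait_request() now returns 0 across a reset, a -EIO reaching modeset code would indicate a waiter that bypassed it. The shared shape, assuming both call sites reduce to:

	ret = __i915_wait_request(wait_req, true, NULL, NULL);
	if (ret) {
		/* GPU hangs are swallowed by the wait; -EIO here is a bug. */
		WARN_ON(ret == -EIO);
		return ret;	/* -ERESTARTSYS etc. still propagate */
	}
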
@@ -1004,7 +1004,7 @@ void intel_logical_ring_stop(struct intel_engine_cs *ring)
return;
ret = intel_ring_idle(ring);
- if (ret && !i915_reset_in_progress_or_wedged(&to_i915(ring->dev)->gpu_error))
+ if (ret)
DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
ring->name, ret);
@@ -3062,7 +3062,7 @@ intel_stop_ring_buffer(struct intel_engine_cs *ring)
return;
ret = intel_ring_idle(ring);
- if (ret && !i915_reset_in_progress_or_wedged(&to_i915(ring->dev)->gpu_error))
+ if (ret)
DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
ring->name, ret);
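
The two quiesce paths drop their reset-in-progress escape clause for the same reason: intel_ring_idle() bottoms out in __i915_wait_request(), which now returns 0 across a reset, so any remaining error is a genuine failure worth reporting. Assumed (abridged) shape of the idle helper:

int intel_ring_idle(struct intel_engine_cs *ring)
{
	struct drm_i915_gem_request *req;

	/* Wait upon the last request to be completed. */
	if (list_empty(&ring->request_list))
		return 0;

	req = list_entry(ring->request_list.prev,
			 struct drm_i915_gem_request, list);

	return __i915_wait_request(req, to_i915(ring->dev)->mm.interruptible,
				   NULL, NULL);
}
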