diff mbox

[CI,1/2] drm/i915: Try harder to reset the GPU

Message ID 20170518204811.7408-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson May 18, 2017, 8:48 p.m. UTC
Repeat the reset a couple of times if at first we do not succeed.

v2: differentiate which path/engine failed with a debug message

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: http://patchwork.freedesktop.org/patch/msgid/20170513083726.502-1-chris@chris-wilson.co.uk
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/intel_uncore.c | 67 +++++++++++++++++++++++++------------
 1 file changed, 46 insertions(+), 21 deletions(-)

Comments

Chris Wilson May 18, 2017, 9:30 p.m. UTC | #1
On Thu, May 18, 2017 at 09:08:26PM -0000, Patchwork wrote:
> == Series Details ==
> 
> Series: series starting with [CI,1/2] drm/i915: Try harder to reset the GPU
> URL   : https://patchwork.freedesktop.org/series/24650/
> State : success
> 
> == Summary ==
> 
> Series 24650v1 Series without cover letter
> https://patchwork.freedesktop.org/api/1.0/series/24650/revisions/1/mbox/
> 
> fi-bdw-5557u     total:278  pass:267  dwarn:0   dfail:0   fail:0   skip:11  time:445s
> fi-bdw-gvtdvm    total:278  pass:256  dwarn:8   dfail:0   fail:0   skip:14  time:434s
> fi-bsw-n3050     total:278  pass:242  dwarn:0   dfail:0   fail:0   skip:36  time:578s
> fi-bxt-j4205     total:278  pass:259  dwarn:0   dfail:0   fail:0   skip:19  time:515s
> fi-byt-j1900     total:278  pass:254  dwarn:0   dfail:0   fail:0   skip:24  time:488s
> fi-byt-n2820     total:278  pass:250  dwarn:0   dfail:0   fail:0   skip:28  time:493s
> fi-hsw-4770      total:278  pass:262  dwarn:0   dfail:0   fail:0   skip:16  time:414s
> fi-hsw-4770r     total:278  pass:262  dwarn:0   dfail:0   fail:0   skip:16  time:415s
> fi-ilk-650       total:278  pass:228  dwarn:0   dfail:0   fail:0   skip:50  time:428s
> fi-ivb-3520m     total:278  pass:260  dwarn:0   dfail:0   fail:0   skip:18  time:496s
> fi-ivb-3770      total:278  pass:260  dwarn:0   dfail:0   fail:0   skip:18  time:468s
> fi-kbl-7500u     total:278  pass:255  dwarn:5   dfail:0   fail:0   skip:18  time:463s
> fi-skl-6260u     total:278  pass:268  dwarn:0   dfail:0   fail:0   skip:10  time:464s
> fi-skl-6700hq    total:278  pass:261  dwarn:0   dfail:0   fail:0   skip:17  time:587s
> fi-skl-6700k     total:278  pass:256  dwarn:4   dfail:0   fail:0   skip:18  time:467s
> fi-skl-6770hq    total:278  pass:268  dwarn:0   dfail:0   fail:0   skip:10  time:506s
> fi-skl-gvtdvm    total:278  pass:265  dwarn:0   dfail:0   fail:0   skip:13  time:437s
> fi-snb-2520m     total:278  pass:250  dwarn:0   dfail:0   fail:0   skip:28  time:531s
> fi-snb-2600      total:278  pass:248  dwarn:0   dfail:0   fail:1   skip:29  time:407s
> 
> ab08cb2750e769d074b2f147c8298ccd0cd08340 drm-tip: 2017y-05m-18d-15h-36m-17s UTC integration manifest
> 4b95051 drm/i915: Reorder media/render reset on g4x
> 6ddc95d drm/i915: Try harder to reset the GPU

Pushed this pair to try resets harder, then Mika can apply his patch to
apply a bigger hammer to gen4.
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 08d7d08438c0..b27cc00ebf83 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -1427,9 +1427,10 @@  int i915_reg_read_ioctl(struct drm_device *dev,
 	return ret;
 }
 
-static int i915_reset_complete(struct pci_dev *pdev)
+static bool i915_reset_complete(struct pci_dev *pdev)
 {
 	u8 gdrst;
+
 	pci_read_config_byte(pdev, I915_GDRST, &gdrst);
 	return (gdrst & GRDOM_RESET_STATUS) == 0;
 }
@@ -1440,15 +1441,16 @@  static int i915_do_reset(struct drm_i915_private *dev_priv, unsigned engine_mask
 
 	/* assert reset for at least 20 usec */
 	pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
-	udelay(20);
+	usleep_range(50, 200);
 	pci_write_config_byte(pdev, I915_GDRST, 0);
 
 	return wait_for(i915_reset_complete(pdev), 500);
 }
 
-static int g4x_reset_complete(struct pci_dev *pdev)
+static bool g4x_reset_complete(struct pci_dev *pdev)
 {
 	u8 gdrst;
+
 	pci_read_config_byte(pdev, I915_GDRST, &gdrst);
 	return (gdrst & GRDOM_RESET_ENABLE) == 0;
 }
@@ -1456,6 +1458,7 @@  static int g4x_reset_complete(struct pci_dev *pdev)
 static int g33_do_reset(struct drm_i915_private *dev_priv, unsigned engine_mask)
 {
 	struct pci_dev *pdev = dev_priv->drm.pdev;
+
 	pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
 	return wait_for(g4x_reset_complete(pdev), 500);
 }
@@ -1468,8 +1471,10 @@  static int g4x_do_reset(struct drm_i915_private *dev_priv, unsigned engine_mask)
 	pci_write_config_byte(pdev, I915_GDRST,
 			      GRDOM_RENDER | GRDOM_RESET_ENABLE);
 	ret =  wait_for(g4x_reset_complete(pdev), 500);
-	if (ret)
-		return ret;
+	if (ret) {
+		DRM_DEBUG_DRIVER("Wait for render reset failed\n");
+		goto out;
+	}
 
 	/* WaVcpClkGateDisableForMediaReset:ctg,elk */
 	I915_WRITE(VDECCLK_GATE_D, I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE);
@@ -1478,16 +1483,17 @@  static int g4x_do_reset(struct drm_i915_private *dev_priv, unsigned engine_mask)
 	pci_write_config_byte(pdev, I915_GDRST,
 			      GRDOM_MEDIA | GRDOM_RESET_ENABLE);
 	ret =  wait_for(g4x_reset_complete(pdev), 500);
-	if (ret)
-		return ret;
+	if (ret) {
+		DRM_DEBUG_DRIVER("Wait for media reset failed\n");
+	}
 
 	/* WaVcpClkGateDisableForMediaReset:ctg,elk */
 	I915_WRITE(VDECCLK_GATE_D, I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE);
 	POSTING_READ(VDECCLK_GATE_D);
 
+out:
 	pci_write_config_byte(pdev, I915_GDRST, 0);
-
-	return 0;
+	return ret;
 }
 
 static int ironlake_do_reset(struct drm_i915_private *dev_priv,
@@ -1495,31 +1501,36 @@  static int ironlake_do_reset(struct drm_i915_private *dev_priv,
 {
 	int ret;
 
-	I915_WRITE(ILK_GDSR,
-		   ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE);
+	I915_WRITE(ILK_GDSR, ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE);
 	ret = intel_wait_for_register(dev_priv,
 				      ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0,
 				      500);
-	if (ret)
-		return ret;
+	if (ret) {
+		DRM_DEBUG_DRIVER("Wait for render reset failed\n");
+		goto out;
+	}
 
-	I915_WRITE(ILK_GDSR,
-		   ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE);
+	I915_WRITE(ILK_GDSR, ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE);
 	ret = intel_wait_for_register(dev_priv,
 				      ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0,
 				      500);
-	if (ret)
-		return ret;
+	if (ret) {
+		DRM_DEBUG_DRIVER("Wait for media reset failed\n");
+		goto out;
+	}
 
+out:
 	I915_WRITE(ILK_GDSR, 0);
-
-	return 0;
+	POSTING_READ(ILK_GDSR);
+	return ret;
 }
 
 /* Reset the hardware domains (GENX_GRDOM_*) specified by mask */
 static int gen6_hw_domain_reset(struct drm_i915_private *dev_priv,
 				u32 hw_domain_mask)
 {
+	int err;
+
 	/* GEN6_GDRST is not in the gt power well, no need to check
 	 * for fifo space for the write or forcewake the chip for
 	 * the read
@@ -1527,9 +1538,14 @@  static int gen6_hw_domain_reset(struct drm_i915_private *dev_priv,
 	__raw_i915_write32(dev_priv, GEN6_GDRST, hw_domain_mask);
 
 	/* Wait for the device to ack the reset requests */
-	return intel_wait_for_register_fw(dev_priv,
+	err = intel_wait_for_register_fw(dev_priv,
 					  GEN6_GDRST, hw_domain_mask, 0,
 					  500);
+	if (err)
+		DRM_DEBUG_DRIVER("Wait for 0x%08x engines reset failed\n",
+				 hw_domain_mask);
+
+	return err;
 }
 
 /**
@@ -1749,8 +1765,11 @@  static reset_func intel_get_gpu_reset(struct drm_i915_private *dev_priv)
 int intel_gpu_reset(struct drm_i915_private *dev_priv, unsigned engine_mask)
 {
 	reset_func reset;
+	int retry;
 	int ret;
 
+	might_sleep();
+
 	reset = intel_get_gpu_reset(dev_priv);
 	if (reset == NULL)
 		return -ENODEV;
@@ -1759,7 +1778,13 @@  int intel_gpu_reset(struct drm_i915_private *dev_priv, unsigned engine_mask)
 	 * request may be dropped and never completes (causing -EIO).
 	 */
 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-	ret = reset(dev_priv, engine_mask);
+	for (retry = 0; retry < 3; retry++) {
+		ret = reset(dev_priv, engine_mask);
+		if (ret != -ETIMEDOUT)
+			break;
+
+		cond_resched();
+	}
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 
 	return ret;