Message ID | 20190121222117.23305-9-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [01/34] drm/i915/execlists: Mark up priority boost on preemption | expand |
On 1/21/2019 14:20, Chris Wilson wrote: > In preparation for the next few commits, make resetting the GPU atomic. > Currently, we have prepared gen6+ for atomic resetting of individual > engines, but now there is a requirement to perform the whole device > level reset (just the register poking) from inside an atomic context. > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> > --- > drivers/gpu/drm/i915/i915_reset.c | 50 +++++++++++++++++-------------- > 1 file changed, 27 insertions(+), 23 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c > index 342d9ee42601..b9d0ea70361c 100644 > --- a/drivers/gpu/drm/i915/i915_reset.c > +++ b/drivers/gpu/drm/i915/i915_reset.c > @@ -144,14 +144,14 @@ static int i915_do_reset(struct drm_i915_private *i915, > > /* Assert reset for at least 20 usec, and wait for acknowledgement. */ > pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); > - usleep_range(50, 200); > - err = wait_for(i915_in_reset(pdev), 500); > + udelay(50); > + err = wait_for_atomic(i915_in_reset(pdev), 50); Is it known to be safe to reduce all of these timeout values? Where did the original 500ms value come from? Is there any chance of getting sporadic failures because 50ms is borderline in the worst case scenario? It still sounds huge but an order of magnitude change in a timeout always seems worrying! > > /* Clear the reset request. 
*/ > pci_write_config_byte(pdev, I915_GDRST, 0); > - usleep_range(50, 200); > + udelay(50); > if (!err) > - err = wait_for(!i915_in_reset(pdev), 500); > + err = wait_for_atomic(!i915_in_reset(pdev), 50); > > return err; > } > @@ -171,7 +171,7 @@ static int g33_do_reset(struct drm_i915_private *i915, > struct pci_dev *pdev = i915->drm.pdev; > > pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); > - return wait_for(g4x_reset_complete(pdev), 500); > + return wait_for_atomic(g4x_reset_complete(pdev), 50); > } > > static int g4x_do_reset(struct drm_i915_private *dev_priv, > @@ -182,13 +182,13 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv, > int ret; > > /* WaVcpClkGateDisableForMediaReset:ctg,elk */ > - I915_WRITE(VDECCLK_GATE_D, > - I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE); > - POSTING_READ(VDECCLK_GATE_D); > + I915_WRITE_FW(VDECCLK_GATE_D, > + I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE); > + POSTING_READ_FW(VDECCLK_GATE_D); > > pci_write_config_byte(pdev, I915_GDRST, > GRDOM_MEDIA | GRDOM_RESET_ENABLE); > - ret = wait_for(g4x_reset_complete(pdev), 500); > + ret = wait_for_atomic(g4x_reset_complete(pdev), 50); > if (ret) { > DRM_DEBUG_DRIVER("Wait for media reset failed\n"); > goto out; > @@ -196,7 +196,7 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv, > > pci_write_config_byte(pdev, I915_GDRST, > GRDOM_RENDER | GRDOM_RESET_ENABLE); > - ret = wait_for(g4x_reset_complete(pdev), 500); > + ret = wait_for_atomic(g4x_reset_complete(pdev), 50); > if (ret) { > DRM_DEBUG_DRIVER("Wait for render reset failed\n"); > goto out; > @@ -205,9 +205,9 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv, > out: > pci_write_config_byte(pdev, I915_GDRST, 0); > > - I915_WRITE(VDECCLK_GATE_D, > - I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE); > - POSTING_READ(VDECCLK_GATE_D); > + I915_WRITE_FW(VDECCLK_GATE_D, > + I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE); > + 
POSTING_READ_FW(VDECCLK_GATE_D); > > return ret; > } > @@ -218,27 +218,29 @@ static int ironlake_do_reset(struct drm_i915_private *dev_priv, > { > int ret; > > - I915_WRITE(ILK_GDSR, ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE); > - ret = intel_wait_for_register(dev_priv, > - ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0, > - 500); > + I915_WRITE_FW(ILK_GDSR, ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE); > + ret = __intel_wait_for_register_fw(dev_priv, ILK_GDSR, > + ILK_GRDOM_RESET_ENABLE, 0, > + 5000, 0, > + NULL); These two timeouts are now two orders of magnitude smaller? It was 500ms but is now 5000us (=5ms)? John. > if (ret) { > DRM_DEBUG_DRIVER("Wait for render reset failed\n"); > goto out; > } > > - I915_WRITE(ILK_GDSR, ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE); > - ret = intel_wait_for_register(dev_priv, > - ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0, > - 500); > + I915_WRITE_FW(ILK_GDSR, ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE); > + ret = __intel_wait_for_register_fw(dev_priv, ILK_GDSR, > + ILK_GRDOM_RESET_ENABLE, 0, > + 5000, 0, > + NULL); > if (ret) { > DRM_DEBUG_DRIVER("Wait for media reset failed\n"); > goto out; > } > > out: > - I915_WRITE(ILK_GDSR, 0); > - POSTING_READ(ILK_GDSR); > + I915_WRITE_FW(ILK_GDSR, 0); > + POSTING_READ_FW(ILK_GDSR); > return ret; > } > > @@ -572,7 +574,9 @@ int intel_gpu_reset(struct drm_i915_private *i915, unsigned int engine_mask) > ret = -ENODEV; > if (reset) { > GEM_TRACE("engine_mask=%x\n", engine_mask); > + preempt_disable(); > ret = reset(i915, engine_mask, retry); > + preempt_enable(); > } > if (ret != -ETIMEDOUT || engine_mask != ALL_ENGINES) > break;
Quoting John Harrison (2019-01-22 22:19:04) > On 1/21/2019 14:20, Chris Wilson wrote: > > In preparation for the next few commits, make resetting the GPU atomic. > > Currently, we have prepared gen6+ for atomic resetting of individual > > engines, but now there is a requirement to perform the whole device > > level reset (just the register poking) from inside an atomic context. > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> > > --- > > drivers/gpu/drm/i915/i915_reset.c | 50 +++++++++++++++++-------------- > > 1 file changed, 27 insertions(+), 23 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c > > index 342d9ee42601..b9d0ea70361c 100644 > > --- a/drivers/gpu/drm/i915/i915_reset.c > > +++ b/drivers/gpu/drm/i915/i915_reset.c > > @@ -144,14 +144,14 @@ static int i915_do_reset(struct drm_i915_private *i915, > > > > /* Assert reset for at least 20 usec, and wait for acknowledgement. */ > > pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); > > - usleep_range(50, 200); > > - err = wait_for(i915_in_reset(pdev), 500); > > + udelay(50); > > + err = wait_for_atomic(i915_in_reset(pdev), 50); > Is it known to be safe to reduce all of these time out values? Where did > the originally 500ms value come from? I chose it entirely upon a whim, picking a huge number unlikely to ever be exceeded, and if it were we would be right to conclude the HW was unrecoverable. > Is there any chance of getting > sporadic failures because 50ms is borderline in the worst case scenario? > It still sounds huge but an order of magnitude change in a timeout > always seems worrying! Whereas 50us is more in line with the little bits of documentation that still exist. 
> > @@ -218,27 +218,29 @@ static int ironlake_do_reset(struct drm_i915_private *dev_priv, > > { > > int ret; > > > > - I915_WRITE(ILK_GDSR, ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE); > > - ret = intel_wait_for_register(dev_priv, > > - ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0, > > - 500); > > + I915_WRITE_FW(ILK_GDSR, ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE); > > + ret = __intel_wait_for_register_fw(dev_priv, ILK_GDSR, > > + ILK_GRDOM_RESET_ENABLE, 0, > > + 5000, 0, > > + NULL); > These two timeouts are now two orders of magnitude smaller? It was 500ms > but is now 5000us (=5ms)? 0.5 was the same number plucked from the air. No guidance here, that I know of, except we have lots of runs through CI to try and estimate bounds. -Chris
John Harrison <John.C.Harrison@Intel.com> writes: > On 1/21/2019 14:20, Chris Wilson wrote: >> In preparation for the next few commits, make resetting the GPU atomic. >> Currently, we have prepared gen6+ for atomic resetting of individual >> engines, but now there is a requirement to perform the whole device >> level reset (just the register poking) from inside an atomic context. >> >> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> >> Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> >> --- >> drivers/gpu/drm/i915/i915_reset.c | 50 +++++++++++++++++-------------- >> 1 file changed, 27 insertions(+), 23 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c >> index 342d9ee42601..b9d0ea70361c 100644 >> --- a/drivers/gpu/drm/i915/i915_reset.c >> +++ b/drivers/gpu/drm/i915/i915_reset.c >> @@ -144,14 +144,14 @@ static int i915_do_reset(struct drm_i915_private *i915, >> >> /* Assert reset for at least 20 usec, and wait for acknowledgement. */ >> pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); >> - usleep_range(50, 200); >> - err = wait_for(i915_in_reset(pdev), 500); >> + udelay(50); >> + err = wait_for_atomic(i915_in_reset(pdev), 50); > Is it known to be safe to reduce all of these time out values? Where did > the originally 500ms value come from? Is there any chance of getting > sporadic failures because 50ms is borderline in the worst case scenario? > It still sounds huge but an order of magnitude change in a timeout > always seems worrying! > >> >> /* Clear the reset request. 
*/ >> pci_write_config_byte(pdev, I915_GDRST, 0); >> - usleep_range(50, 200); >> + udelay(50); >> if (!err) >> - err = wait_for(!i915_in_reset(pdev), 500); >> + err = wait_for_atomic(!i915_in_reset(pdev), 50); >> >> return err; >> } >> @@ -171,7 +171,7 @@ static int g33_do_reset(struct drm_i915_private *i915, >> struct pci_dev *pdev = i915->drm.pdev; >> >> pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); >> - return wait_for(g4x_reset_complete(pdev), 500); >> + return wait_for_atomic(g4x_reset_complete(pdev), 50); >> } >> >> static int g4x_do_reset(struct drm_i915_private *dev_priv, >> @@ -182,13 +182,13 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv, >> int ret; >> >> /* WaVcpClkGateDisableForMediaReset:ctg,elk */ >> - I915_WRITE(VDECCLK_GATE_D, >> - I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE); >> - POSTING_READ(VDECCLK_GATE_D); >> + I915_WRITE_FW(VDECCLK_GATE_D, >> + I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE); >> + POSTING_READ_FW(VDECCLK_GATE_D); >> >> pci_write_config_byte(pdev, I915_GDRST, >> GRDOM_MEDIA | GRDOM_RESET_ENABLE); >> - ret = wait_for(g4x_reset_complete(pdev), 500); >> + ret = wait_for_atomic(g4x_reset_complete(pdev), 50); >> if (ret) { >> DRM_DEBUG_DRIVER("Wait for media reset failed\n"); >> goto out; >> @@ -196,7 +196,7 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv, >> >> pci_write_config_byte(pdev, I915_GDRST, >> GRDOM_RENDER | GRDOM_RESET_ENABLE); >> - ret = wait_for(g4x_reset_complete(pdev), 500); >> + ret = wait_for_atomic(g4x_reset_complete(pdev), 50); >> if (ret) { >> DRM_DEBUG_DRIVER("Wait for render reset failed\n"); >> goto out; >> @@ -205,9 +205,9 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv, >> out: >> pci_write_config_byte(pdev, I915_GDRST, 0); >> >> - I915_WRITE(VDECCLK_GATE_D, >> - I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE); >> - POSTING_READ(VDECCLK_GATE_D); >> + I915_WRITE_FW(VDECCLK_GATE_D, >> + I915_READ(VDECCLK_GATE_D) & 
~VCP_UNIT_CLOCK_GATE_DISABLE); >> + POSTING_READ_FW(VDECCLK_GATE_D); >> >> return ret; >> } >> @@ -218,27 +218,29 @@ static int ironlake_do_reset(struct drm_i915_private *dev_priv, >> { >> int ret; >> >> - I915_WRITE(ILK_GDSR, ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE); >> - ret = intel_wait_for_register(dev_priv, >> - ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0, >> - 500); >> + I915_WRITE_FW(ILK_GDSR, ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE); >> + ret = __intel_wait_for_register_fw(dev_priv, ILK_GDSR, >> + ILK_GRDOM_RESET_ENABLE, 0, >> + 5000, 0, >> + NULL); > These two timeouts are now two orders of magnitude smaller? It was 500ms > but is now 5000us (=5ms)? Agreed. I indirectly raised the same concern in the previous round of review by saying that it would be nice if we had some statistics from CI. The original ballooning of these numbers, relative to the little that is available in the documentation, comes from the fact that previously it didn't do much harm to pick a large number to be on the safe side, so why not. Now, it is a different game. -Mika > > John. 
> > >> if (ret) { >> DRM_DEBUG_DRIVER("Wait for render reset failed\n"); >> goto out; >> } >> >> - I915_WRITE(ILK_GDSR, ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE); >> - ret = intel_wait_for_register(dev_priv, >> - ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0, >> - 500); >> + I915_WRITE_FW(ILK_GDSR, ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE); >> + ret = __intel_wait_for_register_fw(dev_priv, ILK_GDSR, >> + ILK_GRDOM_RESET_ENABLE, 0, >> + 5000, 0, >> + NULL); >> if (ret) { >> DRM_DEBUG_DRIVER("Wait for media reset failed\n"); >> goto out; >> } >> >> out: >> - I915_WRITE(ILK_GDSR, 0); >> - POSTING_READ(ILK_GDSR); >> + I915_WRITE_FW(ILK_GDSR, 0); >> + POSTING_READ_FW(ILK_GDSR); >> return ret; >> } >> >> @@ -572,7 +574,9 @@ int intel_gpu_reset(struct drm_i915_private *i915, unsigned int engine_mask) >> ret = -ENODEV; >> if (reset) { >> GEM_TRACE("engine_mask=%x\n", engine_mask); >> + preempt_disable(); >> ret = reset(i915, engine_mask, retry); >> + preempt_enable(); >> } >> if (ret != -ETIMEDOUT || engine_mask != ALL_ENGINES) >> break;
diff --git a/drivers/gpu/drm/i915/i915_reset.c b/drivers/gpu/drm/i915/i915_reset.c index 342d9ee42601..b9d0ea70361c 100644 --- a/drivers/gpu/drm/i915/i915_reset.c +++ b/drivers/gpu/drm/i915/i915_reset.c @@ -144,14 +144,14 @@ static int i915_do_reset(struct drm_i915_private *i915, /* Assert reset for at least 20 usec, and wait for acknowledgement. */ pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); - usleep_range(50, 200); - err = wait_for(i915_in_reset(pdev), 500); + udelay(50); + err = wait_for_atomic(i915_in_reset(pdev), 50); /* Clear the reset request. */ pci_write_config_byte(pdev, I915_GDRST, 0); - usleep_range(50, 200); + udelay(50); if (!err) - err = wait_for(!i915_in_reset(pdev), 500); + err = wait_for_atomic(!i915_in_reset(pdev), 50); return err; } @@ -171,7 +171,7 @@ static int g33_do_reset(struct drm_i915_private *i915, struct pci_dev *pdev = i915->drm.pdev; pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE); - return wait_for(g4x_reset_complete(pdev), 500); + return wait_for_atomic(g4x_reset_complete(pdev), 50); } static int g4x_do_reset(struct drm_i915_private *dev_priv, @@ -182,13 +182,13 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv, int ret; /* WaVcpClkGateDisableForMediaReset:ctg,elk */ - I915_WRITE(VDECCLK_GATE_D, - I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE); - POSTING_READ(VDECCLK_GATE_D); + I915_WRITE_FW(VDECCLK_GATE_D, + I915_READ(VDECCLK_GATE_D) | VCP_UNIT_CLOCK_GATE_DISABLE); + POSTING_READ_FW(VDECCLK_GATE_D); pci_write_config_byte(pdev, I915_GDRST, GRDOM_MEDIA | GRDOM_RESET_ENABLE); - ret = wait_for(g4x_reset_complete(pdev), 500); + ret = wait_for_atomic(g4x_reset_complete(pdev), 50); if (ret) { DRM_DEBUG_DRIVER("Wait for media reset failed\n"); goto out; @@ -196,7 +196,7 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv, pci_write_config_byte(pdev, I915_GDRST, GRDOM_RENDER | GRDOM_RESET_ENABLE); - ret = wait_for(g4x_reset_complete(pdev), 500); + ret = 
wait_for_atomic(g4x_reset_complete(pdev), 50); if (ret) { DRM_DEBUG_DRIVER("Wait for render reset failed\n"); goto out; @@ -205,9 +205,9 @@ static int g4x_do_reset(struct drm_i915_private *dev_priv, out: pci_write_config_byte(pdev, I915_GDRST, 0); - I915_WRITE(VDECCLK_GATE_D, - I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE); - POSTING_READ(VDECCLK_GATE_D); + I915_WRITE_FW(VDECCLK_GATE_D, + I915_READ(VDECCLK_GATE_D) & ~VCP_UNIT_CLOCK_GATE_DISABLE); + POSTING_READ_FW(VDECCLK_GATE_D); return ret; } @@ -218,27 +218,29 @@ static int ironlake_do_reset(struct drm_i915_private *dev_priv, { int ret; - I915_WRITE(ILK_GDSR, ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE); - ret = intel_wait_for_register(dev_priv, - ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0, - 500); + I915_WRITE_FW(ILK_GDSR, ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE); + ret = __intel_wait_for_register_fw(dev_priv, ILK_GDSR, + ILK_GRDOM_RESET_ENABLE, 0, + 5000, 0, + NULL); if (ret) { DRM_DEBUG_DRIVER("Wait for render reset failed\n"); goto out; } - I915_WRITE(ILK_GDSR, ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE); - ret = intel_wait_for_register(dev_priv, - ILK_GDSR, ILK_GRDOM_RESET_ENABLE, 0, - 500); + I915_WRITE_FW(ILK_GDSR, ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE); + ret = __intel_wait_for_register_fw(dev_priv, ILK_GDSR, + ILK_GRDOM_RESET_ENABLE, 0, + 5000, 0, + NULL); if (ret) { DRM_DEBUG_DRIVER("Wait for media reset failed\n"); goto out; } out: - I915_WRITE(ILK_GDSR, 0); - POSTING_READ(ILK_GDSR); + I915_WRITE_FW(ILK_GDSR, 0); + POSTING_READ_FW(ILK_GDSR); return ret; } @@ -572,7 +574,9 @@ int intel_gpu_reset(struct drm_i915_private *i915, unsigned int engine_mask) ret = -ENODEV; if (reset) { GEM_TRACE("engine_mask=%x\n", engine_mask); + preempt_disable(); ret = reset(i915, engine_mask, retry); + preempt_enable(); } if (ret != -ETIMEDOUT || engine_mask != ALL_ENGINES) break;