diff mbox

[1/1] drm/i915: Reset request handling for gen9+

Message ID 1434461963-2438-1-git-send-email-mika.kuoppala@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Mika Kuoppala June 16, 2015, 1:39 p.m. UTC
In order for skl+ hardware to guarantee that no context switch
takes place during reset and that current context is properly
saved, the driver needs to notify and query hw before commencing
with reset.

We will only proceed with reset if all engines report that they
are ready for reset.

As we skip the reset if any single engine reports not ready, this
commit prevents a system hang on skl in some situations where the
gpu/blitter is hanged and in such state that any write to generic
reset register (GEN6_GDRST) causes immediate system hang.

References: https://bugs.freedesktop.org/show_bug.cgi?id=89959
References: https://bugs.freedesktop.org/show_bug.cgi?id=90854
Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/i915_reg.h     |  3 +++
 drivers/gpu/drm/i915/intel_uncore.c | 32 +++++++++++++++++++++++++++++++-
 2 files changed, 34 insertions(+), 1 deletion(-)

Comments

Chris Wilson June 16, 2015, 2:09 p.m. UTC | #1
On Tue, Jun 16, 2015 at 04:39:23PM +0300, Mika Kuoppala wrote:
> In order for skl+ hardware to guarantee that no context switch
> takes place during reset and that current context is properly
> saved, the driver needs to notify and query hw before commencing
> with reset.
> 
> We will only proceed with reset if all engines report that they
> are ready for reset.
> 
> As we skip the reset if any single engine reports not ready, this
> commit prevents system hang skl in some situations where the
> gpu/blitter is hanged and in such state that any write to generic
> reset register (GEN6_GDRST) causes immediate system hang.
> 
> References: https://bugs.freedesktop.org/show_bug.cgi?id=89959
> References: https://bugs.freedesktop.org/show_bug.cgi?id=90854
> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_reg.h     |  3 +++
>  drivers/gpu/drm/i915/intel_uncore.c | 32 +++++++++++++++++++++++++++++++-
>  2 files changed, 34 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 0b979ad..3684f92 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -1461,6 +1461,9 @@ enum skl_disp_power_wells {
>  #define RING_MAX_IDLE(base)	((base)+0x54)
>  #define RING_HWS_PGA(base)	((base)+0x80)
>  #define RING_HWS_PGA_GEN6(base)	((base)+0x2080)
> +#define RING_RESET_CTL(base)	((base)+0xd0)
> +#define   RESET_CTL_REQUEST_RESET  (1 << 0)
> +#define   RESET_CTL_READY_TO_RESET (1 << 1)
>  
>  #define HSW_GTT_CACHE_EN	0x4024
>  #define   GTT_CACHE_EN_ALL	0xF0007FFF
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index 4a86cf0..404bce2 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -1455,9 +1455,39 @@ static int gen6_do_reset(struct drm_device *dev)
>  	return ret;
>  }
>  
> +static int wait_for_bits_set(struct drm_i915_private *dev_priv,
> +			     const u32 reg, const u32 mask, const int timeout)

Use whitespace to group terms, and probably best to call it with both
mask and value for generality.

static int wait_for_register(struct drm_i915_private *dev_priv,
			     const u32 reg,
			     const u32 mask,
			     const u32 value,
			     const unsigned long timeout);

I hope this proves useful elsewhere, do you have a followup patch? It
should reduce the size of our module quite considerably.
-Chris
Chris Wilson June 16, 2015, 5:10 p.m. UTC | #2
On Tue, Jun 16, 2015 at 04:39:23PM +0300, Mika Kuoppala wrote:
> In order for skl+ hardware to guarantee that no context switch
> takes place during reset and that current context is properly
> saved, the driver needs to notify and query hw before commencing
> with reset.
> 
> We will only proceed with reset if all engines report that they
> are ready for reset.
> 
> As we skip the reset if any single engine reports not ready, this
> commit prevents system hang skl in some situations where the
> gpu/blitter is hanged and in such state that any write to generic

s/is hanged/is wedged/ reads better

> reset register (GEN6_GDRST) causes immediate system hang.
> 
> References: https://bugs.freedesktop.org/show_bug.cgi?id=89959
> References: https://bugs.freedesktop.org/show_bug.cgi?id=90854
> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_reg.h     |  3 +++
>  drivers/gpu/drm/i915/intel_uncore.c | 32 +++++++++++++++++++++++++++++++-
>  2 files changed, 34 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 0b979ad..3684f92 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -1461,6 +1461,9 @@ enum skl_disp_power_wells {
>  #define RING_MAX_IDLE(base)	((base)+0x54)
>  #define RING_HWS_PGA(base)	((base)+0x80)
>  #define RING_HWS_PGA_GEN6(base)	((base)+0x2080)
> +#define RING_RESET_CTL(base)	((base)+0xd0)
> +#define   RESET_CTL_REQUEST_RESET  (1 << 0)
> +#define   RESET_CTL_READY_TO_RESET (1 << 1)
>  
>  #define HSW_GTT_CACHE_EN	0x4024
>  #define   GTT_CACHE_EN_ALL	0xF0007FFF
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index 4a86cf0..404bce2 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -1455,9 +1455,39 @@ static int gen6_do_reset(struct drm_device *dev)
>  	return ret;
>  }
>  
> +static int wait_for_bits_set(struct drm_i915_private *dev_priv,
> +			     const u32 reg, const u32 mask, const int timeout)
> +{
> +	return wait_for((I915_READ(reg) & mask) == mask, timeout);
> +}
> +
> +static int gen9_do_reset(struct drm_device *dev)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct intel_engine_cs *engine;
> +	int ret, i;
> +
> +	for_each_ring(engine, dev_priv, i) {
> +		I915_WRITE(RING_RESET_CTL(engine->mmio_base),
> +			   _MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET));
> +
> +		ret = wait_for_bits_set(dev_priv,
> +					RING_RESET_CTL(engine->mmio_base),
> +					RESET_CTL_READY_TO_RESET, 700);
> +		if (ret) {
> +			DRM_ERROR("%s: reset request timeout\n", engine->name);
> +			return -ENODEV;

return -EIO; since the reset didn't happen due to hardware issues
(ENODEV is that we don't have the implementation for the GPU rather than
it failed).

Do we need any recovery? Do you guarantee that the GPU reset resets the
CTL register?
-Chris
Tomas Elf June 16, 2015, 7:57 p.m. UTC | #3
On 16/06/2015 14:39, Mika Kuoppala wrote:
> In order for skl+ hardware to guarantee that no context switch
> takes place during reset and that current context is properly
> saved, the driver needs to notify and query hw before commencing
> with reset.
>
> We will only proceed with reset if all engines report that they
> are ready for reset.
>
> As we skip the reset if any single engine reports not ready, this
> commit prevents system hang skl in some situations where the
> gpu/blitter is hanged and in such state that any write to generic
> reset register (GEN6_GDRST) causes immediate system hang.

If it solves an observed problem then that's great. What worries me 
slightly is that we seem to be disabling full GPU reset permanently in 
the case where one or more engines have decided for whatever reason to 
never be ready for reset (who knows what the hardware could be up to?). 
In that case we're permanently toast. Would it make sense to only 
accommodate the engine and attempt reset request a few times and if the 
reset request fails x times in a row we simply ignore the outcome and 
move ahead with the full GPU reset anyway? I mean, at that point, what 
do we have to lose?

If we look beyond this patch for a moment and consider the effects of 
combining this patch with my per-engine reset support RFC series, what 
would happen is the following:

0) Hang detected

1) Engine reset request.

2a) If reset request ok, engine reset -> DONE.

2b) If reset request not ok -> clear reset request bit and FAIL engine 
reset. Go to full GPU reset promotion in step 3).

3) Promote to full GPU reset

4) (In this case there's currently no reset request in the RFC since 
I've never heard anyone say that reset request was necessary when doing 
full GPU reset, only in the engine reset case - we're nuking everything 
anyway. We could do what you're doing here and do a reset request for 
all engines)

5a) If all reset requests are ok, do full GPU reset -> DONE.

5b) If some reset requests are not ok -> Go back to 4) and retry a 
couple of times until we give up and simply reset the GPU as a last resort.

What's interesting here is that we would always request reset, both for
per-engine reset (in which case there _is_ a fall-back path in case 
the reset request fails - promote to full GPU reset) and for full GPU 
reset, in which case we could back off and retry the reset request a 
couple of times and then just ignore the reset request outcome if we 
wanted to.

>
> References: https://bugs.freedesktop.org/show_bug.cgi?id=89959
> References: https://bugs.freedesktop.org/show_bug.cgi?id=90854
> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_reg.h     |  3 +++
>   drivers/gpu/drm/i915/intel_uncore.c | 32 +++++++++++++++++++++++++++++++-
>   2 files changed, 34 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 0b979ad..3684f92 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -1461,6 +1461,9 @@ enum skl_disp_power_wells {
>   #define RING_MAX_IDLE(base)	((base)+0x54)
>   #define RING_HWS_PGA(base)	((base)+0x80)
>   #define RING_HWS_PGA_GEN6(base)	((base)+0x2080)
> +#define RING_RESET_CTL(base)	((base)+0xd0)
> +#define   RESET_CTL_REQUEST_RESET  (1 << 0)
> +#define   RESET_CTL_READY_TO_RESET (1 << 1)
>
>   #define HSW_GTT_CACHE_EN	0x4024
>   #define   GTT_CACHE_EN_ALL	0xF0007FFF
> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
> index 4a86cf0..404bce2 100644
> --- a/drivers/gpu/drm/i915/intel_uncore.c
> +++ b/drivers/gpu/drm/i915/intel_uncore.c
> @@ -1455,9 +1455,39 @@ static int gen6_do_reset(struct drm_device *dev)
>   	return ret;
>   }
>
> +static int wait_for_bits_set(struct drm_i915_private *dev_priv,
> +			     const u32 reg, const u32 mask, const int timeout)
> +{
> +	return wait_for((I915_READ(reg) & mask) == mask, timeout);
> +}
> +
> +static int gen9_do_reset(struct drm_device *dev)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct intel_engine_cs *engine;
> +	int ret, i;
> +
> +	for_each_ring(engine, dev_priv, i) {
> +		I915_WRITE(RING_RESET_CTL(engine->mmio_base),
> +			   _MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET));
> +
> +		ret = wait_for_bits_set(dev_priv,
> +					RING_RESET_CTL(engine->mmio_base),
> +					RESET_CTL_READY_TO_RESET, 700);
> +		if (ret) {
> +			DRM_ERROR("%s: reset request timeout\n", engine->name);
> +			return -ENODEV;

You could clear the reset request bit at this point in order to back off 
from the reset request. I don't know what fall-back procedure would make 
most sense following that point but, hey, that's just one way of doing 
it. It would theoretically allow the command streamer to resume 
executing but then again, we're here because it's hung so I don't know 
if the engine is likely to resume doing anything following this point.

> +		}
> +	}
> +
> +	return gen6_do_reset(dev);
> +}
> +
>   static int (*intel_get_gpu_reset(struct drm_device *dev))(struct drm_device *)
>   {
> -	if (INTEL_INFO(dev)->gen >= 6)
> +	if (INTEL_INFO(dev)->gen >= 9)

This is actually applicable for gen8+ (it's part of my RFC from last 
week) and is the only way to idle an engine preceding a reset so you 
might as well generalise it to gen8 and onwards, not only gen9.

Thanks,
Tomas

> +		return gen9_do_reset;
> +	else if (INTEL_INFO(dev)->gen >= 6)
>   		return gen6_do_reset;
>   	else if (IS_GEN5(dev))
>   		return ironlake_do_reset;
>
Tomas Elf June 16, 2015, 8:15 p.m. UTC | #4
On 16/06/2015 18:10, Chris Wilson wrote:
> On Tue, Jun 16, 2015 at 04:39:23PM +0300, Mika Kuoppala wrote:
>> In order for skl+ hardware to guarantee that no context switch
>> takes place during reset and that current context is properly
>> saved, the driver needs to notify and query hw before commencing
>> with reset.
>>
>> We will only proceed with reset if all engines report that they
>> are ready for reset.
>>
>> As we skip the reset if any single engine reports not ready, this
>> commit prevents system hang skl in some situations where the
>> gpu/blitter is hanged and in such state that any write to generic
>
> s/is hanged/is wedged/ reads better
>
>> reset register (GEN6_GDRST) causes immediate system hang.
>>
>> References: https://bugs.freedesktop.org/show_bug.cgi?id=89959
>> References: https://bugs.freedesktop.org/show_bug.cgi?id=90854
>> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_reg.h     |  3 +++
>>   drivers/gpu/drm/i915/intel_uncore.c | 32 +++++++++++++++++++++++++++++++-
>>   2 files changed, 34 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
>> index 0b979ad..3684f92 100644
>> --- a/drivers/gpu/drm/i915/i915_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> @@ -1461,6 +1461,9 @@ enum skl_disp_power_wells {
>>   #define RING_MAX_IDLE(base)	((base)+0x54)
>>   #define RING_HWS_PGA(base)	((base)+0x80)
>>   #define RING_HWS_PGA_GEN6(base)	((base)+0x2080)
>> +#define RING_RESET_CTL(base)	((base)+0xd0)
>> +#define   RESET_CTL_REQUEST_RESET  (1 << 0)
>> +#define   RESET_CTL_READY_TO_RESET (1 << 1)
>>
>>   #define HSW_GTT_CACHE_EN	0x4024
>>   #define   GTT_CACHE_EN_ALL	0xF0007FFF
>> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
>> index 4a86cf0..404bce2 100644
>> --- a/drivers/gpu/drm/i915/intel_uncore.c
>> +++ b/drivers/gpu/drm/i915/intel_uncore.c
>> @@ -1455,9 +1455,39 @@ static int gen6_do_reset(struct drm_device *dev)
>>   	return ret;
>>   }
>>
>> +static int wait_for_bits_set(struct drm_i915_private *dev_priv,
>> +			     const u32 reg, const u32 mask, const int timeout)
>> +{
>> +	return wait_for((I915_READ(reg) & mask) == mask, timeout);
>> +}
>> +
>> +static int gen9_do_reset(struct drm_device *dev)
>> +{
>> +	struct drm_i915_private *dev_priv = dev->dev_private;
>> +	struct intel_engine_cs *engine;
>> +	int ret, i;
>> +
>> +	for_each_ring(engine, dev_priv, i) {
>> +		I915_WRITE(RING_RESET_CTL(engine->mmio_base),
>> +			   _MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET));
>> +
>> +		ret = wait_for_bits_set(dev_priv,
>> +					RING_RESET_CTL(engine->mmio_base),
>> +					RESET_CTL_READY_TO_RESET, 700);
>> +		if (ret) {
>> +			DRM_ERROR("%s: reset request timeout\n", engine->name);
>> +			return -ENODEV;
>
> return -EIO; since the reset didn't happen due to hardware issues
> (ENODEV is that we don't have the implementation for the GPU rather than
> it failed).
>
> Do we need any recovery? Do you guarantee that the GPU reset resets the
> CTL register?
> -Chris

According to the bspec (if I remember correctly from the last time I had 
to deal with it - Mika, correct me if I'm way off here):

If the reset request succeeds the reset request bit is cleared and 
ready_to_reset is set. Following the engine reset both ready_to_reset 
and reset request bits are set to 0. If the reset request fails the 
reset_request bit is obviously still set.

Then again, all of this is assuming engine resets rather than a full GPU 
reset. The bspec does not say anything about what the effect of a full 
gpu reset is on the reset control registers. It's always seemed to me 
like the reset control register is only relevant when doing a per-engine 
reset rather than a full GPU reset but I might very well be wrong about 
that, especially since you guys have seen problems when not involving 
this reset handshake before doing full GPU resets.

Thanks,
Tomas

>
Mika Kuoppala June 17, 2015, 6:33 a.m. UTC | #5
Tomas Elf <tomas.elf@intel.com> writes:

> On 16/06/2015 18:10, Chris Wilson wrote:
>> On Tue, Jun 16, 2015 at 04:39:23PM +0300, Mika Kuoppala wrote:
>>> In order for skl+ hardware to guarantee that no context switch
>>> takes place during reset and that current context is properly
>>> saved, the driver needs to notify and query hw before commencing
>>> with reset.
>>>
>>> We will only proceed with reset if all engines report that they
>>> are ready for reset.
>>>
>>> As we skip the reset if any single engine reports not ready, this
>>> commit prevents system hang skl in some situations where the
>>> gpu/blitter is hanged and in such state that any write to generic
>>
>> s/is hanged/is wedged/ reads better
>>
>>> reset register (GEN6_GDRST) causes immediate system hang.
>>>
>>> References: https://bugs.freedesktop.org/show_bug.cgi?id=89959
>>> References: https://bugs.freedesktop.org/show_bug.cgi?id=90854
>>> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/i915_reg.h     |  3 +++
>>>   drivers/gpu/drm/i915/intel_uncore.c | 32 +++++++++++++++++++++++++++++++-
>>>   2 files changed, 34 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
>>> index 0b979ad..3684f92 100644
>>> --- a/drivers/gpu/drm/i915/i915_reg.h
>>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>>> @@ -1461,6 +1461,9 @@ enum skl_disp_power_wells {
>>>   #define RING_MAX_IDLE(base)	((base)+0x54)
>>>   #define RING_HWS_PGA(base)	((base)+0x80)
>>>   #define RING_HWS_PGA_GEN6(base)	((base)+0x2080)
>>> +#define RING_RESET_CTL(base)	((base)+0xd0)
>>> +#define   RESET_CTL_REQUEST_RESET  (1 << 0)
>>> +#define   RESET_CTL_READY_TO_RESET (1 << 1)
>>>
>>>   #define HSW_GTT_CACHE_EN	0x4024
>>>   #define   GTT_CACHE_EN_ALL	0xF0007FFF
>>> diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
>>> index 4a86cf0..404bce2 100644
>>> --- a/drivers/gpu/drm/i915/intel_uncore.c
>>> +++ b/drivers/gpu/drm/i915/intel_uncore.c
>>> @@ -1455,9 +1455,39 @@ static int gen6_do_reset(struct drm_device *dev)
>>>   	return ret;
>>>   }
>>>
>>> +static int wait_for_bits_set(struct drm_i915_private *dev_priv,
>>> +			     const u32 reg, const u32 mask, const int timeout)
>>> +{
>>> +	return wait_for((I915_READ(reg) & mask) == mask, timeout);
>>> +}
>>> +
>>> +static int gen9_do_reset(struct drm_device *dev)
>>> +{
>>> +	struct drm_i915_private *dev_priv = dev->dev_private;
>>> +	struct intel_engine_cs *engine;
>>> +	int ret, i;
>>> +
>>> +	for_each_ring(engine, dev_priv, i) {
>>> +		I915_WRITE(RING_RESET_CTL(engine->mmio_base),
>>> +			   _MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET));
>>> +
>>> +		ret = wait_for_bits_set(dev_priv,
>>> +					RING_RESET_CTL(engine->mmio_base),
>>> +					RESET_CTL_READY_TO_RESET, 700);
>>> +		if (ret) {
>>> +			DRM_ERROR("%s: reset request timeout\n", engine->name);
>>> +			return -ENODEV;
>>
>> return -EIO; since the reset didn't happen due to hardware issues
>> (ENODEV is that we don't have the implementation for the GPU rather than
>> it failed).
>>
>> Do we need any recovery? Do you guarantee that the GPU reset resets the
>> CTL register?
>> -Chris
>
> According to the bspec (if I remember correctly from the last time I had 
> to deal with it - Mika, correct me if I'm way off here):
>
> If the reset request succeeds the reset request bit is cleared and 
> ready_to_reset is set. Following the engine reset both ready_to_reset 
> and reset request bits are set to 0. If the reset request fails the 
> reset_request bit is obviously still set.
>
> Then again, all of this is assuming engine resets rather than a full GPU 
> reset. The bspec does not say anything about what the effect of a full 
> gpu reset is on the reset control registers. It's always seemed to me 
> like the reset control register is only relevant when doing a per-engine 
> reset rather than a full GPU reset but I might very well be wrong about 
> that, especially since you guys have seen problems when not involving 
> this reset handshake before doing full GPU resets.
>

I don't know if this is needed before doing full gpu reset. But
as things are with current skl hardware, if blitter ring 
says it's not ready to reset, you better not write to 
the 0xc0 or you end up with system hang.

So currently this is just a way to let some resets through
and avoid the fatal ones. gem_concurrent_blit seems to be an
excellent way of killing the gpu/blitter engine in such a way
that no normal reset recovery is possible.

-Mika
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 0b979ad..3684f92 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -1461,6 +1461,9 @@  enum skl_disp_power_wells {
 #define RING_MAX_IDLE(base)	((base)+0x54)
 #define RING_HWS_PGA(base)	((base)+0x80)
 #define RING_HWS_PGA_GEN6(base)	((base)+0x2080)
+#define RING_RESET_CTL(base)	((base)+0xd0)
+#define   RESET_CTL_REQUEST_RESET  (1 << 0)
+#define   RESET_CTL_READY_TO_RESET (1 << 1)
 
 #define HSW_GTT_CACHE_EN	0x4024
 #define   GTT_CACHE_EN_ALL	0xF0007FFF
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 4a86cf0..404bce2 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -1455,9 +1455,39 @@  static int gen6_do_reset(struct drm_device *dev)
 	return ret;
 }
 
+static int wait_for_bits_set(struct drm_i915_private *dev_priv,
+			     const u32 reg, const u32 mask, const int timeout)
+{
+	return wait_for((I915_READ(reg) & mask) == mask, timeout);
+}
+
+static int gen9_do_reset(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_engine_cs *engine;
+	int ret, i;
+
+	for_each_ring(engine, dev_priv, i) {
+		I915_WRITE(RING_RESET_CTL(engine->mmio_base),
+			   _MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET));
+
+		ret = wait_for_bits_set(dev_priv,
+					RING_RESET_CTL(engine->mmio_base),
+					RESET_CTL_READY_TO_RESET, 700);
+		if (ret) {
+			DRM_ERROR("%s: reset request timeout\n", engine->name);
+			return -ENODEV;
+		}
+	}
+
+	return gen6_do_reset(dev);
+}
+
 static int (*intel_get_gpu_reset(struct drm_device *dev))(struct drm_device *)
 {
-	if (INTEL_INFO(dev)->gen >= 6)
+	if (INTEL_INFO(dev)->gen >= 9)
+		return gen9_do_reset;
+	else if (INTEL_INFO(dev)->gen >= 6)
 		return gen6_do_reset;
 	else if (IS_GEN5(dev))
 		return ironlake_do_reset;