
[v4] drm/i915/gen11: Preempt-to-idle support in execlists.

Message ID 1527272798-17326-1-git-send-email-tomasz.lis@intel.com (mailing list archive)
State New, archived

Commit Message

Lis, Tomasz May 25, 2018, 6:26 p.m. UTC
The patch adds support for preempt-to-idle requests: the request is issued by
setting the proper bit within the Execlist Control Register, and the result of
the preemption is received from the Context Status Buffer.

Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about the
status of the preemption request.

This patch does not cover using the new preemption mechanism when GuC is
active.

v2: Added needs_preempt_context() change so that the preempt context is not
    created when preempt-to-idle is supported. (Chris)
    Updated setting of the HWACK flag so that it is cleared after
    preempt-to-idle. (Chris, Daniele)
    Updated to use the I915_ENGINE_HAS_PREEMPTION flag. (Chris)

v3: Fixed needs_preempt_context() change. (Chris)
    Merged the preemption trigger functions into one. (Chris)
    Fixed context state to not assume COMPLETED_MASK after preemption,
    since the idle-to-idle case will not have it set.

v4: Simplified needs_preempt_context() change. (Daniele)
    Removed clearing HWACK flag in idle-to-idle preempt. (Daniele)

Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Michal Winiarski <michal.winiarski@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Bspec: 18922
Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h          |   2 +
 drivers/gpu/drm/i915/i915_gem_context.c  |   3 +-
 drivers/gpu/drm/i915/i915_pci.c          |   3 +-
 drivers/gpu/drm/i915/intel_device_info.h |   1 +
 drivers/gpu/drm/i915/intel_lrc.c         | 113 +++++++++++++++++++++----------
 drivers/gpu/drm/i915/intel_lrc.h         |   1 +
 6 files changed, 86 insertions(+), 37 deletions(-)
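
For orientation, the gen11 path added by the patch boils down to the two fragments
below. This is only a condensed sketch distilled from the diff quoted in the comments;
the identifiers are taken from the patch itself and the surrounding driver plumbing is
elided.

	/* Trigger (gen11 branch): no empty context is submitted, the request
	 * is a single write to the Execlist Control Register. */
	writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
	execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
	execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);

	/* Completion (in process_csb()): the CSB reports the result directly. */
	if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
	    (status & GEN8_CTX_STATUS_COMPLETE &&
	     buf[2*head + 1] == execlists->preempt_complete_status)) {
		GEM_TRACE("%s preempt-idle\n", engine->name);
		complete_preempt_context(execlists);
	}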

Comments

Daniele Ceraolo Spurio June 11, 2018, 4:37 p.m. UTC | #1
On 25/05/18 11:26, Tomasz Lis wrote:
> The patch adds support for preempt-to-idle requests: the request is issued by
> setting the proper bit within the Execlist Control Register, and the result of
> the preemption is received from the Context Status Buffer.
> 
> Preemption in previous gens required a special batch buffer to be executed,
> so the Command Streamer never preempted to idle directly. In Icelake it is
> possible, as there is a hardware mechanism to inform the kernel about the
> status of the preemption request.
> 
> This patch does not cover using the new preemption mechanism when GuC is
> active.
> 
> v2: Added needs_preempt_context() change so that the preempt context is not
>      created when preempt-to-idle is supported. (Chris)
>      Updated setting of the HWACK flag so that it is cleared after
>      preempt-to-idle. (Chris, Daniele)
>      Updated to use the I915_ENGINE_HAS_PREEMPTION flag. (Chris)
> 
> v3: Fixed needs_preempt_context() change. (Chris)
>      Merged the preemption trigger functions into one. (Chris)
>      Fixed context state to not assume COMPLETED_MASK after preemption,
>      since the idle-to-idle case will not have it set.
> 
> v4: Simplified needs_preempt_context() change. (Daniele)
>      Removed clearing HWACK flag in idle-to-idle preempt. (Daniele)
> 
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
> Cc: Michal Winiarski <michal.winiarski@intel.com>
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> Bspec: 18922
> Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.h          |   2 +
>   drivers/gpu/drm/i915/i915_gem_context.c  |   3 +-
>   drivers/gpu/drm/i915/i915_pci.c          |   3 +-
>   drivers/gpu/drm/i915/intel_device_info.h |   1 +
>   drivers/gpu/drm/i915/intel_lrc.c         | 113 +++++++++++++++++++++----------
>   drivers/gpu/drm/i915/intel_lrc.h         |   1 +
>   6 files changed, 86 insertions(+), 37 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 487922f..35eddf7 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2534,6 +2534,8 @@ intel_info(const struct drm_i915_private *dev_priv)
>   		((dev_priv)->info.has_logical_ring_elsq)
>   #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
>   		((dev_priv)->info.has_logical_ring_preemption)
> +#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
> +		((dev_priv)->info.has_hw_preempt_to_idle)
>   
>   #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
>   
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 45393f6..341a5ff 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -455,7 +455,8 @@ destroy_kernel_context(struct i915_gem_context **ctxp)
>   
>   static bool needs_preempt_context(struct drm_i915_private *i915)
>   {
> -	return HAS_LOGICAL_RING_PREEMPTION(i915);
> +	return HAS_LOGICAL_RING_PREEMPTION(i915) &&
> +	       !HAS_HW_PREEMPT_TO_IDLE(i915);
>   }
>   
>   int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index 97a91e6a..ee09926 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -593,7 +593,8 @@ static const struct intel_device_info intel_cannonlake_info = {
>   	GEN(11), \
>   	.ddb_size = 2048, \
>   	.has_csr = 0, \
> -	.has_logical_ring_elsq = 1
> +	.has_logical_ring_elsq = 1, \
> +	.has_hw_preempt_to_idle = 1
>   
>   static const struct intel_device_info intel_icelake_11_info = {
>   	GEN11_FEATURES,
> diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
> index 933e316..4eb97b5 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.h
> +++ b/drivers/gpu/drm/i915/intel_device_info.h
> @@ -98,6 +98,7 @@ enum intel_platform {
>   	func(has_logical_ring_contexts); \
>   	func(has_logical_ring_elsq); \
>   	func(has_logical_ring_preemption); \
> +	func(has_hw_preempt_to_idle); \
>   	func(has_overlay); \
>   	func(has_pooled_eu); \
>   	func(has_psr); \
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 8a6058b..f95cb37 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -154,6 +154,7 @@
>   #define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
>   #define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
>   #define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)
> +#define GEN11_CTX_STATUS_PREEMPT_IDLE	(1 << 29)
>   
>   #define GEN8_CTX_STATUS_COMPLETED_MASK \
>   	 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
> @@ -522,31 +523,46 @@ static void port_assign(struct execlist_port *port, struct i915_request *rq)
>   static void inject_preempt_context(struct intel_engine_cs *engine)

continuing the discussion from the previous patch, I still think that we 
should rename this function now that it doesn't inject a context on some 
gens. A new function name should be relatively trivial to handle from 
other patch series hitting the area (compared to having a second function).
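
For illustration only, the suggestion amounts to keeping a single trigger function
under a name that no longer implies injecting a context, roughly along these lines
(the name below is just a placeholder; candidate names are discussed later in the
thread):

	/* Placeholder name, shown only to illustrate the suggested rename. */
	static void execlists_trigger_preemption(struct intel_engine_cs *engine)
	{
		struct intel_engine_execlists *execlists = &engine->execlists;

		GEM_TRACE("%s\n", engine->name);
		if (HAS_HW_PREEMPT_TO_IDLE(engine->i915)) {
			/* gen11+: request preempt-to-idle via the control register */
			writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
		} else {
			/* earlier gens: submit the empty preempt context,
			 * exactly as in the hunk quoted below */
		}

		execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
		execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
	}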

>   {
>   	struct intel_engine_execlists *execlists = &engine->execlists;
> -	struct intel_context *ce =
> -		to_intel_context(engine->i915->preempt_context, engine);
> -	unsigned int n;
> -
> -	GEM_BUG_ON(execlists->preempt_complete_status !=
> -		   upper_32_bits(ce->lrc_desc));
> -	GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
> -		    _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
> -				       CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
> -		   _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
> -				      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
>   
> -	/*
> -	 * Switch to our empty preempt context so
> -	 * the state of the GPU is known (idle).
> -	 */
>   	GEM_TRACE("%s\n", engine->name);
> -	for (n = execlists_num_ports(execlists); --n; )
> -		write_desc(execlists, 0, n);
> +	if (HAS_HW_PREEMPT_TO_IDLE(engine->i915)) {
> +		/*
> +		 * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
> +		 * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
> +		 */
> +		GEM_BUG_ON(execlists->ctrl_reg == NULL);
>   
> -	write_desc(execlists, ce->lrc_desc, n);
> +		/*
> +		 * If we have hardware preempt-to-idle, we do not need to
> +		 * inject any job to the hardware. We only set a flag.
> +		 */
> +		writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
> +	} else {
> +		struct intel_context *ce =
> +			to_intel_context(engine->i915->preempt_context, engine);
> +		unsigned int n;
>   
> -	/* we need to manually load the submit queue */
> -	if (execlists->ctrl_reg)
> -		writel(EL_CTRL_LOAD, execlists->ctrl_reg);
> +		GEM_BUG_ON(execlists->preempt_complete_status !=
> +			   upper_32_bits(ce->lrc_desc));
> +		GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
> +			    _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
> +					       CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
> +			   _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
> +					      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
> +
> +		/*
> +		 * Switch to our empty preempt context so
> +		 * the state of the GPU is known (idle).
> +		 */
> +		for (n = execlists_num_ports(execlists); --n; )
> +			write_desc(execlists, 0, n);
> +
> +		write_desc(execlists, ce->lrc_desc, n);
> +
> +		/* we need to manually load the submit queue */
> +		if (execlists->ctrl_reg)
> +			writel(EL_CTRL_LOAD, execlists->ctrl_reg);
> +	}
>   
>   	execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
>   	execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
> @@ -1031,22 +1047,48 @@ static void process_csb(struct intel_engine_cs *engine)
>   				  status, buf[2*head + 1],
>   				  execlists->active);
>   
> -			if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
> -				      GEN8_CTX_STATUS_PREEMPTED))
> -				execlists_set_active(execlists,
> -						     EXECLISTS_ACTIVE_HWACK);
> -			if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
> -				execlists_clear_active(execlists,
> -						       EXECLISTS_ACTIVE_HWACK);
> +			/*
> +			 * Check if preempted from idle to idle directly.
> +			 * The STATUS_IDLE_ACTIVE flag is used to mark
> +			 * such transition.
> +			 */
> +			if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
> +			     (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
> +
> +				/* Cannot be waiting for HWACK while HW is idle */

This comment does not match the check, since if the 
EXECLISTS_ACTIVE_HWACK is set it means we've received the hw ack, not 
that we're waiting for it. Personally I would just remove the BUG_ON 
since we don't really care about the value of HWACK as long as 
EXECLISTS_ACTIVE_PREEMPT is set, as the latter ensures us we're not 
going to submit work until the whole preempt process is complete. A 
BUG_ON for EXECLISTS_ACTIVE_PREEMPT is already in 
complete_preempt_context so we're covered on that side.
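
Concretely, dropping that GEM_BUG_ON would reduce the idle-to-idle branch to
something like the sketch below (only the suggested simplification, not the
final code):

	if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
	    (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
		/*
		 * Idle-to-idle preemption: nothing was running, so nothing
		 * can have completed. No HWACK bookkeeping is needed here;
		 * EXECLISTS_ACTIVE_PREEMPT already gates further submission.
		 */
		GEM_BUG_ON(status & GEN8_CTX_STATUS_COMPLETED_MASK);
	} else {
		/* existing HWACK / COMPLETED handling, unchanged */
	}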

With the 2 minor comments addressed:

Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>

Daniele

> +				GEM_BUG_ON(execlists_is_active(execlists,
> +						      EXECLISTS_ACTIVE_HWACK));
>   
> -			if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
> -				continue;
> +				/*
> +				 * We could not have COMPLETED anything
> +				 * if we were idle before preemption.
> +				 */
> +				GEM_BUG_ON(status & GEN8_CTX_STATUS_COMPLETED_MASK);
> +			} else {
> +				if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
> +					      GEN8_CTX_STATUS_PREEMPTED))
> +					execlists_set_active(execlists,
> +							     EXECLISTS_ACTIVE_HWACK);
> +
> +				if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
> +					execlists_clear_active(execlists,
> +							       EXECLISTS_ACTIVE_HWACK);
> +
> +				if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
> +					continue;
> +
> +				/* We should never get a COMPLETED | IDLE_ACTIVE! */
> +				GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
> +			}
>   
> -			/* We should never get a COMPLETED | IDLE_ACTIVE! */
> -			GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
>   
> -			if (status & GEN8_CTX_STATUS_COMPLETE &&
> -			    buf[2*head + 1] == execlists->preempt_complete_status) {
> +			/*
> +			 * Check if preempted to real idle, either directly or
> +			 * the preemptive context already finished executing
> +			 */
> +			if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
> +			    (status & GEN8_CTX_STATUS_COMPLETE &&
> +			    buf[2*head + 1] == execlists->preempt_complete_status)) {
>   				GEM_TRACE("%s preempt-idle\n", engine->name);
>   				complete_preempt_context(execlists);
>   				continue;
> @@ -2337,7 +2379,8 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
>   	engine->unpark = NULL;
>   
>   	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
> -	if (engine->i915->preempt_context)
> +	if (engine->i915->preempt_context ||
> +	    HAS_HW_PREEMPT_TO_IDLE(engine->i915))
>   		engine->flags |= I915_ENGINE_HAS_PREEMPTION;
>   
>   	engine->i915->caps.scheduler =
> diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
> index 1593194..3249e9b 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/intel_lrc.h
> @@ -45,6 +45,7 @@
>   #define RING_EXECLIST_SQ_CONTENTS(engine)	_MMIO((engine)->mmio_base + 0x510)
>   #define RING_EXECLIST_CONTROL(engine)		_MMIO((engine)->mmio_base + 0x550)
>   #define	  EL_CTRL_LOAD				(1 << 0)
> +#define	  EL_CTRL_PREEMPT_TO_IDLE		(1 << 1)
>   
>   /* The docs specify that the write pointer wraps around after 5h, "After status
>    * is written out to the last available status QW at offset 5h, this pointer
>
Lis, Tomasz June 29, 2018, 4:50 p.m. UTC | #2
On 2018-06-11 18:37, Daniele Ceraolo Spurio wrote:
>
>
> On 25/05/18 11:26, Tomasz Lis wrote:
>>   static void inject_preempt_context(struct intel_engine_cs *engine)
>
> continuing the discussion from the previous patch, I still think that 
> we should rename this function now that it doesn't inject a context on 
> some gens. A new function name should be relatively trivial to handle 
> from other patch series hitting the area (compared to having a second 
> function).
Ok, will rename it then.
What would be the most adequate name? execlist_send_preempt_to_idle()?
>
>> +            if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
>> +                 (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
>> +
>> +                /* Cannot be waiting for HWACK while HW is idle */
>
> This comment does not match the check, since if the 
> EXECLISTS_ACTIVE_HWACK is set it means we've received the hw ack, not 
> that we're waiting for it. Personally I would just remove the BUG_ON 
> since we don't really care about the value of HWACK as long as 
> EXECLISTS_ACTIVE_PREEMPT is set, as the latter ensures us we're not 
> going to submit work until the whole preempt process is complete. A 
> BUG_ON for EXECLISTS_ACTIVE_PREEMPT is already in 
> complete_preempt_context so we're covered on that side.
Will remove.
>
> With the 2 minor comments addressed:
>
> Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
>
> Daniele
Daniele Ceraolo Spurio July 2, 2018, 5:36 p.m. UTC | #3
On 29/06/18 09:50, Lis, Tomasz wrote:
> 
> 
> On 2018-06-11 18:37, Daniele Ceraolo Spurio wrote:
>>
>>
>> On 25/05/18 11:26, Tomasz Lis wrote:
>>>   static void inject_preempt_context(struct intel_engine_cs *engine)
>>
>> continuing the discussion from the previous patch, I still think that 
>> we should rename this function now that it doesn't inject a context on 
>> some gens. A new function name should be relatively trivial to handle 
>> from other patch series hitting the area (compared to having a second 
>> function).
> Ok, will rename it then.
> What would be the most adequate name? execlist_send_preempt_to_idle()?

even something simpler like "inject_preemption()" would work IMO. But 
I've always been bad with naming, so I'll leave it to your judgment :)

Daniele


Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 487922f..35eddf7 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2534,6 +2534,8 @@  intel_info(const struct drm_i915_private *dev_priv)
 		((dev_priv)->info.has_logical_ring_elsq)
 #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
 		((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+		((dev_priv)->info.has_hw_preempt_to_idle)
 
 #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
 
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 45393f6..341a5ff 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -455,7 +455,8 @@  destroy_kernel_context(struct i915_gem_context **ctxp)
 
 static bool needs_preempt_context(struct drm_i915_private *i915)
 {
-	return HAS_LOGICAL_RING_PREEMPTION(i915);
+	return HAS_LOGICAL_RING_PREEMPTION(i915) &&
+	       !HAS_HW_PREEMPT_TO_IDLE(i915);
 }
 
 int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 97a91e6a..ee09926 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -593,7 +593,8 @@  static const struct intel_device_info intel_cannonlake_info = {
 	GEN(11), \
 	.ddb_size = 2048, \
 	.has_csr = 0, \
-	.has_logical_ring_elsq = 1
+	.has_logical_ring_elsq = 1, \
+	.has_hw_preempt_to_idle = 1
 
 static const struct intel_device_info intel_icelake_11_info = {
 	GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 933e316..4eb97b5 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -98,6 +98,7 @@  enum intel_platform {
 	func(has_logical_ring_contexts); \
 	func(has_logical_ring_elsq); \
 	func(has_logical_ring_preemption); \
+	func(has_hw_preempt_to_idle); \
 	func(has_overlay); \
 	func(has_pooled_eu); \
 	func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 8a6058b..f95cb37 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -154,6 +154,7 @@ 
 #define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
 #define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
 #define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE	(1 << 29)
 
 #define GEN8_CTX_STATUS_COMPLETED_MASK \
 	 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -522,31 +523,46 @@  static void port_assign(struct execlist_port *port, struct i915_request *rq)
 static void inject_preempt_context(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists *execlists = &engine->execlists;
-	struct intel_context *ce =
-		to_intel_context(engine->i915->preempt_context, engine);
-	unsigned int n;
-
-	GEM_BUG_ON(execlists->preempt_complete_status !=
-		   upper_32_bits(ce->lrc_desc));
-	GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
-		    _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
-				       CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
-		   _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
-				      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
 
-	/*
-	 * Switch to our empty preempt context so
-	 * the state of the GPU is known (idle).
-	 */
 	GEM_TRACE("%s\n", engine->name);
-	for (n = execlists_num_ports(execlists); --n; )
-		write_desc(execlists, 0, n);
+	if (HAS_HW_PREEMPT_TO_IDLE(engine->i915)) {
+		/*
+		 * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
+		 * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
+		 */
+		GEM_BUG_ON(execlists->ctrl_reg == NULL);
 
-	write_desc(execlists, ce->lrc_desc, n);
+		/*
+		 * If we have hardware preempt-to-idle, we do not need to
+		 * inject any job to the hardware. We only set a flag.
+		 */
+		writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
+	} else {
+		struct intel_context *ce =
+			to_intel_context(engine->i915->preempt_context, engine);
+		unsigned int n;
 
-	/* we need to manually load the submit queue */
-	if (execlists->ctrl_reg)
-		writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+		GEM_BUG_ON(execlists->preempt_complete_status !=
+			   upper_32_bits(ce->lrc_desc));
+		GEM_BUG_ON((ce->lrc_reg_state[CTX_CONTEXT_CONTROL + 1] &
+			    _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+					       CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT)) !=
+			   _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+					      CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT));
+
+		/*
+		 * Switch to our empty preempt context so
+		 * the state of the GPU is known (idle).
+		 */
+		for (n = execlists_num_ports(execlists); --n; )
+			write_desc(execlists, 0, n);
+
+		write_desc(execlists, ce->lrc_desc, n);
+
+		/* we need to manually load the submit queue */
+		if (execlists->ctrl_reg)
+			writel(EL_CTRL_LOAD, execlists->ctrl_reg);
+	}
 
 	execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
 	execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
@@ -1031,22 +1047,48 @@  static void process_csb(struct intel_engine_cs *engine)
 				  status, buf[2*head + 1],
 				  execlists->active);
 
-			if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
-				      GEN8_CTX_STATUS_PREEMPTED))
-				execlists_set_active(execlists,
-						     EXECLISTS_ACTIVE_HWACK);
-			if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
-				execlists_clear_active(execlists,
-						       EXECLISTS_ACTIVE_HWACK);
+			/*
+			 * Check if preempted from idle to idle directly.
+			 * The STATUS_IDLE_ACTIVE flag is used to mark
+			 * such transition.
+			 */
+			if ((status & GEN8_CTX_STATUS_IDLE_ACTIVE) &&
+			     (status & GEN11_CTX_STATUS_PREEMPT_IDLE)) {
+
+				/* Cannot be waiting for HWACK while HW is idle */
+				GEM_BUG_ON(execlists_is_active(execlists,
+						      EXECLISTS_ACTIVE_HWACK));
 
-			if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
-				continue;
+				/*
+				 * We could not have COMPLETED anything
+				 * if we were idle before preemption.
+				 */
+				GEM_BUG_ON(status & GEN8_CTX_STATUS_COMPLETED_MASK);
+			} else {
+				if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+					      GEN8_CTX_STATUS_PREEMPTED))
+					execlists_set_active(execlists,
+							     EXECLISTS_ACTIVE_HWACK);
+
+				if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
+					execlists_clear_active(execlists,
+							       EXECLISTS_ACTIVE_HWACK);
+
+				if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
+					continue;
+
+				/* We should never get a COMPLETED | IDLE_ACTIVE! */
+				GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
+			}
 
-			/* We should never get a COMPLETED | IDLE_ACTIVE! */
-			GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
 
-			if (status & GEN8_CTX_STATUS_COMPLETE &&
-			    buf[2*head + 1] == execlists->preempt_complete_status) {
+			/*
+			 * Check if preempted to real idle, either directly or
+			 * the preemptive context already finished executing
+			 */
+			if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
+			    (status & GEN8_CTX_STATUS_COMPLETE &&
+			    buf[2*head + 1] == execlists->preempt_complete_status)) {
 				GEM_TRACE("%s preempt-idle\n", engine->name);
 				complete_preempt_context(execlists);
 				continue;
@@ -2337,7 +2379,8 @@  static void execlists_set_default_submission(struct intel_engine_cs *engine)
 	engine->unpark = NULL;
 
 	engine->flags |= I915_ENGINE_SUPPORTS_STATS;
-	if (engine->i915->preempt_context)
+	if (engine->i915->preempt_context ||
+	    HAS_HW_PREEMPT_TO_IDLE(engine->i915))
 		engine->flags |= I915_ENGINE_HAS_PREEMPTION;
 
 	engine->i915->caps.scheduler =
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 1593194..3249e9b 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -45,6 +45,7 @@ 
 #define RING_EXECLIST_SQ_CONTENTS(engine)	_MMIO((engine)->mmio_base + 0x510)
 #define RING_EXECLIST_CONTROL(engine)		_MMIO((engine)->mmio_base + 0x550)
 #define	  EL_CTRL_LOAD				(1 << 0)
+#define	  EL_CTRL_PREEMPT_TO_IDLE		(1 << 1)
 
 /* The docs specify that the write pointer wraps around after 5h, "After status
  * is written out to the last available status QW at offset 5h, this pointer