diff mbox

[v1] drm/i915/gen11: Preempt-to-idle support in execlists.

Message ID 1522163879-10837-1-git-send-email-tomasz.lis@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Lis, Tomasz March 27, 2018, 3:17 p.m. UTC
The patch adds support of preempt-to-idle requesting by setting a proper
bit within Execlist Control Register, and receiving preemption result from
Context Status Buffer.

Preemption in previous gens required a special batch buffer to be executed,
so the Command Streamer never preempted to idle directly. In Icelake it is
possible, as there is a hardware mechanism to inform the kernel about
status of the preemption request.

This patch does not cover using the new preemption mechanism when GuC is
active.

Bspec: 18922
Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h          |  2 ++
 drivers/gpu/drm/i915/i915_pci.c          |  3 ++-
 drivers/gpu/drm/i915/intel_device_info.h |  1 +
 drivers/gpu/drm/i915/intel_lrc.c         | 45 +++++++++++++++++++++++++++-----
 drivers/gpu/drm/i915/intel_lrc.h         |  1 +
 5 files changed, 45 insertions(+), 7 deletions(-)

Comments

Chris Wilson March 27, 2018, 11:27 p.m. UTC | #1
Quoting Tomasz Lis (2018-03-27 16:17:59)
> The patch adds support of preempt-to-idle requesting by setting a proper
> bit within Execlist Control Register, and receiving preemption result from
> Context Status Buffer.
> 
> Preemption in previous gens required a special batch buffer to be executed,
> so the Command Streamer never preempted to idle directly. In Icelake it is
> possible, as there is a hardware mechanism to inform the kernel about
> status of the preemption request.
> 
> This patch does not cover using the new preemption mechanism when GuC is
> active.
> 
> Bspec: 18922
> Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h          |  2 ++
>  drivers/gpu/drm/i915/i915_pci.c          |  3 ++-
>  drivers/gpu/drm/i915/intel_device_info.h |  1 +
>  drivers/gpu/drm/i915/intel_lrc.c         | 45 +++++++++++++++++++++++++++-----
>  drivers/gpu/drm/i915/intel_lrc.h         |  1 +
>  5 files changed, 45 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 800230b..c32580b 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2514,6 +2514,8 @@ intel_info(const struct drm_i915_private *dev_priv)
>                 ((dev_priv)->info.has_logical_ring_elsq)
>  #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
>                 ((dev_priv)->info.has_logical_ring_preemption)
> +#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
> +               ((dev_priv)->info.has_hw_preempt_to_idle)
>  
>  #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
>  
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index 4364922..66b6700 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -595,7 +595,8 @@ static const struct intel_device_info intel_cannonlake_info = {
>         GEN(11), \
>         .ddb_size = 2048, \
>         .has_csr = 0, \
> -       .has_logical_ring_elsq = 1
> +       .has_logical_ring_elsq = 1, \
> +       .has_hw_preempt_to_idle = 1
>  
>  static const struct intel_device_info intel_icelake_11_info = {
>         GEN11_FEATURES,
> diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
> index 933e316..4eb97b5 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.h
> +++ b/drivers/gpu/drm/i915/intel_device_info.h
> @@ -98,6 +98,7 @@ enum intel_platform {
>         func(has_logical_ring_contexts); \
>         func(has_logical_ring_elsq); \
>         func(has_logical_ring_preemption); \
> +       func(has_hw_preempt_to_idle); \
>         func(has_overlay); \
>         func(has_pooled_eu); \
>         func(has_psr); \
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index ba7f783..1a22de4 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -153,6 +153,7 @@
>  #define GEN8_CTX_STATUS_ACTIVE_IDLE    (1 << 3)
>  #define GEN8_CTX_STATUS_COMPLETE       (1 << 4)
>  #define GEN8_CTX_STATUS_LITE_RESTORE   (1 << 15)
> +#define GEN11_CTX_STATUS_PREEMPT_IDLE  (1 << 29)
>  
>  #define GEN8_CTX_STATUS_COMPLETED_MASK \
>          (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
> @@ -183,7 +184,9 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
>                                 const struct i915_request *last,
>                                 int prio)
>  {
> -       return engine->i915->preempt_context && prio > max(rq_prio(last), 0);
> +       return (engine->i915->preempt_context ||
> +               HAS_HW_PREEMPT_TO_IDLE(engine->i915)) &&

Well, you haven't actually disabled allocating the preempt_context so...

But at any rate, making this an engine->flag would eliminate one pointer
dance.

> +                prio > max(rq_prio(last), 0);
>  }
>  
>  /**
> @@ -535,6 +538,25 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
>         execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
>  }
>  
> +static void gen11_preempt_to_idle(struct intel_engine_cs *engine)
> +{
> +       struct intel_engine_execlists *execlists = &engine->execlists;
> +
> +       GEM_TRACE("%s\n", engine->name);
> +
> +       /*
> +        * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
> +        * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
> +        */
> +       GEM_BUG_ON(execlists->ctrl_reg != NULL);
> +
> +       /* trigger preemption to idle */
> +       writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);

Future plans? Because just inserting the branch into the setter of
inject_preempt_context() resolves a lot of conflicts with other work.

> @@ -962,10 +987,13 @@ static void execlists_submission_tasklet(unsigned long data)
>                                   status, buf[2*head + 1],
>                                   execlists->active);
>  
> -                       if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
> -                                     GEN8_CTX_STATUS_PREEMPTED))
> +                       /* Check if switched to active or preempted to active */
> +                       if ((status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
> +                                       GEN8_CTX_STATUS_PREEMPTED)) &&
> +                           !(status & GEN11_CTX_STATUS_PREEMPT_IDLE))

Setting HWACK here is harmless as it gets cleared again. Unless, there
is some oddity in the code flow.

>                                 execlists_set_active(execlists,
>                                                      EXECLISTS_ACTIVE_HWACK);
> +
>                         if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
>                                 execlists_clear_active(execlists,
>                                                        EXECLISTS_ACTIVE_HWACK);
> @@ -976,8 +1004,13 @@ static void execlists_submission_tasklet(unsigned long data)
>                         /* We should never get a COMPLETED | IDLE_ACTIVE! */
>                         GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
>  
> -                       if (status & GEN8_CTX_STATUS_COMPLETE &&
> -                           buf[2*head + 1] == execlists->preempt_complete_status) {
> +                       /*
> +                        * Check if preempted to real idle, either directly or
> +                        * the preemptive context already finished executing
> +                        */
> +                       if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
> +                           (status & GEN8_CTX_STATUS_COMPLETE &&
> +                           buf[2*head + 1] == execlists->preempt_complete_status)) {
>                                 GEM_TRACE("%s preempt-idle\n", engine->name);

Hmm. I was hoping that we would be able to engineer a single check to
cover all sins. Might have been overly optimistic, but I can dream.
-Chris
Lis, Tomasz March 28, 2018, 4:06 p.m. UTC | #2
On 2018-03-28 01:27, Chris Wilson wrote:
> Quoting Tomasz Lis (2018-03-27 16:17:59)
>> The patch adds support of preempt-to-idle requesting by setting a proper
>> bit within Execlist Control Register, and receiving preemption result from
>> Context Status Buffer.
>>
>> Preemption in previous gens required a special batch buffer to be executed,
>> so the Command Streamer never preempted to idle directly. In Icelake it is
>> possible, as there is a hardware mechanism to inform the kernel about
>> status of the preemption request.
>>
>> This patch does not cover using the new preemption mechanism when GuC is
>> active.
>>
>> Bspec: 18922
>> Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_drv.h          |  2 ++
>>   drivers/gpu/drm/i915/i915_pci.c          |  3 ++-
>>   drivers/gpu/drm/i915/intel_device_info.h |  1 +
>>   drivers/gpu/drm/i915/intel_lrc.c         | 45 +++++++++++++++++++++++++++-----
>>   drivers/gpu/drm/i915/intel_lrc.h         |  1 +
>>   5 files changed, 45 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 800230b..c32580b 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -2514,6 +2514,8 @@ intel_info(const struct drm_i915_private *dev_priv)
>>                  ((dev_priv)->info.has_logical_ring_elsq)
>>   #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
>>                  ((dev_priv)->info.has_logical_ring_preemption)
>> +#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
>> +               ((dev_priv)->info.has_hw_preempt_to_idle)
>>   
>>   #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
>>   
>> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
>> index 4364922..66b6700 100644
>> --- a/drivers/gpu/drm/i915/i915_pci.c
>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>> @@ -595,7 +595,8 @@ static const struct intel_device_info intel_cannonlake_info = {
>>          GEN(11), \
>>          .ddb_size = 2048, \
>>          .has_csr = 0, \
>> -       .has_logical_ring_elsq = 1
>> +       .has_logical_ring_elsq = 1, \
>> +       .has_hw_preempt_to_idle = 1
>>   
>>   static const struct intel_device_info intel_icelake_11_info = {
>>          GEN11_FEATURES,
>> diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
>> index 933e316..4eb97b5 100644
>> --- a/drivers/gpu/drm/i915/intel_device_info.h
>> +++ b/drivers/gpu/drm/i915/intel_device_info.h
>> @@ -98,6 +98,7 @@ enum intel_platform {
>>          func(has_logical_ring_contexts); \
>>          func(has_logical_ring_elsq); \
>>          func(has_logical_ring_preemption); \
>> +       func(has_hw_preempt_to_idle); \
>>          func(has_overlay); \
>>          func(has_pooled_eu); \
>>          func(has_psr); \
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>> index ba7f783..1a22de4 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -153,6 +153,7 @@
>>   #define GEN8_CTX_STATUS_ACTIVE_IDLE    (1 << 3)
>>   #define GEN8_CTX_STATUS_COMPLETE       (1 << 4)
>>   #define GEN8_CTX_STATUS_LITE_RESTORE   (1 << 15)
>> +#define GEN11_CTX_STATUS_PREEMPT_IDLE  (1 << 29)
>>   
>>   #define GEN8_CTX_STATUS_COMPLETED_MASK \
>>           (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
>> @@ -183,7 +184,9 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
>>                                  const struct i915_request *last,
>>                                  int prio)
>>   {
>> -       return engine->i915->preempt_context && prio > max(rq_prio(last), 0);
>> +       return (engine->i915->preempt_context ||
>> +               HAS_HW_PREEMPT_TO_IDLE(engine->i915)) &&
> Well, you haven't actually disabled allocating the preempt_context so...
Yes.. I had mixed feelings about changing needs_preempt_context() now, 
as that would mean adding a temporary condition on GuC until the GuC 
preemption is merged.
I will add the conditions and disable the allocation in v2 of the patch.
> But at any rate, making this an engine->flag would eliminate one pointer
> dance.
Could be an interesting idea for a separate patch.
>
>> +                prio > max(rq_prio(last), 0);
>>   }
>>   
>>   /**
>> @@ -535,6 +538,25 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
>>          execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
>>   }
>>   
>> +static void gen11_preempt_to_idle(struct intel_engine_cs *engine)
>> +{
>> +       struct intel_engine_execlists *execlists = &engine->execlists;
>> +
>> +       GEM_TRACE("%s\n", engine->name);
>> +
>> +       /*
>> +        * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
>> +        * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
>> +        */
>> +       GEM_BUG_ON(execlists->ctrl_reg != NULL);
>> +
>> +       /* trigger preemption to idle */
>> +       writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
> Future plans? Because just inserting the branch into the setter of
> inject_preempt_context() resolves a lot of conflicts with other work.
My arguments for separate function are:
- better code readability
- keeping the symmetry between execlist and GuC flow - GuC preemption 
patches will introduce separate function as well
- only 4 lines of the function would be common
- the name inject_preempt_context() wouldn't match the new purpose, so 
renaming would be needed
- reduced self-documenting code due to two separate preempt methods not 
having distinct names

That's all, I don't have any future plans for it. If you want me to 
merge the two, let me know.

>
>> @@ -962,10 +987,13 @@ static void execlists_submission_tasklet(unsigned long data)
>>                                    status, buf[2*head + 1],
>>                                    execlists->active);
>>   
>> -                       if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>> -                                     GEN8_CTX_STATUS_PREEMPTED))
>> +                       /* Check if switched to active or preempted to active */
>> +                       if ((status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>> +                                       GEN8_CTX_STATUS_PREEMPTED)) &&
>> +                           !(status & GEN11_CTX_STATUS_PREEMPT_IDLE))
> Setting HWACK here is harmless as it gets cleared again. Unless, there
> is some oddity in the code flow.
I will check if lack of the change affects test results.
Personally, I would keep this change, even if only for allowing simple 
definition of what HWACK flag means.
>
>>                                  execlists_set_active(execlists,
>>                                                       EXECLISTS_ACTIVE_HWACK);
>> +
>>                          if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
>>                                  execlists_clear_active(execlists,
>>                                                         EXECLISTS_ACTIVE_HWACK);
>> @@ -976,8 +1004,13 @@ static void execlists_submission_tasklet(unsigned long data)
>>                          /* We should never get a COMPLETED | IDLE_ACTIVE! */
>>                          GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
>>   
>> -                       if (status & GEN8_CTX_STATUS_COMPLETE &&
>> -                           buf[2*head + 1] == execlists->preempt_complete_status) {
>> +                       /*
>> +                        * Check if preempted to real idle, either directly or
>> +                        * the preemptive context already finished executing
>> +                        */
>> +                       if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
>> +                           (status & GEN8_CTX_STATUS_COMPLETE &&
>> +                           buf[2*head + 1] == execlists->preempt_complete_status)) {
>>                                  GEM_TRACE("%s preempt-idle\n", engine->name);
> Hmm. I was hoping that we would be able to engineer a single check to
> cover all sins. Might have been overly optimistic, but I can dream.
> -Chris
I don't see any way to do that, besides creating separate function for 
gen11.
Chris Wilson March 28, 2018, 10:28 p.m. UTC | #3
Quoting Lis, Tomasz (2018-03-28 17:06:58)
> 
> 
> On 2018-03-28 01:27, Chris Wilson wrote:
> > Quoting Tomasz Lis (2018-03-27 16:17:59)
> >> The patch adds support of preempt-to-idle requesting by setting a proper
> >> bit within Execlist Control Register, and receiving preemption result from
> >> Context Status Buffer.
> >>
> >> Preemption in previous gens required a special batch buffer to be executed,
> >> so the Command Streamer never preempted to idle directly. In Icelake it is
> >> possible, as there is a hardware mechanism to inform the kernel about
> >> status of the preemption request.
> >>
> >> This patch does not cover using the new preemption mechanism when GuC is
> >> active.
> >>
> >> Bspec: 18922
> >> Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
> >> ---
> >>   drivers/gpu/drm/i915/i915_drv.h          |  2 ++
> >>   drivers/gpu/drm/i915/i915_pci.c          |  3 ++-
> >>   drivers/gpu/drm/i915/intel_device_info.h |  1 +
> >>   drivers/gpu/drm/i915/intel_lrc.c         | 45 +++++++++++++++++++++++++++-----
> >>   drivers/gpu/drm/i915/intel_lrc.h         |  1 +
> >>   5 files changed, 45 insertions(+), 7 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> >> index 800230b..c32580b 100644
> >> --- a/drivers/gpu/drm/i915/i915_drv.h
> >> +++ b/drivers/gpu/drm/i915/i915_drv.h
> >> @@ -2514,6 +2514,8 @@ intel_info(const struct drm_i915_private *dev_priv)
> >>                  ((dev_priv)->info.has_logical_ring_elsq)
> >>   #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
> >>                  ((dev_priv)->info.has_logical_ring_preemption)
> >> +#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
> >> +               ((dev_priv)->info.has_hw_preempt_to_idle)
> >>   
> >>   #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
> >>   
> >> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> >> index 4364922..66b6700 100644
> >> --- a/drivers/gpu/drm/i915/i915_pci.c
> >> +++ b/drivers/gpu/drm/i915/i915_pci.c
> >> @@ -595,7 +595,8 @@ static const struct intel_device_info intel_cannonlake_info = {
> >>          GEN(11), \
> >>          .ddb_size = 2048, \
> >>          .has_csr = 0, \
> >> -       .has_logical_ring_elsq = 1
> >> +       .has_logical_ring_elsq = 1, \
> >> +       .has_hw_preempt_to_idle = 1
> >>   
> >>   static const struct intel_device_info intel_icelake_11_info = {
> >>          GEN11_FEATURES,
> >> diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
> >> index 933e316..4eb97b5 100644
> >> --- a/drivers/gpu/drm/i915/intel_device_info.h
> >> +++ b/drivers/gpu/drm/i915/intel_device_info.h
> >> @@ -98,6 +98,7 @@ enum intel_platform {
> >>          func(has_logical_ring_contexts); \
> >>          func(has_logical_ring_elsq); \
> >>          func(has_logical_ring_preemption); \
> >> +       func(has_hw_preempt_to_idle); \
> >>          func(has_overlay); \
> >>          func(has_pooled_eu); \
> >>          func(has_psr); \
> >> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> >> index ba7f783..1a22de4 100644
> >> --- a/drivers/gpu/drm/i915/intel_lrc.c
> >> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> >> @@ -153,6 +153,7 @@
> >>   #define GEN8_CTX_STATUS_ACTIVE_IDLE    (1 << 3)
> >>   #define GEN8_CTX_STATUS_COMPLETE       (1 << 4)
> >>   #define GEN8_CTX_STATUS_LITE_RESTORE   (1 << 15)
> >> +#define GEN11_CTX_STATUS_PREEMPT_IDLE  (1 << 29)
> >>   
> >>   #define GEN8_CTX_STATUS_COMPLETED_MASK \
> >>           (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
> >> @@ -183,7 +184,9 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
> >>                                  const struct i915_request *last,
> >>                                  int prio)
> >>   {
> >> -       return engine->i915->preempt_context && prio > max(rq_prio(last), 0);
> >> +       return (engine->i915->preempt_context ||
> >> +               HAS_HW_PREEMPT_TO_IDLE(engine->i915)) &&
> > Well, you haven't actually disabled allocating the preempt_context so...
> Yes.. I had mixed feelings about changing needs_preempt_context() now, 
> as that would mean adding a temporary condition on GuC until the GuC 
> preemption is merged.
> I will add the conditions and disable the allocation in v2 of the patch.
> > But at any rate, making this an engine->flag would eliminate one pointer
> > dance.
> Could be an interesting idea for a separate patch.

To land first ;)

> >> +                prio > max(rq_prio(last), 0);
> >>   }
> >>   
> >>   /**
> >> @@ -535,6 +538,25 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
> >>          execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
> >>   }
> >>   
> >> +static void gen11_preempt_to_idle(struct intel_engine_cs *engine)
> >> +{
> >> +       struct intel_engine_execlists *execlists = &engine->execlists;
> >> +
> >> +       GEM_TRACE("%s\n", engine->name);
> >> +
> >> +       /*
> >> +        * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
> >> +        * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
> >> +        */
> >> +       GEM_BUG_ON(execlists->ctrl_reg != NULL);
> >> +
> >> +       /* trigger preemption to idle */
> >> +       writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
> > Future plans? Because just inserting the branch into the setter of
> > inject_preempt_context() resolves a lot of conflicts with other work.
> My arguments for separate function are:
> - better code readability
> - keeping the symmetry between execlist and GuC flow - GuC preemption 
> patches will introduce separate function as well
> - only 4 lines of the function would be common
> - the name inject_preempt_context() wouldn't match the new purpose, so 
> renaming would be needed
> - reduced self-documenting code due to two separate preempt methods not 
> having distinct names
> 
> That's all, I don't have any future plans for it. If you want me to 
> merge the two, let me know.

The problem that I am worrying about is that we will duplicate bunch of
other code, the actual ELS[PQ] write is the smaller portion. Plus we
already have the branch on something much more pleasant.

> >> @@ -962,10 +987,13 @@ static void execlists_submission_tasklet(unsigned long data)
> >>                                    status, buf[2*head + 1],
> >>                                    execlists->active);
> >>   
> >> -                       if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
> >> -                                     GEN8_CTX_STATUS_PREEMPTED))
> >> +                       /* Check if switched to active or preempted to active */
> >> +                       if ((status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
> >> +                                       GEN8_CTX_STATUS_PREEMPTED)) &&
> >> +                           !(status & GEN11_CTX_STATUS_PREEMPT_IDLE))
> > Setting HWACK here is harmless as it gets cleared again. Unless, there
> > is some oddity in the code flow.
> I will check if lack of the change affects test results.
> Personally, I would keep this change, even if only for allowing simple 
> definition of what HWACK flag means.

The simple definition is the opposite one, imo. We set the flag after we
get the corresponding response from HW; any preemption or activate event
must follow the most recent ELSP write. So that will include the
preemption event following the preempt-idle write.

Then on deciding that the HW is idle, we apply the complication such
that execlists->active == 0. (That rule is what breaks the pattern.)
-Chris
Lis, Tomasz March 30, 2018, 3:42 p.m. UTC | #4
On 2018-03-29 00:28, Chris Wilson wrote:
> Quoting Lis, Tomasz (2018-03-28 17:06:58)
>>
>> On 2018-03-28 01:27, Chris Wilson wrote:
>>> Quoting Tomasz Lis (2018-03-27 16:17:59)
>>>> The patch adds support of preempt-to-idle requesting by setting a proper
>>>> bit within Execlist Control Register, and receiving preemption result from
>>>> Context Status Buffer.
>>>>
>>>> Preemption in previous gens required a special batch buffer to be executed,
>>>> so the Command Streamer never preempted to idle directly. In Icelake it is
>>>> possible, as there is a hardware mechanism to inform the kernel about
>>>> status of the preemption request.
>>>>
>>>> This patch does not cover using the new preemption mechanism when GuC is
>>>> active.
>>>>
>>>> Bspec: 18922
>>>> Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
>>>> ---
>>>>    drivers/gpu/drm/i915/i915_drv.h          |  2 ++
>>>>    drivers/gpu/drm/i915/i915_pci.c          |  3 ++-
>>>>    drivers/gpu/drm/i915/intel_device_info.h |  1 +
>>>>    drivers/gpu/drm/i915/intel_lrc.c         | 45 +++++++++++++++++++++++++++-----
>>>>    drivers/gpu/drm/i915/intel_lrc.h         |  1 +
>>>>    5 files changed, 45 insertions(+), 7 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>>>> index 800230b..c32580b 100644
>>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>>> @@ -2514,6 +2514,8 @@ intel_info(const struct drm_i915_private *dev_priv)
>>>>                   ((dev_priv)->info.has_logical_ring_elsq)
>>>>    #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
>>>>                   ((dev_priv)->info.has_logical_ring_preemption)
>>>> +#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
>>>> +               ((dev_priv)->info.has_hw_preempt_to_idle)
>>>>    
>>>>    #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
>>>>    
>>>> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
>>>> index 4364922..66b6700 100644
>>>> --- a/drivers/gpu/drm/i915/i915_pci.c
>>>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>>>> @@ -595,7 +595,8 @@ static const struct intel_device_info intel_cannonlake_info = {
>>>>           GEN(11), \
>>>>           .ddb_size = 2048, \
>>>>           .has_csr = 0, \
>>>> -       .has_logical_ring_elsq = 1
>>>> +       .has_logical_ring_elsq = 1, \
>>>> +       .has_hw_preempt_to_idle = 1
>>>>    
>>>>    static const struct intel_device_info intel_icelake_11_info = {
>>>>           GEN11_FEATURES,
>>>> diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
>>>> index 933e316..4eb97b5 100644
>>>> --- a/drivers/gpu/drm/i915/intel_device_info.h
>>>> +++ b/drivers/gpu/drm/i915/intel_device_info.h
>>>> @@ -98,6 +98,7 @@ enum intel_platform {
>>>>           func(has_logical_ring_contexts); \
>>>>           func(has_logical_ring_elsq); \
>>>>           func(has_logical_ring_preemption); \
>>>> +       func(has_hw_preempt_to_idle); \
>>>>           func(has_overlay); \
>>>>           func(has_pooled_eu); \
>>>>           func(has_psr); \
>>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>>>> index ba7f783..1a22de4 100644
>>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>>> @@ -153,6 +153,7 @@
>>>>    #define GEN8_CTX_STATUS_ACTIVE_IDLE    (1 << 3)
>>>>    #define GEN8_CTX_STATUS_COMPLETE       (1 << 4)
>>>>    #define GEN8_CTX_STATUS_LITE_RESTORE   (1 << 15)
>>>> +#define GEN11_CTX_STATUS_PREEMPT_IDLE  (1 << 29)
>>>>    
>>>>    #define GEN8_CTX_STATUS_COMPLETED_MASK \
>>>>            (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
>>>> @@ -183,7 +184,9 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
>>>>                                   const struct i915_request *last,
>>>>                                   int prio)
>>>>    {
>>>> -       return engine->i915->preempt_context && prio > max(rq_prio(last), 0);
>>>> +       return (engine->i915->preempt_context ||
>>>> +               HAS_HW_PREEMPT_TO_IDLE(engine->i915)) &&
>>> Well, you haven't actually disabled allocating the preempt_context so...
>> Yes.. I had mixed feelings about changing needs_preempt_context() now,
>> as that would mean adding a temporary condition on GuC until the GuC
>> preemption is merged.
>> I will add the conditions and disable the allocation in v2 of the patch.
>>> But at any rate, making this an engine->flag would eliminate one pointer
>>> dance.
>> Could be an interesting idea for a separate patch.
> To land first ;)
:)
Sure, I can do that.
>>>> +                prio > max(rq_prio(last), 0);
>>>>    }
>>>>    
>>>>    /**
>>>> @@ -535,6 +538,25 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
>>>>           execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
>>>>    }
>>>>    
>>>> +static void gen11_preempt_to_idle(struct intel_engine_cs *engine)
>>>> +{
>>>> +       struct intel_engine_execlists *execlists = &engine->execlists;
>>>> +
>>>> +       GEM_TRACE("%s\n", engine->name);
>>>> +
>>>> +       /*
>>>> +        * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
>>>> +        * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
>>>> +        */
>>>> +       GEM_BUG_ON(execlists->ctrl_reg != NULL);
>>>> +
>>>> +       /* trigger preemption to idle */
>>>> +       writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
>>> Future plans? Because just inserting the branch into the setter of
>>> inject_preempt_context() resolves a lot of conflicts with other work.
>> My arguments for separate function are:
>> - better code readability
>> - keeping the symmetry between execlist and GuC flow - GuC preemption
>> patches will introduce separate function as well
>> - only 4 lines of the function would be common
>> - the name inject_preempt_context() wouldn't match the new purpose, so
>> renaming would be needed
>> - reduced self-documenting code due to two separate preempt methods not
>> having distinct names
>>
>> That's all, I don't have any future plans for it. If you want me to
>> merge the two, let me know.
> The problem that I am worrying about is that we will duplicate bunch of
> other code, the actual ELS[PQ] write is the smaller portion. Plus we
> already have the branch on something much more pleasant.
I see. I don't know any details there, so I'm not able to weigh that.
Just let me know whether this possible duplication outweights the 
arguments I provided, and I will merge these functions.
I'm not overly attached to my solution.
>
>>>> @@ -962,10 +987,13 @@ static void execlists_submission_tasklet(unsigned long data)
>>>>                                     status, buf[2*head + 1],
>>>>                                     execlists->active);
>>>>    
>>>> -                       if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>>>> -                                     GEN8_CTX_STATUS_PREEMPTED))
>>>> +                       /* Check if switched to active or preempted to active */
>>>> +                       if ((status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>>>> +                                       GEN8_CTX_STATUS_PREEMPTED)) &&
>>>> +                           !(status & GEN11_CTX_STATUS_PREEMPT_IDLE))
>>> Setting HWACK here is harmless as it gets cleared again. Unless, there
>>> is some oddity in the code flow.
>> I will check if lack of the change affects test results.
>> Personally, I would keep this change, even if only for allowing simple
>> definition of what HWACK flag means.
> The simple definition is the opposite one, imo. We set the flag after we
> get the corresponding response from HW; any preemption or activate event
> must follow the most recent ELSP write. So that will include the
> preemption event following the preempt-idle write.
>
> Then on deciding that the HW is idle, we apply the complication such
> that execlists->active == 0. (That rule is what breaks the pattern.)
> -Chris
Ok, I will remove this unnecessary condition.
I tested this and lack of it doesn't seem to affect the results.
(I'll be out next week; expect v2 when I'm back)
-Tomasz
Daniele Ceraolo Spurio March 30, 2018, 6:23 p.m. UTC | #5
On 27/03/18 16:27, Chris Wilson wrote:
> Quoting Tomasz Lis (2018-03-27 16:17:59)
>> The patch adds support of preempt-to-idle requesting by setting a proper
>> bit within Execlist Control Register, and receiving preemption result from
>> Context Status Buffer.
>>
>> Preemption in previous gens required a special batch buffer to be executed,
>> so the Command Streamer never preempted to idle directly. In Icelake it is
>> possible, as there is a hardware mechanism to inform the kernel about
>> status of the preemption request.
>>
>> This patch does not cover using the new preemption mechanism when GuC is
>> active.
>>
>> Bspec: 18922
>> Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_drv.h          |  2 ++
>>   drivers/gpu/drm/i915/i915_pci.c          |  3 ++-
>>   drivers/gpu/drm/i915/intel_device_info.h |  1 +
>>   drivers/gpu/drm/i915/intel_lrc.c         | 45 +++++++++++++++++++++++++++-----
>>   drivers/gpu/drm/i915/intel_lrc.h         |  1 +
>>   5 files changed, 45 insertions(+), 7 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 800230b..c32580b 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -2514,6 +2514,8 @@ intel_info(const struct drm_i915_private *dev_priv)
>>                  ((dev_priv)->info.has_logical_ring_elsq)
>>   #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
>>                  ((dev_priv)->info.has_logical_ring_preemption)
>> +#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
>> +               ((dev_priv)->info.has_hw_preempt_to_idle)
>>   
>>   #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
>>   
>> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
>> index 4364922..66b6700 100644
>> --- a/drivers/gpu/drm/i915/i915_pci.c
>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>> @@ -595,7 +595,8 @@ static const struct intel_device_info intel_cannonlake_info = {
>>          GEN(11), \
>>          .ddb_size = 2048, \
>>          .has_csr = 0, \
>> -       .has_logical_ring_elsq = 1
>> +       .has_logical_ring_elsq = 1, \
>> +       .has_hw_preempt_to_idle = 1
>>   
>>   static const struct intel_device_info intel_icelake_11_info = {
>>          GEN11_FEATURES,
>> diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
>> index 933e316..4eb97b5 100644
>> --- a/drivers/gpu/drm/i915/intel_device_info.h
>> +++ b/drivers/gpu/drm/i915/intel_device_info.h
>> @@ -98,6 +98,7 @@ enum intel_platform {
>>          func(has_logical_ring_contexts); \
>>          func(has_logical_ring_elsq); \
>>          func(has_logical_ring_preemption); \
>> +       func(has_hw_preempt_to_idle); \
>>          func(has_overlay); \
>>          func(has_pooled_eu); \
>>          func(has_psr); \
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>> index ba7f783..1a22de4 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -153,6 +153,7 @@
>>   #define GEN8_CTX_STATUS_ACTIVE_IDLE    (1 << 3)
>>   #define GEN8_CTX_STATUS_COMPLETE       (1 << 4)
>>   #define GEN8_CTX_STATUS_LITE_RESTORE   (1 << 15)
>> +#define GEN11_CTX_STATUS_PREEMPT_IDLE  (1 << 29)
>>   
>>   #define GEN8_CTX_STATUS_COMPLETED_MASK \
>>           (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
>> @@ -183,7 +184,9 @@ static inline bool need_preempt(const struct intel_engine_cs *engine,
>>                                  const struct i915_request *last,
>>                                  int prio)
>>   {
>> -       return engine->i915->preempt_context && prio > max(rq_prio(last), 0);
>> +       return (engine->i915->preempt_context ||
>> +               HAS_HW_PREEMPT_TO_IDLE(engine->i915)) &&
> 
> Well, you haven't actually disabled allocating the preempt_context so...
> 
> But at any rate, making this an engine->flag would eliminate one pointer
> dance.
> 

Can't we re-use I915_SCHEDULER_CAP_PREEMPTION in 
engine->i915->caps.scheduler? That btw like here to be set if 
i915->preempt_context || HAS_HW_PREEMPT_TO_IDLE(i915)

>> +                prio > max(rq_prio(last), 0);
>>   }
>>   
>>   /**
>> @@ -535,6 +538,25 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
>>          execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
>>   }
>>   
>> +static void gen11_preempt_to_idle(struct intel_engine_cs *engine)
>> +{
>> +       struct intel_engine_execlists *execlists = &engine->execlists;
>> +
>> +       GEM_TRACE("%s\n", engine->name);
>> +
>> +       /*
>> +        * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
>> +        * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
>> +        */
>> +       GEM_BUG_ON(execlists->ctrl_reg != NULL);

Shouldn't this check be the other way around?

>> +
>> +       /* trigger preemption to idle */
>> +       writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
> 
> Future plans? Because just inserting the branch into the setter of
> inject_preempt_context() resolves a lot of conflicts with other work.
> 
>> @@ -962,10 +987,13 @@ static void execlists_submission_tasklet(unsigned long data)
>>                                    status, buf[2*head + 1],
>>                                    execlists->active);
>>   
>> -                       if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>> -                                     GEN8_CTX_STATUS_PREEMPTED))
>> +                       /* Check if switched to active or preempted to active */
>> +                       if ((status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>> +                                       GEN8_CTX_STATUS_PREEMPTED)) &&
>> +                           !(status & GEN11_CTX_STATUS_PREEMPT_IDLE))
> 
> Setting HWACK here is harmless as it gets cleared again. Unless, there
> is some oddity in the code flow.

There is actually some oddity, but it is more on the HW side. A preempt 
to idle can potentially land on an already idle HW, in which case 
GEN8_CTX_STATUS_ACTIVE_IDLE is not set and GEN8_CTX_STATUS_IDLE_ACTIVE 
is set instead. In this case without this check on 
GEN11_CTX_STATUS_PREEMPT_IDLE we would set the HWACK here but we 
wouldn't call the clear below. Not sure if we end up clearing the flag 
elsewhere, but that doesn't look too nice IMHO.

BTW, the relevant CSB bits coming out in the 2 preempt to idle cases are 
as follows:

preempt active HW:
GEN11_CTX_STATUS_PREEMPT_IDLE | GEN8_CTX_STATUS_ACTIVE_IDLE | 
GEN8_CTX_STATUS_PREEMPTED

Preempt idle HW:
GEN11_CTX_STATUS_PREEMPT_IDLE | GEN8_CTX_STATUS_IDLE_ACTIVE

Daniele

> 
>>                                  execlists_set_active(execlists,
>>                                                       EXECLISTS_ACTIVE_HWACK);
>> +
>>                          if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
>>                                  execlists_clear_active(execlists,
>>                                                         EXECLISTS_ACTIVE_HWACK);
>> @@ -976,8 +1004,13 @@ static void execlists_submission_tasklet(unsigned long data)
>>                          /* We should never get a COMPLETED | IDLE_ACTIVE! */
>>                          GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
>>   
>> -                       if (status & GEN8_CTX_STATUS_COMPLETE &&
>> -                           buf[2*head + 1] == execlists->preempt_complete_status) {
>> +                       /*
>> +                        * Check if preempted to real idle, either directly or
>> +                        * the preemptive context already finished executing
>> +                        */
>> +                       if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
>> +                           (status & GEN8_CTX_STATUS_COMPLETE &&
>> +                           buf[2*head + 1] == execlists->preempt_complete_status)) {
>>                                  GEM_TRACE("%s preempt-idle\n", engine->name);
> 
> Hmm. I was hoping that we would be able to engineer a single check to
> cover all sins. Might have been overly optimistic, but I can dream.
> -Chris
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
Daniele Ceraolo Spurio March 30, 2018, 7:45 p.m. UTC | #6
On 30/03/18 08:42, Lis, Tomasz wrote:
> 
> 
> On 2018-03-29 00:28, Chris Wilson wrote:
>> Quoting Lis, Tomasz (2018-03-28 17:06:58)
>>>
>>> On 2018-03-28 01:27, Chris Wilson wrote:
>>>> Quoting Tomasz Lis (2018-03-27 16:17:59)
>>>>> The patch adds support of preempt-to-idle requesting by setting a 
>>>>> proper
>>>>> bit within Execlist Control Register, and receiving preemption 
>>>>> result from
>>>>> Context Status Buffer.
>>>>>
>>>>> Preemption in previous gens required a special batch buffer to be 
>>>>> executed,
>>>>> so the Command Streamer never preempted to idle directly. In 
>>>>> Icelake it is
>>>>> possible, as there is a hardware mechanism to inform the kernel about
>>>>> status of the preemption request.
>>>>>
>>>>> This patch does not cover using the new preemption mechanism when 
>>>>> GuC is
>>>>> active.
>>>>>
>>>>> Bspec: 18922
>>>>> Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
>>>>> ---
>>>>>    drivers/gpu/drm/i915/i915_drv.h          |  2 ++
>>>>>    drivers/gpu/drm/i915/i915_pci.c          |  3 ++-
>>>>>    drivers/gpu/drm/i915/intel_device_info.h |  1 +
>>>>>    drivers/gpu/drm/i915/intel_lrc.c         | 45 
>>>>> +++++++++++++++++++++++++++-----
>>>>>    drivers/gpu/drm/i915/intel_lrc.h         |  1 +
>>>>>    5 files changed, 45 insertions(+), 7 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h 
>>>>> b/drivers/gpu/drm/i915/i915_drv.h
>>>>> index 800230b..c32580b 100644
>>>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>>>> @@ -2514,6 +2514,8 @@ intel_info(const struct drm_i915_private 
>>>>> *dev_priv)
>>>>>                   ((dev_priv)->info.has_logical_ring_elsq)
>>>>>    #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
>>>>>                   ((dev_priv)->info.has_logical_ring_preemption)
>>>>> +#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
>>>>> +               ((dev_priv)->info.has_hw_preempt_to_idle)
>>>>>    #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
>>>>> diff --git a/drivers/gpu/drm/i915/i915_pci.c 
>>>>> b/drivers/gpu/drm/i915/i915_pci.c
>>>>> index 4364922..66b6700 100644
>>>>> --- a/drivers/gpu/drm/i915/i915_pci.c
>>>>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>>>>> @@ -595,7 +595,8 @@ static const struct intel_device_info 
>>>>> intel_cannonlake_info = {
>>>>>           GEN(11), \
>>>>>           .ddb_size = 2048, \
>>>>>           .has_csr = 0, \
>>>>> -       .has_logical_ring_elsq = 1
>>>>> +       .has_logical_ring_elsq = 1, \
>>>>> +       .has_hw_preempt_to_idle = 1
>>>>>    static const struct intel_device_info intel_icelake_11_info = {
>>>>>           GEN11_FEATURES,
>>>>> diff --git a/drivers/gpu/drm/i915/intel_device_info.h 
>>>>> b/drivers/gpu/drm/i915/intel_device_info.h
>>>>> index 933e316..4eb97b5 100644
>>>>> --- a/drivers/gpu/drm/i915/intel_device_info.h
>>>>> +++ b/drivers/gpu/drm/i915/intel_device_info.h
>>>>> @@ -98,6 +98,7 @@ enum intel_platform {
>>>>>           func(has_logical_ring_contexts); \
>>>>>           func(has_logical_ring_elsq); \
>>>>>           func(has_logical_ring_preemption); \
>>>>> +       func(has_hw_preempt_to_idle); \
>>>>>           func(has_overlay); \
>>>>>           func(has_pooled_eu); \
>>>>>           func(has_psr); \
>>>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
>>>>> b/drivers/gpu/drm/i915/intel_lrc.c
>>>>> index ba7f783..1a22de4 100644
>>>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>>>> @@ -153,6 +153,7 @@
>>>>>    #define GEN8_CTX_STATUS_ACTIVE_IDLE    (1 << 3)
>>>>>    #define GEN8_CTX_STATUS_COMPLETE       (1 << 4)
>>>>>    #define GEN8_CTX_STATUS_LITE_RESTORE   (1 << 15)
>>>>> +#define GEN11_CTX_STATUS_PREEMPT_IDLE  (1 << 29)
>>>>>    #define GEN8_CTX_STATUS_COMPLETED_MASK \
>>>>>            (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
>>>>> @@ -183,7 +184,9 @@ static inline bool need_preempt(const struct 
>>>>> intel_engine_cs *engine,
>>>>>                                   const struct i915_request *last,
>>>>>                                   int prio)
>>>>>    {
>>>>> -       return engine->i915->preempt_context && prio > 
>>>>> max(rq_prio(last), 0);
>>>>> +       return (engine->i915->preempt_context ||
>>>>> +               HAS_HW_PREEMPT_TO_IDLE(engine->i915)) &&
>>>> Well, you haven't actually disabled allocating the preempt_context 
>>>> so...
>>> Yes.. I had mixed feelings about changing needs_preempt_context() now,
>>> as that would mean adding a temporary condition on GuC until the GuC
>>> preemption is merged.
>>> I will add the conditions and disable the allocation in v2 of the patch.
>>>> But at any rate, making this an engine->flag would eliminate one 
>>>> pointer
>>>> dance.
>>> Could be an interesting idea for a separate patch.
>> To land first ;)
> :)
> Sure, I can do that.
>>>>> +                prio > max(rq_prio(last), 0);
>>>>>    }
>>>>>    /**
>>>>> @@ -535,6 +538,25 @@ static void inject_preempt_context(struct 
>>>>> intel_engine_cs *engine)
>>>>>           execlists_set_active(&engine->execlists, 
>>>>> EXECLISTS_ACTIVE_PREEMPT);
>>>>>    }
>>>>> +static void gen11_preempt_to_idle(struct intel_engine_cs *engine)
>>>>> +{
>>>>> +       struct intel_engine_execlists *execlists = &engine->execlists;
>>>>> +
>>>>> +       GEM_TRACE("%s\n", engine->name);
>>>>> +
>>>>> +       /*
>>>>> +        * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
>>>>> +        * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
>>>>> +        */
>>>>> +       GEM_BUG_ON(execlists->ctrl_reg != NULL);
>>>>> +
>>>>> +       /* trigger preemption to idle */
>>>>> +       writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
>>>> Future plans? Because just inserting the branch into the setter of
>>>> inject_preempt_context() resolves a lot of conflicts with other work.
>>> My arguments for separate function are:
>>> - better code readability
>>> - keeping the symmetry between execlist and GuC flow - GuC preemption
>>> patches will introduce separate function as well
>>> - only 4 lines of the function would be common
>>> - the name inject_preempt_context() wouldn't match the new purpose, so
>>> renaming would be needed
>>> - reduced self-documenting code due to two separate preempt methods not
>>> having distinct names
>>>
>>> That's all, I don't have any future plans for it. If you want me to
>>> merge the two, let me know.
>> The problem that I am worrying about is that we will duplicate bunch of
>> other code, the actual ELS[PQ] write is the smaller portion. Plus we
>> already have the branch on something much more pleasant.
> I see. I don't know any details there, so I'm not able to weigh that.
> Just let me know whether this possible duplication outweights the 
> arguments I provided, and I will merge these functions.
> I'm not overly attached to my solution.
>>
>>>>> @@ -962,10 +987,13 @@ static void 
>>>>> execlists_submission_tasklet(unsigned long data)
>>>>>                                     status, buf[2*head + 1],
>>>>>                                     execlists->active);
>>>>> -                       if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>>>>> -                                     GEN8_CTX_STATUS_PREEMPTED))
>>>>> +                       /* Check if switched to active or preempted 
>>>>> to active */
>>>>> +                       if ((status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>>>>> +                                       GEN8_CTX_STATUS_PREEMPTED)) &&
>>>>> +                           !(status & GEN11_CTX_STATUS_PREEMPT_IDLE))
>>>> Setting HWACK here is harmless as it gets cleared again. Unless, there
>>>> is some oddity in the code flow.
>>> I will check if lack of the change affects test results.
>>> Personally, I would keep this change, even if only for allowing simple
>>> definition of what HWACK flag means.
>> The simple definition is the opposite one, imo. We set the flag after we
>> get the corresponding response from HW; any preemption or activate event
>> must follow the most recent ELSP write. So that will include the
>> preemption event following the preempt-idle write.
>>
>> Then on deciding that the HW is idle, we apply the complication such
>> that execlists->active == 0. (That rule is what breaks the pattern.)
>> -Chris
> Ok, I will remove this unnecessary condition.
> I tested this and lack of it doesn't seem to affect the results.
> (I'll be out next week; expect v2 when I'm back)
> -Tomasz
> 

Do we have any test to cover a preempt to idle on already idle HW (which 
is the case we cover with this flag here)? If not maybe we cold add a 
selftest for that.

Daniele

> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Lis, Tomasz April 12, 2018, 5:15 p.m. UTC | #7
On 2018-03-30 20:23, Daniele Ceraolo Spurio wrote:
>
>
> On 27/03/18 16:27, Chris Wilson wrote:
>> Quoting Tomasz Lis (2018-03-27 16:17:59)
>>> The patch adds support of preempt-to-idle requesting by setting a 
>>> proper
>>> bit within Execlist Control Register, and receiving preemption 
>>> result from
>>> Context Status Buffer.
>>>
>>> Preemption in previous gens required a special batch buffer to be 
>>> executed,
>>> so the Command Streamer never preempted to idle directly. In Icelake 
>>> it is
>>> possible, as there is a hardware mechanism to inform the kernel about
>>> status of the preemption request.
>>>
>>> This patch does not cover using the new preemption mechanism when 
>>> GuC is
>>> active.
>>>
>>> Bspec: 18922
>>> Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/i915_drv.h          |  2 ++
>>>   drivers/gpu/drm/i915/i915_pci.c          |  3 ++-
>>>   drivers/gpu/drm/i915/intel_device_info.h |  1 +
>>>   drivers/gpu/drm/i915/intel_lrc.c         | 45 
>>> +++++++++++++++++++++++++++-----
>>>   drivers/gpu/drm/i915/intel_lrc.h         |  1 +
>>>   5 files changed, 45 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h 
>>> b/drivers/gpu/drm/i915/i915_drv.h
>>> index 800230b..c32580b 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>> @@ -2514,6 +2514,8 @@ intel_info(const struct drm_i915_private 
>>> *dev_priv)
>>>                  ((dev_priv)->info.has_logical_ring_elsq)
>>>   #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
>>> ((dev_priv)->info.has_logical_ring_preemption)
>>> +#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
>>> +               ((dev_priv)->info.has_hw_preempt_to_idle)
>>>     #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
>>>   diff --git a/drivers/gpu/drm/i915/i915_pci.c 
>>> b/drivers/gpu/drm/i915/i915_pci.c
>>> index 4364922..66b6700 100644
>>> --- a/drivers/gpu/drm/i915/i915_pci.c
>>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>>> @@ -595,7 +595,8 @@ static const struct intel_device_info 
>>> intel_cannonlake_info = {
>>>          GEN(11), \
>>>          .ddb_size = 2048, \
>>>          .has_csr = 0, \
>>> -       .has_logical_ring_elsq = 1
>>> +       .has_logical_ring_elsq = 1, \
>>> +       .has_hw_preempt_to_idle = 1
>>>     static const struct intel_device_info intel_icelake_11_info = {
>>>          GEN11_FEATURES,
>>> diff --git a/drivers/gpu/drm/i915/intel_device_info.h 
>>> b/drivers/gpu/drm/i915/intel_device_info.h
>>> index 933e316..4eb97b5 100644
>>> --- a/drivers/gpu/drm/i915/intel_device_info.h
>>> +++ b/drivers/gpu/drm/i915/intel_device_info.h
>>> @@ -98,6 +98,7 @@ enum intel_platform {
>>>          func(has_logical_ring_contexts); \
>>>          func(has_logical_ring_elsq); \
>>>          func(has_logical_ring_preemption); \
>>> +       func(has_hw_preempt_to_idle); \
>>>          func(has_overlay); \
>>>          func(has_pooled_eu); \
>>>          func(has_psr); \
>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
>>> b/drivers/gpu/drm/i915/intel_lrc.c
>>> index ba7f783..1a22de4 100644
>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>> @@ -153,6 +153,7 @@
>>>   #define GEN8_CTX_STATUS_ACTIVE_IDLE    (1 << 3)
>>>   #define GEN8_CTX_STATUS_COMPLETE       (1 << 4)
>>>   #define GEN8_CTX_STATUS_LITE_RESTORE   (1 << 15)
>>> +#define GEN11_CTX_STATUS_PREEMPT_IDLE  (1 << 29)
>>>     #define GEN8_CTX_STATUS_COMPLETED_MASK \
>>>           (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
>>> @@ -183,7 +184,9 @@ static inline bool need_preempt(const struct 
>>> intel_engine_cs *engine,
>>>                                  const struct i915_request *last,
>>>                                  int prio)
>>>   {
>>> -       return engine->i915->preempt_context && prio > 
>>> max(rq_prio(last), 0);
>>> +       return (engine->i915->preempt_context ||
>>> +               HAS_HW_PREEMPT_TO_IDLE(engine->i915)) &&
>>
>> Well, you haven't actually disabled allocating the preempt_context so...
>>
>> But at any rate, making this an engine->flag would eliminate one pointer
>> dance.
>>
>
> Can't we re-use I915_SCHEDULER_CAP_PREEMPTION in 
> engine->i915->caps.scheduler? That btw like here to be set if 
> i915->preempt_context || HAS_HW_PREEMPT_TO_IDLE(i915)
The engine->flag which Chris introduced is now used to set 
I915_SCHEDULER_CAP_PREEMPTION.
>
>>> +                prio > max(rq_prio(last), 0);
>>>   }
>>>     /**
>>> @@ -535,6 +538,25 @@ static void inject_preempt_context(struct 
>>> intel_engine_cs *engine)
>>>          execlists_set_active(&engine->execlists, 
>>> EXECLISTS_ACTIVE_PREEMPT);
>>>   }
>>>   +static void gen11_preempt_to_idle(struct intel_engine_cs *engine)
>>> +{
>>> +       struct intel_engine_execlists *execlists = &engine->execlists;
>>> +
>>> +       GEM_TRACE("%s\n", engine->name);
>>> +
>>> +       /*
>>> +        * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
>>> +        * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
>>> +        */
>>> +       GEM_BUG_ON(execlists->ctrl_reg != NULL);
>
> Shouldn't this check be the other way around?
Wow. I have no idea how I was able to test this patch and not trigger 
this. You are right.
>
>>> +
>>> +       /* trigger preemption to idle */
>>> +       writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
>>
>> Future plans? Because just inserting the branch into the setter of
>> inject_preempt_context() resolves a lot of conflicts with other work.
>>
>>> @@ -962,10 +987,13 @@ static void 
>>> execlists_submission_tasklet(unsigned long data)
>>>                                    status, buf[2*head + 1],
>>>                                    execlists->active);
>>>   -                       if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>>> - GEN8_CTX_STATUS_PREEMPTED))
>>> +                       /* Check if switched to active or preempted 
>>> to active */
>>> +                       if ((status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>>> + GEN8_CTX_STATUS_PREEMPTED)) &&
>>> +                           !(status & GEN11_CTX_STATUS_PREEMPT_IDLE))
>>
>> Setting HWACK here is harmless as it gets cleared again. Unless, there
>> is some oddity in the code flow.
>
> There is actually some oddity, but it is more on the HW side. A 
> preempt to idle can potentially land on an already idle HW, in which 
> case GEN8_CTX_STATUS_ACTIVE_IDLE is not set and 
> GEN8_CTX_STATUS_IDLE_ACTIVE is set instead. In this case without this 
> check on GEN11_CTX_STATUS_PREEMPT_IDLE we would set the HWACK here but 
> we wouldn't call the clear below. Not sure if we end up clearing the 
> flag elsewhere, but that doesn't look too nice IMHO.
>
> BTW, the relevant CSB bits coming out in the 2 preempt to idle cases 
> are as follows:
>
> preempt active HW:
> GEN11_CTX_STATUS_PREEMPT_IDLE | GEN8_CTX_STATUS_ACTIVE_IDLE | 
> GEN8_CTX_STATUS_PREEMPTED
>
> Preempt idle HW:
> GEN11_CTX_STATUS_PREEMPT_IDLE | GEN8_CTX_STATUS_IDLE_ACTIVE
>
> Daniele
Thanks Daniele, this makes things a lot clearer.
Considering also HWACK description from Chris, I will add a condition to 
execlists_clear_active() below instead of  here.
-Tomasz
>
>>
>>> execlists_set_active(execlists,
>>> EXECLISTS_ACTIVE_HWACK);
>>> +
>>>                          if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
>>> execlists_clear_active(execlists,
>>> EXECLISTS_ACTIVE_HWACK);
>>> @@ -976,8 +1004,13 @@ static void 
>>> execlists_submission_tasklet(unsigned long data)
>>>                          /* We should never get a COMPLETED | 
>>> IDLE_ACTIVE! */
>>>                          GEM_BUG_ON(status & 
>>> GEN8_CTX_STATUS_IDLE_ACTIVE);
>>>   -                       if (status & GEN8_CTX_STATUS_COMPLETE &&
>>> -                           buf[2*head + 1] == 
>>> execlists->preempt_complete_status) {
>>> +                       /*
>>> +                        * Check if preempted to real idle, either 
>>> directly or
>>> +                        * the preemptive context already finished 
>>> executing
>>> +                        */
>>> +                       if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
>>> +                           (status & GEN8_CTX_STATUS_COMPLETE &&
>>> +                           buf[2*head + 1] == 
>>> execlists->preempt_complete_status)) {
>>>                                  GEM_TRACE("%s preempt-idle\n", 
>>> engine->name);
>>
>> Hmm. I was hoping that we would be able to engineer a single check to
>> cover all sins. Might have been overly optimistic, but I can dream.
>> -Chris
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
>>
Lis, Tomasz April 26, 2018, 2:02 p.m. UTC | #8
On 2018-03-30 21:45, Daniele Ceraolo Spurio wrote:
>
>
> On 30/03/18 08:42, Lis, Tomasz wrote:
>>
>>
>> On 2018-03-29 00:28, Chris Wilson wrote:
>>> Quoting Lis, Tomasz (2018-03-28 17:06:58)
>>>>
>>>> On 2018-03-28 01:27, Chris Wilson wrote:
>>>>> Quoting Tomasz Lis (2018-03-27 16:17:59)
>>>>>> The patch adds support of preempt-to-idle requesting by setting a 
>>>>>> proper
>>>>>> bit within Execlist Control Register, and receiving preemption 
>>>>>> result from
>>>>>> Context Status Buffer.
>>>>>>
>>>>>> Preemption in previous gens required a special batch buffer to be 
>>>>>> executed,
>>>>>> so the Command Streamer never preempted to idle directly. In 
>>>>>> Icelake it is
>>>>>> possible, as there is a hardware mechanism to inform the kernel 
>>>>>> about
>>>>>> status of the preemption request.
>>>>>>
>>>>>> This patch does not cover using the new preemption mechanism when 
>>>>>> GuC is
>>>>>> active.
>>>>>>
>>>>>> Bspec: 18922
>>>>>> Signed-off-by: Tomasz Lis <tomasz.lis@intel.com>
>>>>>> ---
>>>>>>    drivers/gpu/drm/i915/i915_drv.h          |  2 ++
>>>>>>    drivers/gpu/drm/i915/i915_pci.c          |  3 ++-
>>>>>>    drivers/gpu/drm/i915/intel_device_info.h |  1 +
>>>>>>    drivers/gpu/drm/i915/intel_lrc.c         | 45 
>>>>>> +++++++++++++++++++++++++++-----
>>>>>>    drivers/gpu/drm/i915/intel_lrc.h         |  1 +
>>>>>>    5 files changed, 45 insertions(+), 7 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h 
>>>>>> b/drivers/gpu/drm/i915/i915_drv.h
>>>>>> index 800230b..c32580b 100644
>>>>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>>>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>>>>> @@ -2514,6 +2514,8 @@ intel_info(const struct drm_i915_private 
>>>>>> *dev_priv)
>>>>>> ((dev_priv)->info.has_logical_ring_elsq)
>>>>>>    #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
>>>>>> ((dev_priv)->info.has_logical_ring_preemption)
>>>>>> +#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
>>>>>> + ((dev_priv)->info.has_hw_preempt_to_idle)
>>>>>>    #define HAS_EXECLISTS(dev_priv) 
>>>>>> HAS_LOGICAL_RING_CONTEXTS(dev_priv)
>>>>>> diff --git a/drivers/gpu/drm/i915/i915_pci.c 
>>>>>> b/drivers/gpu/drm/i915/i915_pci.c
>>>>>> index 4364922..66b6700 100644
>>>>>> --- a/drivers/gpu/drm/i915/i915_pci.c
>>>>>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>>>>>> @@ -595,7 +595,8 @@ static const struct intel_device_info 
>>>>>> intel_cannonlake_info = {
>>>>>>           GEN(11), \
>>>>>>           .ddb_size = 2048, \
>>>>>>           .has_csr = 0, \
>>>>>> -       .has_logical_ring_elsq = 1
>>>>>> +       .has_logical_ring_elsq = 1, \
>>>>>> +       .has_hw_preempt_to_idle = 1
>>>>>>    static const struct intel_device_info intel_icelake_11_info = {
>>>>>>           GEN11_FEATURES,
>>>>>> diff --git a/drivers/gpu/drm/i915/intel_device_info.h 
>>>>>> b/drivers/gpu/drm/i915/intel_device_info.h
>>>>>> index 933e316..4eb97b5 100644
>>>>>> --- a/drivers/gpu/drm/i915/intel_device_info.h
>>>>>> +++ b/drivers/gpu/drm/i915/intel_device_info.h
>>>>>> @@ -98,6 +98,7 @@ enum intel_platform {
>>>>>>           func(has_logical_ring_contexts); \
>>>>>>           func(has_logical_ring_elsq); \
>>>>>>           func(has_logical_ring_preemption); \
>>>>>> +       func(has_hw_preempt_to_idle); \
>>>>>>           func(has_overlay); \
>>>>>>           func(has_pooled_eu); \
>>>>>>           func(has_psr); \
>>>>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
>>>>>> b/drivers/gpu/drm/i915/intel_lrc.c
>>>>>> index ba7f783..1a22de4 100644
>>>>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>>>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>>>>> @@ -153,6 +153,7 @@
>>>>>>    #define GEN8_CTX_STATUS_ACTIVE_IDLE    (1 << 3)
>>>>>>    #define GEN8_CTX_STATUS_COMPLETE       (1 << 4)
>>>>>>    #define GEN8_CTX_STATUS_LITE_RESTORE   (1 << 15)
>>>>>> +#define GEN11_CTX_STATUS_PREEMPT_IDLE  (1 << 29)
>>>>>>    #define GEN8_CTX_STATUS_COMPLETED_MASK \
>>>>>>            (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
>>>>>> @@ -183,7 +184,9 @@ static inline bool need_preempt(const struct 
>>>>>> intel_engine_cs *engine,
>>>>>>                                   const struct i915_request *last,
>>>>>>                                   int prio)
>>>>>>    {
>>>>>> -       return engine->i915->preempt_context && prio > 
>>>>>> max(rq_prio(last), 0);
>>>>>> +       return (engine->i915->preempt_context ||
>>>>>> +               HAS_HW_PREEMPT_TO_IDLE(engine->i915)) &&
>>>>> Well, you haven't actually disabled allocating the preempt_context 
>>>>> so...
>>>> Yes.. I had mixed feelings about changing needs_preempt_context() now,
>>>> as that would mean adding a temporary condition on GuC until the GuC
>>>> preemption is merged.
>>>> I will add the conditions and disable the allocation in v2 of the 
>>>> patch.
>>>>> But at any rate, making this an engine->flag would eliminate one 
>>>>> pointer
>>>>> dance.
>>>> Could be an interesting idea for a separate patch.
>>> To land first ;)
>> :)
>> Sure, I can do that.
>>>>>> +                prio > max(rq_prio(last), 0);
>>>>>>    }
>>>>>>    /**
>>>>>> @@ -535,6 +538,25 @@ static void inject_preempt_context(struct 
>>>>>> intel_engine_cs *engine)
>>>>>> execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
>>>>>>    }
>>>>>> +static void gen11_preempt_to_idle(struct intel_engine_cs *engine)
>>>>>> +{
>>>>>> +       struct intel_engine_execlists *execlists = 
>>>>>> &engine->execlists;
>>>>>> +
>>>>>> +       GEM_TRACE("%s\n", engine->name);
>>>>>> +
>>>>>> +       /*
>>>>>> +        * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
>>>>>> +        * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
>>>>>> +        */
>>>>>> +       GEM_BUG_ON(execlists->ctrl_reg != NULL);
>>>>>> +
>>>>>> +       /* trigger preemption to idle */
>>>>>> +       writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
>>>>> Future plans? Because just inserting the branch into the setter of
>>>>> inject_preempt_context() resolves a lot of conflicts with other work.
>>>> My arguments for separate function are:
>>>> - better code readability
>>>> - keeping the symmetry between execlist and GuC flow - GuC preemption
>>>> patches will introduce separate function as well
>>>> - only 4 lines of the function would be common
>>>> - the name inject_preempt_context() wouldn't match the new purpose, so
>>>> renaming would be needed
>>>> - reduced self-documenting code due to two separate preempt methods 
>>>> not
>>>> having distinct names
>>>>
>>>> That's all, I don't have any future plans for it. If you want me to
>>>> merge the two, let me know.
>>> The problem that I am worrying about is that we will duplicate bunch of
>>> other code, the actual ELS[PQ] write is the smaller portion. Plus we
>>> already have the branch on something much more pleasant.
>> I see. I don't know any details there, so I'm not able to weigh that.
>> Just let me know whether this possible duplication outweights the 
>> arguments I provided, and I will merge these functions.
>> I'm not overly attached to my solution.
>>>
>>>>>> @@ -962,10 +987,13 @@ static void 
>>>>>> execlists_submission_tasklet(unsigned long data)
>>>>>>                                     status, buf[2*head + 1],
>>>>>> execlists->active);
>>>>>> -                       if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>>>>>> - GEN8_CTX_STATUS_PREEMPTED))
>>>>>> +                       /* Check if switched to active or 
>>>>>> preempted to active */
>>>>>> +                       if ((status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
>>>>>> + GEN8_CTX_STATUS_PREEMPTED)) &&
>>>>>> +                           !(status & 
>>>>>> GEN11_CTX_STATUS_PREEMPT_IDLE))
>>>>> Setting HWACK here is harmless as it gets cleared again. Unless, 
>>>>> there
>>>>> is some oddity in the code flow.
>>>> I will check if lack of the change affects test results.
>>>> Personally, I would keep this change, even if only for allowing simple
>>>> definition of what HWACK flag means.
>>> The simple definition is the opposite one, imo. We set the flag 
>>> after we
>>> get the corresponding response from HW; any preemption or activate 
>>> event
>>> must follow the most recent ELSP write. So that will include the
>>> preemption event following the preempt-idle write.
>>>
>>> Then on deciding that the HW is idle, we apply the complication such
>>> that execlists->active == 0. (That rule is what breaks the pattern.)
>>> -Chris
>> Ok, I will remove this unnecessary condition.
>> I tested this and lack of it doesn't seem to affect the results.
>> (I'll be out next week; expect v2 when I'm back)
>> -Tomasz
>>
>
> Do we have any test to cover a preempt to idle on already idle HW 
> (which is the case we cover with this flag here)? If not maybe we cold 
> add a selftest for that.
>
> Daniele
Looks like this case is not tested.
Also, it looks like there is a bug of some kind. Preemption-specific 
tests are passing, but I'm getting fails (with occasional passes) in 
smoketest-* cases from gem_exec_schedule.
I am working on diagnosing that.
>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Lis, Tomasz May 11, 2018, 3:45 p.m. UTC | #9
The review of v2 changes touched issues which were addressed in a different way
than planned in that review:

1. Context status processing

While the review went towards finding common path to new preemption flag
combinations and existing cases, I decided to split the two ways, because
the !(status & GEN8_CTX_STATUS_COMPLETED_MASK) condition had to be skipped
from the preemption flow. While pre-gen11 preemption always executed the
preemptive context which guaranteed that flag set, nothing finishes execution
in the new to-real-idle preemption.

2. Testing of the idle-to-idle preemption case

It turns out the IGT test 'smoketest-render' from 'gem_exec_schedule' group
triggers the idle-to-idle case. The test submits a lot of very  short execs
with changing priorities. Its run triggers preemption many times. and due to
the short exec buffer, triggers idle-to-idle case as well, sooner or later.

Tomasz Lis (1):
  drm/i915/gen11: Preempt-to-idle support in execlists.

 drivers/gpu/drm/i915/i915_drv.h          |   2 +
 drivers/gpu/drm/i915/i915_gem_context.c  |   5 +-
 drivers/gpu/drm/i915/i915_pci.c          |   3 +-
 drivers/gpu/drm/i915/intel_device_info.h |   1 +
 drivers/gpu/drm/i915/intel_lrc.c         | 115 ++++++++++++++++++++++---------
 drivers/gpu/drm/i915/intel_lrc.h         |   1 +
 6 files changed, 92 insertions(+), 35 deletions(-)
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 800230b..c32580b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2514,6 +2514,8 @@  intel_info(const struct drm_i915_private *dev_priv)
 		((dev_priv)->info.has_logical_ring_elsq)
 #define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
 		((dev_priv)->info.has_logical_ring_preemption)
+#define HAS_HW_PREEMPT_TO_IDLE(dev_priv) \
+		((dev_priv)->info.has_hw_preempt_to_idle)
 
 #define HAS_EXECLISTS(dev_priv) HAS_LOGICAL_RING_CONTEXTS(dev_priv)
 
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 4364922..66b6700 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -595,7 +595,8 @@  static const struct intel_device_info intel_cannonlake_info = {
 	GEN(11), \
 	.ddb_size = 2048, \
 	.has_csr = 0, \
-	.has_logical_ring_elsq = 1
+	.has_logical_ring_elsq = 1, \
+	.has_hw_preempt_to_idle = 1
 
 static const struct intel_device_info intel_icelake_11_info = {
 	GEN11_FEATURES,
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 933e316..4eb97b5 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -98,6 +98,7 @@  enum intel_platform {
 	func(has_logical_ring_contexts); \
 	func(has_logical_ring_elsq); \
 	func(has_logical_ring_preemption); \
+	func(has_hw_preempt_to_idle); \
 	func(has_overlay); \
 	func(has_pooled_eu); \
 	func(has_psr); \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index ba7f783..1a22de4 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -153,6 +153,7 @@ 
 #define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
 #define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
 #define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)
+#define GEN11_CTX_STATUS_PREEMPT_IDLE	(1 << 29)
 
 #define GEN8_CTX_STATUS_COMPLETED_MASK \
 	 (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED)
@@ -183,7 +184,9 @@  static inline bool need_preempt(const struct intel_engine_cs *engine,
 				const struct i915_request *last,
 				int prio)
 {
-	return engine->i915->preempt_context && prio > max(rq_prio(last), 0);
+	return (engine->i915->preempt_context ||
+		HAS_HW_PREEMPT_TO_IDLE(engine->i915)) &&
+		 prio > max(rq_prio(last), 0);
 }
 
 /**
@@ -535,6 +538,25 @@  static void inject_preempt_context(struct intel_engine_cs *engine)
 	execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
 }
 
+static void gen11_preempt_to_idle(struct intel_engine_cs *engine)
+{
+	struct intel_engine_execlists *execlists = &engine->execlists;
+
+	GEM_TRACE("%s\n", engine->name);
+
+	/*
+	 * hardware which HAS_HW_PREEMPT_TO_IDLE(), always also
+	 * HAS_LOGICAL_RING_ELSQ(), so we can assume ctrl_reg is set
+	 */
+	GEM_BUG_ON(execlists->ctrl_reg != NULL);
+
+	/* trigger preemption to idle */
+	writel(EL_CTRL_PREEMPT_TO_IDLE, execlists->ctrl_reg);
+
+	execlists_clear_active(execlists, EXECLISTS_ACTIVE_HWACK);
+	execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT);
+}
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -594,7 +616,10 @@  static void execlists_dequeue(struct intel_engine_cs *engine)
 			goto unlock;
 
 		if (need_preempt(engine, last, execlists->queue_priority)) {
-			inject_preempt_context(engine);
+			if (HAS_HW_PREEMPT_TO_IDLE(engine->i915))
+				gen11_preempt_to_idle(engine);
+			else
+				inject_preempt_context(engine);
 			goto unlock;
 		}
 
@@ -962,10 +987,13 @@  static void execlists_submission_tasklet(unsigned long data)
 				  status, buf[2*head + 1],
 				  execlists->active);
 
-			if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
-				      GEN8_CTX_STATUS_PREEMPTED))
+			/* Check if switched to active or preempted to active */
+			if ((status & (GEN8_CTX_STATUS_IDLE_ACTIVE |
+					GEN8_CTX_STATUS_PREEMPTED)) &&
+			    !(status & GEN11_CTX_STATUS_PREEMPT_IDLE))
 				execlists_set_active(execlists,
 						     EXECLISTS_ACTIVE_HWACK);
+
 			if (status & GEN8_CTX_STATUS_ACTIVE_IDLE)
 				execlists_clear_active(execlists,
 						       EXECLISTS_ACTIVE_HWACK);
@@ -976,8 +1004,13 @@  static void execlists_submission_tasklet(unsigned long data)
 			/* We should never get a COMPLETED | IDLE_ACTIVE! */
 			GEM_BUG_ON(status & GEN8_CTX_STATUS_IDLE_ACTIVE);
 
-			if (status & GEN8_CTX_STATUS_COMPLETE &&
-			    buf[2*head + 1] == execlists->preempt_complete_status) {
+			/*
+			 * Check if preempted to real idle, either directly or
+			 * the preemptive context already finished executing
+			 */
+			if ((status & GEN11_CTX_STATUS_PREEMPT_IDLE) ||
+			    (status & GEN8_CTX_STATUS_COMPLETE &&
+			    buf[2*head + 1] == execlists->preempt_complete_status)) {
 				GEM_TRACE("%s preempt-idle\n", engine->name);
 
 				execlists_cancel_port_requests(execlists);
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 59d7b86..958d1b3 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -45,6 +45,7 @@ 
 #define RING_EXECLIST_SQ_CONTENTS(engine)	_MMIO((engine)->mmio_base + 0x510)
 #define RING_EXECLIST_CONTROL(engine)		_MMIO((engine)->mmio_base + 0x550)
 #define	  EL_CTRL_LOAD				(1 << 0)
+#define	  EL_CTRL_PREEMPT_TO_IDLE		(1 << 1)
 
 /* The docs specify that the write pointer wraps around after 5h, "After status
  * is written out to the last available status QW at offset 5h, this pointer