diff mbox

[3/4] drm/i915: HSW GT3 Slices: exec flag to warn kernel that userspace is using predication

Message ID 1381870249-30614-1-git-send-email-rodrigo.vivi@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Rodrigo Vivi Oct. 15, 2013, 8:50 p.m. UTC
If Userspace isn't using MI_PREDICATE all slices must be enabled for
backward compatibility.

If I915_EXEC_USE_PREDICATE isn't set and defaul is set to half, kernel will force
all slices on.

v2: fix the inverted logic for backwards compatibility
    USE_PREDICATE unset force gt_full when defaul is half
    instead of GT_FULL flag.

CC: Eric Anholt <eric@anholt.net>
CC: Kenneth Graunke <kenneth@whitecape.org>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@gmail.com>
---
 drivers/gpu/drm/i915/i915_drv.h            |  8 ++++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 67 ++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_reg.h            | 11 +++++
 drivers/gpu/drm/i915/i915_sysfs.c          | 11 ++++-
 drivers/gpu/drm/i915/intel_display.c       |  6 +++
 drivers/gpu/drm/i915/intel_drv.h           |  1 +
 drivers/gpu/drm/i915/intel_pm.c            | 39 ++++++++++++++++-
 include/uapi/drm/i915_drm.h                |  8 +++-
 8 files changed, 146 insertions(+), 5 deletions(-)

Comments

Chris Wilson Oct. 16, 2013, 9:15 a.m. UTC | #1
On Tue, Oct 15, 2013 at 05:50:49PM -0300, Rodrigo Vivi wrote:
> If Userspace isn't using MI_PREDICATE all slices must be enabled for
> backward compatibility.
> 
> If I915_EXEC_USE_PREDICATE isn't set and defaul is set to half, kernel will force
> all slices on.
> 
> v2: fix the inverted logic for backwards compatibility
>     USE_PREDICATE unset force gt_full when defaul is half
>     instead of GT_FULL flag.
> 
> CC: Eric Anholt <eric@anholt.net>
> CC: Kenneth Graunke <kenneth@whitecape.org>
> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@gmail.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h            |  8 ++++
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 67 ++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_reg.h            | 11 +++++
>  drivers/gpu/drm/i915/i915_sysfs.c          | 11 ++++-
>  drivers/gpu/drm/i915/intel_display.c       |  6 +++
>  drivers/gpu/drm/i915/intel_drv.h           |  1 +
>  drivers/gpu/drm/i915/intel_pm.c            | 39 ++++++++++++++++-
>  include/uapi/drm/i915_drm.h                |  8 +++-
>  8 files changed, 146 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 685fb1d..bd4774e 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1219,6 +1219,12 @@ struct i915_package_c8 {
>  	} regsave;
>  };
>  
> +struct i915_gt_slices {
> +	int state_default;

Either use it as an integer (a count of slices enabled by default) or
make it an enum.

> +	int forcing_full;

bool legacy_userspace_busy; ?

> +	struct mutex m_lock;

Ugh. Just struct mutex lock; /* locks all access to this struct and
slice registers */ will suffice

> +};
> +
>  typedef struct drm_i915_private {
>  	struct drm_device *dev;
>  	struct kmem_cache *slab;
> @@ -1418,6 +1424,8 @@ typedef struct drm_i915_private {
>  
>  	struct i915_package_c8 pc8;
>  
> +	struct i915_gt_slices gt_slices;
> +
>  	/* Old dri1 support infrastructure, beware the dragons ya fools entering
>  	 * here! */
>  	struct i915_dri1_state dri1;
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 0ce0d47..eb09a51 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -923,6 +923,66 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev,
>  }
>  
>  static int
> +i915_gt_full_immediate(struct drm_device *dev, struct intel_ring_buffer *ring)

That's a misnomer.

> +{
> +	drm_i915_private_t *dev_priv = dev->dev_private;
> +	int ret;
> +
> +	if (!HAS_SLICE_SHUTDOWN(dev) || ring == &dev_priv->ring[BCS])
> +		return 0;
> +
> +	ret = intel_ring_begin(ring, 3);
> +	if (ret)
> +		return ret;
> +	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
> +	intel_ring_emit(ring, HSW_GT_SLICE_INFO);
> +	intel_ring_emit(ring, SLICE_SEL_BOTH);
> +	intel_ring_advance(ring);
> +
> +	ret = intel_ring_begin(ring, 3);
> +	if (ret)
> +		return ret;
> +	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
> +	intel_ring_emit(ring, MI_PREDICATE_RESULT_2);
> +	intel_ring_emit(ring, LOWER_SLICE_ENABLED);
> +	intel_ring_advance(ring);
> +
> +	ret = intel_ring_begin(ring, 3);
> +	if (ret)
> +		return ret;
> +	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
> +	intel_ring_emit(ring, HSW_SLICESHUTDOWN);
> +	intel_ring_emit(ring, ~SLICE_SHUTDOWN);
> +	intel_ring_advance(ring);
> +
> +	ret = intel_ring_begin(ring, 3);
> +	if (ret)
> +		return ret;
> +	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
> +	intel_ring_emit(ring, RC_IDLE_MAX_COUNT);
> +	intel_ring_emit(ring, CS_IDLE_COUNT_1US);
> +	intel_ring_advance(ring);
> +
> +	ret = intel_ring_begin(ring, 3);
> +	if (ret)
> +		return ret;
> +	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
> +	intel_ring_emit(ring, WAIT_FOR_RC6_EXIT);
> +	intel_ring_emit(ring, WAIT_RC6_EXIT | MASK_WAIT_RC6_EXIT);
> +	intel_ring_advance(ring);
> +
> +	ret = intel_ring_begin(ring, 3);
> +	if (ret)
> +		return ret;
> +	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
> +	intel_ring_emit(ring, RC_IDLE_MAX_COUNT);
> +	intel_ring_emit(ring, CS_IDLE_COUNT_5US);
> +	intel_ring_advance(ring);
> +
> +	return 0;
> +}
> +
> +static int
>  i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  		       struct drm_file *file,
>  		       struct drm_i915_gem_execbuffer2 *args,
> @@ -999,6 +1059,13 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  		return -EINVAL;
>  	}
>  
> +	if ((args->flags & I915_EXEC_USE_PREDICATE) == 0 &&
> +	    dev_priv->gt_slices.state_default == 0 &&
> +	    !dev_priv->gt_slices.forcing_full) {

Always set legacy_userspace_busy here so that a change in state_default
cannot possibly break currently active users i.e. they are independent
bookkeeping.

> +		dev_priv->gt_slices.forcing_full = true;
> +		i915_gt_full_immediate(dev, ring);

What happens when this fails? If it only partially emits the commands,
etc.

> +	}
> +
>  	mode = args->flags & I915_EXEC_CONSTANTS_MASK;
>  	mask = I915_EXEC_CONSTANTS_MASK;
>  	switch (mode) {
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 497c441..0146bef 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -277,6 +277,17 @@
>  #define   SLICE_STATUS_MAIN_ON	(2<<0)
>  #define   SLICE_STATUS_BOTH_ON	(3<<0)
>  
> +#define HSW_SLICESHUTDOWN	0xA190
> +#define   SLICE_SHUTDOWN	(1<<0)
> +
> +#define RC_IDLE_MAX_COUNT	0x2054
> +#define   CS_IDLE_COUNT_1US	(1<<1)
> +#define   CS_IDLE_COUNT_5US	(1<<3)
> +
> +#define WAIT_FOR_RC6_EXIT	0x20CC
> +#define   WAIT_RC6_EXIT		(1<<0)
> +#define   MASK_WAIT_RC6_EXIT	(1<<16)
> +
>  /*
>   * 3D instructions used by the kernel
>   */
> diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
> index 86ccd52..25b0c5b 100644
> --- a/drivers/gpu/drm/i915/i915_sysfs.c
> +++ b/drivers/gpu/drm/i915/i915_sysfs.c
> @@ -124,9 +124,13 @@ static ssize_t gt_slice_config_show(struct device *kdev,
>  	struct drm_minor *minor = container_of(kdev, struct drm_minor, kdev);
>  	struct drm_device *dev = minor->dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
> +	bool full;
>  
> -	return sprintf(buf, "%s\n", I915_READ(MI_PREDICATE_RESULT_2) ==
> -		       LOWER_SLICE_ENABLED ? "full" : "half");
> +	mutex_lock(&dev_priv->gt_slices.m_lock);
> +	full = I915_READ(MI_PREDICATE_RESULT_2) == LOWER_SLICE_ENABLED;
> +	mutex_unlock(&dev_priv->gt_slices.m_lock);

This locking is not stopping any races - it is superfluous.

> +	return sprintf(buf, "%s\n", full ? "full" : "half");
>  }
>  
>  static ssize_t gt_slice_config_store(struct device *kdev,
> @@ -135,16 +139,19 @@ static ssize_t gt_slice_config_store(struct device *kdev,
>  {
>  	struct drm_minor *minor = container_of(kdev, struct drm_minor, kdev);
>  	struct drm_device *dev = minor->dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
>  	int ret;
>  
>  	if (!strncmp(buf, "full", sizeof("full") - 1)) {
>  		ret = intel_set_gt_full(dev);
>  		if (ret)
>  			return ret;
> +		dev_priv->gt_slices.state_default = 1;
>  	} else if (!strncmp(buf, "half", sizeof("half") - 1)) {
>  		ret = intel_set_gt_half(dev);
>  		if (ret)
>  			return ret;
> +		dev_priv->gt_slices.state_default = 0;
>  	} else
>  		return -EINVAL;
>  	return count;
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 4f1b636..1a2f8bc 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -7778,6 +7778,12 @@ void intel_mark_idle(struct drm_device *dev)
>  
>  	if (dev_priv->info->gen >= 6)
>  		gen6_rps_idle(dev->dev_private);
> +
> +	if (HAS_SLICE_SHUTDOWN(dev) && dev_priv->gt_slices.forcing_full &&
> +	    dev_priv->gt_slices.state_default == 0) {
> +		intel_set_gt_half_async(dev);
> +		dev_priv->gt_slices.forcing_full = false;

Again, you want to be ignoring state_default when changing
legacy_userspace_busy.

The lower level slice config handling code should be making the judgment
based on all parameters after one changes.
-Chris
Rodrigo Vivi Oct. 16, 2013, 12:12 p.m. UTC | #2
Hi Chris,

Thank you very much for all suggestions. Will do the changes. But for
that question below I don't have the clear answer:

On Wed, Oct 16, 2013 at 6:15 AM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> On Tue, Oct 15, 2013 at 05:50:49PM -0300, Rodrigo Vivi wrote:
>> If Userspace isn't using MI_PREDICATE all slices must be enabled for
>> backward compatibility.
>>
>> If I915_EXEC_USE_PREDICATE isn't set and defaul is set to half, kernel will force
>> all slices on.
>>
>> v2: fix the inverted logic for backwards compatibility
>>     USE_PREDICATE unset force gt_full when defaul is half
>>     instead of GT_FULL flag.
>>
>> CC: Eric Anholt <eric@anholt.net>
>> CC: Kenneth Graunke <kenneth@whitecape.org>
>> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@gmail.com>
>> ---
>>  drivers/gpu/drm/i915/i915_drv.h            |  8 ++++
>>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 67 ++++++++++++++++++++++++++++++
>>  drivers/gpu/drm/i915/i915_reg.h            | 11 +++++
>>  drivers/gpu/drm/i915/i915_sysfs.c          | 11 ++++-
>>  drivers/gpu/drm/i915/intel_display.c       |  6 +++
>>  drivers/gpu/drm/i915/intel_drv.h           |  1 +
>>  drivers/gpu/drm/i915/intel_pm.c            | 39 ++++++++++++++++-
>>  include/uapi/drm/i915_drm.h                |  8 +++-
>>  8 files changed, 146 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 685fb1d..bd4774e 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -1219,6 +1219,12 @@ struct i915_package_c8 {
>>       } regsave;
>>  };
>>
>> +struct i915_gt_slices {
>> +     int state_default;
>
> Either use it as an integer (a count of slices enabled by default) or
> make it an enum.
>
>> +     int forcing_full;
>
> bool legacy_userspace_busy; ?
>
>> +     struct mutex m_lock;
>
> Ugh. Just struct mutex lock; /* locks all access to this struct and
> slice registers */ will suffice
>
>> +};
>> +
>>  typedef struct drm_i915_private {
>>       struct drm_device *dev;
>>       struct kmem_cache *slab;
>> @@ -1418,6 +1424,8 @@ typedef struct drm_i915_private {
>>
>>       struct i915_package_c8 pc8;
>>
>> +     struct i915_gt_slices gt_slices;
>> +
>>       /* Old dri1 support infrastructure, beware the dragons ya fools entering
>>        * here! */
>>       struct i915_dri1_state dri1;
>> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> index 0ce0d47..eb09a51 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> @@ -923,6 +923,66 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev,
>>  }
>>
>>  static int
>> +i915_gt_full_immediate(struct drm_device *dev, struct intel_ring_buffer *ring)
>
> That's a misnomer.
>
>> +{
>> +     drm_i915_private_t *dev_priv = dev->dev_private;
>> +     int ret;
>> +
>> +     if (!HAS_SLICE_SHUTDOWN(dev) || ring == &dev_priv->ring[BCS])
>> +             return 0;
>> +
>> +     ret = intel_ring_begin(ring, 3);
>> +     if (ret)
>> +             return ret;
>> +     intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
>> +     intel_ring_emit(ring, HSW_GT_SLICE_INFO);
>> +     intel_ring_emit(ring, SLICE_SEL_BOTH);
>> +     intel_ring_advance(ring);
>> +
>> +     ret = intel_ring_begin(ring, 3);
>> +     if (ret)
>> +             return ret;
>> +     intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
>> +     intel_ring_emit(ring, MI_PREDICATE_RESULT_2);
>> +     intel_ring_emit(ring, LOWER_SLICE_ENABLED);
>> +     intel_ring_advance(ring);
>> +
>> +     ret = intel_ring_begin(ring, 3);
>> +     if (ret)
>> +             return ret;
>> +     intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
>> +     intel_ring_emit(ring, HSW_SLICESHUTDOWN);
>> +     intel_ring_emit(ring, ~SLICE_SHUTDOWN);
>> +     intel_ring_advance(ring);
>> +
>> +     ret = intel_ring_begin(ring, 3);
>> +     if (ret)
>> +             return ret;
>> +     intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
>> +     intel_ring_emit(ring, RC_IDLE_MAX_COUNT);
>> +     intel_ring_emit(ring, CS_IDLE_COUNT_1US);
>> +     intel_ring_advance(ring);
>> +
>> +     ret = intel_ring_begin(ring, 3);
>> +     if (ret)
>> +             return ret;
>> +     intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
>> +     intel_ring_emit(ring, WAIT_FOR_RC6_EXIT);
>> +     intel_ring_emit(ring, WAIT_RC6_EXIT | MASK_WAIT_RC6_EXIT);
>> +     intel_ring_advance(ring);
>> +
>> +     ret = intel_ring_begin(ring, 3);
>> +     if (ret)
>> +             return ret;
>> +     intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
>> +     intel_ring_emit(ring, RC_IDLE_MAX_COUNT);
>> +     intel_ring_emit(ring, CS_IDLE_COUNT_5US);
>> +     intel_ring_advance(ring);
>> +
>> +     return 0;
>> +}
>> +
>> +static int
>>  i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>>                      struct drm_file *file,
>>                      struct drm_i915_gem_execbuffer2 *args,
>> @@ -999,6 +1059,13 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>>               return -EINVAL;
>>       }
>>
>> +     if ((args->flags & I915_EXEC_USE_PREDICATE) == 0 &&
>> +         dev_priv->gt_slices.state_default == 0 &&
>> +         !dev_priv->gt_slices.forcing_full) {
>
> Always set legacy_userspace_busy here so that a change in state_default
> cannot possibly break currently active users i.e. they are independent
> bookkeeping.
>
>> +             dev_priv->gt_slices.forcing_full = true;
>> +             i915_gt_full_immediate(dev, ring);
>
> What happens when this fails? If it only partially emits the commands,
> etc.

This is a very good question. If it fails, my only concern would be
the sync between HSW_GT_SLICE_INFO and MI_PREDICATE_RESULT_2. But I
don't know how to do this during the exec buf without delaying the
execution. Do you have any suggestion?

>
>> +     }
>> +
>>       mode = args->flags & I915_EXEC_CONSTANTS_MASK;
>>       mask = I915_EXEC_CONSTANTS_MASK;
>>       switch (mode) {
>> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
>> index 497c441..0146bef 100644
>> --- a/drivers/gpu/drm/i915/i915_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> @@ -277,6 +277,17 @@
>>  #define   SLICE_STATUS_MAIN_ON       (2<<0)
>>  #define   SLICE_STATUS_BOTH_ON       (3<<0)
>>
>> +#define HSW_SLICESHUTDOWN    0xA190
>> +#define   SLICE_SHUTDOWN     (1<<0)
>> +
>> +#define RC_IDLE_MAX_COUNT    0x2054
>> +#define   CS_IDLE_COUNT_1US  (1<<1)
>> +#define   CS_IDLE_COUNT_5US  (1<<3)
>> +
>> +#define WAIT_FOR_RC6_EXIT    0x20CC
>> +#define   WAIT_RC6_EXIT              (1<<0)
>> +#define   MASK_WAIT_RC6_EXIT (1<<16)
>> +
>>  /*
>>   * 3D instructions used by the kernel
>>   */
>> diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
>> index 86ccd52..25b0c5b 100644
>> --- a/drivers/gpu/drm/i915/i915_sysfs.c
>> +++ b/drivers/gpu/drm/i915/i915_sysfs.c
>> @@ -124,9 +124,13 @@ static ssize_t gt_slice_config_show(struct device *kdev,
>>       struct drm_minor *minor = container_of(kdev, struct drm_minor, kdev);
>>       struct drm_device *dev = minor->dev;
>>       struct drm_i915_private *dev_priv = dev->dev_private;
>> +     bool full;
>>
>> -     return sprintf(buf, "%s\n", I915_READ(MI_PREDICATE_RESULT_2) ==
>> -                    LOWER_SLICE_ENABLED ? "full" : "half");
>> +     mutex_lock(&dev_priv->gt_slices.m_lock);
>> +     full = I915_READ(MI_PREDICATE_RESULT_2) == LOWER_SLICE_ENABLED;
>> +     mutex_unlock(&dev_priv->gt_slices.m_lock);
>
> This locking is not stopping any races - it is superfluous.
>
>> +     return sprintf(buf, "%s\n", full ? "full" : "half");
>>  }
>>
>>  static ssize_t gt_slice_config_store(struct device *kdev,
>> @@ -135,16 +139,19 @@ static ssize_t gt_slice_config_store(struct device *kdev,
>>  {
>>       struct drm_minor *minor = container_of(kdev, struct drm_minor, kdev);
>>       struct drm_device *dev = minor->dev;
>> +     struct drm_i915_private *dev_priv = dev->dev_private;
>>       int ret;
>>
>>       if (!strncmp(buf, "full", sizeof("full") - 1)) {
>>               ret = intel_set_gt_full(dev);
>>               if (ret)
>>                       return ret;
>> +             dev_priv->gt_slices.state_default = 1;
>>       } else if (!strncmp(buf, "half", sizeof("half") - 1)) {
>>               ret = intel_set_gt_half(dev);
>>               if (ret)
>>                       return ret;
>> +             dev_priv->gt_slices.state_default = 0;
>>       } else
>>               return -EINVAL;
>>       return count;
>> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
>> index 4f1b636..1a2f8bc 100644
>> --- a/drivers/gpu/drm/i915/intel_display.c
>> +++ b/drivers/gpu/drm/i915/intel_display.c
>> @@ -7778,6 +7778,12 @@ void intel_mark_idle(struct drm_device *dev)
>>
>>       if (dev_priv->info->gen >= 6)
>>               gen6_rps_idle(dev->dev_private);
>> +
>> +     if (HAS_SLICE_SHUTDOWN(dev) && dev_priv->gt_slices.forcing_full &&
>> +         dev_priv->gt_slices.state_default == 0) {
>> +             intel_set_gt_half_async(dev);
>> +             dev_priv->gt_slices.forcing_full = false;
>
> Again, you want to be ignoring state_default when changing
> legacy_userspace_busy.
>
> The lower level slice config handling code should be making the judgment
> based on all parameters after one changes.
> -Chris
>
> --
> Chris Wilson, Intel Open Source Technology Centre
Chris Wilson Oct. 16, 2013, 12:19 p.m. UTC | #3
On Wed, Oct 16, 2013 at 09:12:46AM -0300, Rodrigo Vivi wrote:
> >> +             dev_priv->gt_slices.forcing_full = true;
> >> +             i915_gt_full_immediate(dev, ring);
> >
> > What happens when this fails? If it only partially emits the commands,
> > etc.
> 
> This is a very good question. If it fails, my only concern would be
> the sync between HSW_GT_SLICE_INFO and MI_PREDICATE_RESULT_2. But I
> don't know how to do this during the exec buf without delaying the
> execution. Do you have any suggestion?

Reduce the series of if() to just one, act upon the return value, only
set legacy_userspace_busy to true after making the change. It will
eventually get cleaned up if the dispatch later fails, but we must make
sure that we accurately track hardware state.
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 685fb1d..bd4774e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1219,6 +1219,12 @@  struct i915_package_c8 {
 	} regsave;
 };
 
+struct i915_gt_slices {
+	int state_default;
+	int forcing_full;
+	struct mutex m_lock;
+};
+
 typedef struct drm_i915_private {
 	struct drm_device *dev;
 	struct kmem_cache *slab;
@@ -1418,6 +1424,8 @@  typedef struct drm_i915_private {
 
 	struct i915_package_c8 pc8;
 
+	struct i915_gt_slices gt_slices;
+
 	/* Old dri1 support infrastructure, beware the dragons ya fools entering
 	 * here! */
 	struct i915_dri1_state dri1;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 0ce0d47..eb09a51 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -923,6 +923,66 @@  i915_reset_gen7_sol_offsets(struct drm_device *dev,
 }
 
 static int
+i915_gt_full_immediate(struct drm_device *dev, struct intel_ring_buffer *ring)
+{
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	int ret;
+
+	if (!HAS_SLICE_SHUTDOWN(dev) || ring == &dev_priv->ring[BCS])
+		return 0;
+
+	ret = intel_ring_begin(ring, 3);
+	if (ret)
+		return ret;
+	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+	intel_ring_emit(ring, HSW_GT_SLICE_INFO);
+	intel_ring_emit(ring, SLICE_SEL_BOTH);
+	intel_ring_advance(ring);
+
+	ret = intel_ring_begin(ring, 3);
+	if (ret)
+		return ret;
+	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+	intel_ring_emit(ring, MI_PREDICATE_RESULT_2);
+	intel_ring_emit(ring, LOWER_SLICE_ENABLED);
+	intel_ring_advance(ring);
+
+	ret = intel_ring_begin(ring, 3);
+	if (ret)
+		return ret;
+	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+	intel_ring_emit(ring, HSW_SLICESHUTDOWN);
+	intel_ring_emit(ring, ~SLICE_SHUTDOWN);
+	intel_ring_advance(ring);
+
+	ret = intel_ring_begin(ring, 3);
+	if (ret)
+		return ret;
+	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+	intel_ring_emit(ring, RC_IDLE_MAX_COUNT);
+	intel_ring_emit(ring, CS_IDLE_COUNT_1US);
+	intel_ring_advance(ring);
+
+	ret = intel_ring_begin(ring, 3);
+	if (ret)
+		return ret;
+	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+	intel_ring_emit(ring, WAIT_FOR_RC6_EXIT);
+	intel_ring_emit(ring, WAIT_RC6_EXIT | MASK_WAIT_RC6_EXIT);
+	intel_ring_advance(ring);
+
+	ret = intel_ring_begin(ring, 3);
+	if (ret)
+		return ret;
+	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+	intel_ring_emit(ring, RC_IDLE_MAX_COUNT);
+	intel_ring_emit(ring, CS_IDLE_COUNT_5US);
+	intel_ring_advance(ring);
+
+	return 0;
+}
+
+static int
 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		       struct drm_file *file,
 		       struct drm_i915_gem_execbuffer2 *args,
@@ -999,6 +1059,13 @@  i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
+	if ((args->flags & I915_EXEC_USE_PREDICATE) == 0 &&
+	    dev_priv->gt_slices.state_default == 0 &&
+	    !dev_priv->gt_slices.forcing_full) {
+		dev_priv->gt_slices.forcing_full = true;
+		i915_gt_full_immediate(dev, ring);
+	}
+
 	mode = args->flags & I915_EXEC_CONSTANTS_MASK;
 	mask = I915_EXEC_CONSTANTS_MASK;
 	switch (mode) {
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 497c441..0146bef 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -277,6 +277,17 @@ 
 #define   SLICE_STATUS_MAIN_ON	(2<<0)
 #define   SLICE_STATUS_BOTH_ON	(3<<0)
 
+#define HSW_SLICESHUTDOWN	0xA190
+#define   SLICE_SHUTDOWN	(1<<0)
+
+#define RC_IDLE_MAX_COUNT	0x2054
+#define   CS_IDLE_COUNT_1US	(1<<1)
+#define   CS_IDLE_COUNT_5US	(1<<3)
+
+#define WAIT_FOR_RC6_EXIT	0x20CC
+#define   WAIT_RC6_EXIT		(1<<0)
+#define   MASK_WAIT_RC6_EXIT	(1<<16)
+
 /*
  * 3D instructions used by the kernel
  */
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index 86ccd52..25b0c5b 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -124,9 +124,13 @@  static ssize_t gt_slice_config_show(struct device *kdev,
 	struct drm_minor *minor = container_of(kdev, struct drm_minor, kdev);
 	struct drm_device *dev = minor->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	bool full;
 
-	return sprintf(buf, "%s\n", I915_READ(MI_PREDICATE_RESULT_2) ==
-		       LOWER_SLICE_ENABLED ? "full" : "half");
+	mutex_lock(&dev_priv->gt_slices.m_lock);
+	full = I915_READ(MI_PREDICATE_RESULT_2) == LOWER_SLICE_ENABLED;
+	mutex_unlock(&dev_priv->gt_slices.m_lock);
+
+	return sprintf(buf, "%s\n", full ? "full" : "half");
 }
 
 static ssize_t gt_slice_config_store(struct device *kdev,
@@ -135,16 +139,19 @@  static ssize_t gt_slice_config_store(struct device *kdev,
 {
 	struct drm_minor *minor = container_of(kdev, struct drm_minor, kdev);
 	struct drm_device *dev = minor->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
 	int ret;
 
 	if (!strncmp(buf, "full", sizeof("full") - 1)) {
 		ret = intel_set_gt_full(dev);
 		if (ret)
 			return ret;
+		dev_priv->gt_slices.state_default = 1;
 	} else if (!strncmp(buf, "half", sizeof("half") - 1)) {
 		ret = intel_set_gt_half(dev);
 		if (ret)
 			return ret;
+		dev_priv->gt_slices.state_default = 0;
 	} else
 		return -EINVAL;
 	return count;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 4f1b636..1a2f8bc 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -7778,6 +7778,12 @@  void intel_mark_idle(struct drm_device *dev)
 
 	if (dev_priv->info->gen >= 6)
 		gen6_rps_idle(dev->dev_private);
+
+	if (HAS_SLICE_SHUTDOWN(dev) && dev_priv->gt_slices.forcing_full &&
+	    dev_priv->gt_slices.state_default == 0) {
+		intel_set_gt_half_async(dev);
+		dev_priv->gt_slices.forcing_full = false;
+	}
 }
 
 void intel_mark_fb_busy(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index a9abbb5..98cd63e 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -836,6 +836,7 @@  void intel_disable_gt_powersave(struct drm_device *dev);
 void ironlake_teardown_rc6(struct drm_device *dev);
 int intel_set_gt_full(struct drm_device *dev);
 int intel_set_gt_half(struct drm_device *dev);
+void intel_set_gt_half_async(struct drm_device *dev);
 void intel_init_gt_slices(struct drm_device *dev);
 void gen6_update_ring_freq(struct drm_device *dev);
 void gen6_rps_idle(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 63af075..2ded34c 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3873,6 +3873,7 @@  int intel_set_gt_full(struct drm_device *dev)
 	if (!HAS_SLICE_SHUTDOWN(dev))
 		return -ENODEV;
 
+	mutex_lock(&dev_priv->gt_slices.m_lock);
 	I915_WRITE(HSW_GT_SLICE_INFO, SLICE_SEL_BOTH);
 
 	/* Slices are enabled on RC6 exit */
@@ -3881,13 +3882,18 @@  int intel_set_gt_full(struct drm_device *dev)
 	if (wait_for(((I915_READ(HSW_GT_SLICE_INFO) & SLICE_STATUS_MASK) ==
 		      SLICE_STATUS_BOTH_ON), 2000)) {
 		DRM_ERROR("Timeout enabling full gt slices\n");
+
 		I915_WRITE(HSW_GT_SLICE_INFO, ~SLICE_SEL_BOTH);
 		I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_DISABLED);
+
 		gen6_gt_force_wake_put(dev_priv);
+		mutex_unlock(&dev_priv->gt_slices.m_lock);
 		return -ETIMEDOUT;
 	}
+
 	I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_ENABLED);
 	gen6_gt_force_wake_put(dev_priv);
+	mutex_unlock(&dev_priv->gt_slices.m_lock);
 
 	return 0;
 }
@@ -3899,6 +3905,7 @@  int intel_set_gt_half(struct drm_device *dev)
 	if (!HAS_SLICE_SHUTDOWN(dev))
 		return -ENODEV;
 
+	mutex_lock(&dev_priv->gt_slices.m_lock);
 	I915_WRITE(HSW_GT_SLICE_INFO, ~SLICE_SEL_BOTH);
 
 	/* Slices are disabled on RC6 exit */
@@ -3907,16 +3914,40 @@  int intel_set_gt_half(struct drm_device *dev)
 	if (wait_for(((I915_READ(HSW_GT_SLICE_INFO) & SLICE_STATUS_MASK) ==
 		      SLICE_STATUS_MAIN_ON), 2000)) {
 		DRM_ERROR("Timed out disabling half gt slices\n");
+
 		I915_WRITE(HSW_GT_SLICE_INFO, SLICE_SEL_BOTH);
 		I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_ENABLED);
+
 		gen6_gt_force_wake_put(dev_priv);
+		mutex_unlock(&dev_priv->gt_slices.m_lock);
 		return -ETIMEDOUT;
 	}
+
 	I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_DISABLED);
 	gen6_gt_force_wake_put(dev_priv);
+	mutex_unlock(&dev_priv->gt_slices.m_lock);
+
 	return 0;
 }
 
+/**
+ * On Haswell, slices on/off transitions are done via RC6 sequence.
+ * This async function allows you to request slices shutdown without waiting.
+ * Slices will be disabled on next RC6 exit.
+ */
+void intel_set_gt_half_async(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	if (!HAS_SLICE_SHUTDOWN(dev))
+		return;
+
+	mutex_lock(&dev_priv->gt_slices.m_lock);
+	I915_WRITE(HSW_GT_SLICE_INFO, ~SLICE_SEL_BOTH);
+	I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_DISABLED);
+	mutex_unlock(&dev_priv->gt_slices.m_lock);
+}
+
 void intel_init_gt_slices(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -3927,9 +3958,13 @@  void intel_init_gt_slices(struct drm_device *dev)
 	if (!HAS_SLICE_SHUTDOWN(dev))
 		return;
 
+	dev_priv->gt_slices.state_default = 1;
+	dev_priv->gt_slices.forcing_full = false;
+	mutex_init(&dev_priv->gt_slices.m_lock);
+
 	if (!i915_gt_slice_config) {
-		I915_WRITE(HSW_GT_SLICE_INFO, ~SLICE_SEL_BOTH);
-		I915_WRITE(MI_PREDICATE_RESULT_2, LOWER_SLICE_DISABLED);
+		intel_set_gt_half_async(dev);
+		dev_priv->gt_slices.state_default = 0;
 	}
 }
 
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 3a4e97b..3fa3e24 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -731,7 +731,13 @@  struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_HANDLE_LUT		(1<<12)
 
-#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_HANDLE_LUT<<1)
+/* If this flag is set userspace is using predicate and half slices can be
+ * let disabled for power saving. Otherwise use all slices even when disabled
+ * by boot parameter or via sysfs interface
+ */
+#define I915_EXEC_USE_PREDICATE		(1<<13)
+
+#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_USE_PREDICATE<<1)
 
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \