diff mbox

[v3,3/3] drm/i915: Give proper names to MOCS entries

Message ID 1467380406-11954-4-git-send-email-imre.deak@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Imre Deak July 1, 2016, 1:40 p.m. UTC
The purpose of each MOCS entry isn't well defined at the moment. Defining
these is important to remove any uncertainty about the use of these entries,
for example in terms of performance and GPU/CPU coherency.

Suggested by Ville.

CC: Rong R Yang <rong.r.yang@intel.com>
CC: Yakui Zhao <yakui.zhao@intel.com>
CC: Ville Syrjälä <ville.syrjala@linux.intel.com>
CC: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Imre Deak <imre.deak@intel.com>
---
 drivers/gpu/drm/i915/intel_mocs.c | 13 +++++++------
 include/uapi/drm/i915_drm.h       | 24 ++++++++++++++++++++++++
 2 files changed, 31 insertions(+), 6 deletions(-)

Comments

Chris Wilson July 1, 2016, 1:49 p.m. UTC | #1
On Fri, Jul 01, 2016 at 04:40:06PM +0300, Imre Deak wrote:
> The purpose for each MOCS entry isn't well defined atm. Defining these
> is important to remove any uncertainty about the use of these entries
> for example in terms of performance and GPU/CPU coherency.
> 
> Suggested by Ville.
> 
> CC: Rong R Yang <rong.r.yang@intel.com>
> CC: Yakui Zhao <yakui.zhao@intel.com>
> CC: Ville Syrjälä <ville.syrjala@linux.intel.com>
> CC: Chris Wilson <chris@chris-wilson.co.uk>
> Signed-off-by: Imre Deak <imre.deak@intel.com>
> ---
>  drivers/gpu/drm/i915/intel_mocs.c | 13 +++++++------
>  include/uapi/drm/i915_drm.h       | 24 ++++++++++++++++++++++++
>  2 files changed, 31 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c
> index 927825f..86adc11 100644
> --- a/drivers/gpu/drm/i915/intel_mocs.c
> +++ b/drivers/gpu/drm/i915/intel_mocs.c
> @@ -97,7 +97,8 @@ struct drm_i915_mocs_table {
>   *       end.
>   */
>  static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
> -	{ /* 0x00000009 */
> +	[I915_MOCS_UNCACHED] = {
> +	  /* 0x00000009 */
>  	  .control_value = LE_CACHEABILITY(LE_UC) |
>  			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
>  			   LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
> @@ -106,7 +107,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
>  	  /* 0x0010 */
>  	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
>  	},
> -	{
> +	[I915_MOCS_AUTO] = {
>  	  /* 0x00000038 */
>  	  .control_value = LE_CACHEABILITY(LE_PAGETABLE) |
>  			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
> @@ -115,7 +116,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
>  	  /* 0x0030 */
>  	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
>  	},
> -	{
> +	[I915_MOCS_CACHED] = {
>  	  /* 0x0000003b */
>  	  .control_value = LE_CACHEABILITY(LE_WB) |
>  			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
> @@ -128,7 +129,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
>  
>  /* NOTE: the LE_TGT_CACHE is not used on Broxton */
>  static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
> -	{
> +	[I915_MOCS_UNCACHED] = {
>  	  /* 0x00000009 */
>  	  .control_value = LE_CACHEABILITY(LE_UC) |
>  			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
> @@ -138,7 +139,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
>  	  /* 0x0010 */
>  	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
>  	},
> -	{
> +	[I915_MOCS_AUTO] = {
>  	  /* 0x00000038 */
>  	  .control_value = LE_CACHEABILITY(LE_PAGETABLE) |
>  			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
> @@ -148,7 +149,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
>  	  /* 0x0030 */
>  	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
>  	},
> -	{
> +	[I915_MOCS_CACHED] = {
>  	  /* 0x00000039 */
>  	  .control_value = LE_CACHEABILITY(LE_UC) |
>  			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index c17d63d..a5d116f 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -62,6 +62,30 @@ extern "C" {
>  #define I915_ERROR_UEVENT		"ERROR"
>  #define I915_RESET_UEVENT		"RESET"
>  
> +/*
> + * MOCS indexes used for GPU surfaces, defining the cacheability of the
> + * surface data and the coherency for this data wrt. CPU vs. GPU accesses.
> + */
> +enum i915_mocs_table_index {
> +	/*
> +	 * Not cached anywhere, coherency between CPU and GPU accesses is
> +	 * guaranteed.
> +	 */
> +	I915_MOCS_UNCACHED,
> +	/*
> +	 * Cacheability and coherency controlled by the kernel automatically
> +	 * based on the DRM_I915_GEM_SET_CACHING IOCTL setting and the current
> +	 * usage of the surface (used for display scanout or not).
> +	 */
> +	I915_MOCS_AUTO,

So PTE.

> +	/*
> +	 * Cached in all GPU caches available on the platform.
> +	 * Coherency between CPU and GPU accesses to the surface is not
> +	 * guaranteed without extra synchronization.
> +	 */
> +	I915_MOCS_CACHED,

So pretty useless for its current usage then.
-Chris
Imre Deak July 1, 2016, 1:56 p.m. UTC | #2
On pe, 2016-07-01 at 14:49 +0100, Chris Wilson wrote:
> On Fri, Jul 01, 2016 at 04:40:06PM +0300, Imre Deak wrote:
> > The purpose for each MOCS entry isn't well defined atm. Defining these
> > is important to remove any uncertainty about the use of these entries
> > for example in terms of performance and GPU/CPU coherency.
> > 
> > Suggested by Ville.
> > 
> > CC: Rong R Yang <rong.r.yang@intel.com>
> > CC: Yakui Zhao <yakui.zhao@intel.com>
> > CC: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > CC: Chris Wilson <chris@chris-wilson.co.uk>
> > Signed-off-by: Imre Deak <imre.deak@intel.com>
> > ---
> >  drivers/gpu/drm/i915/intel_mocs.c | 13 +++++++------
> >  include/uapi/drm/i915_drm.h       | 24 ++++++++++++++++++++++++
> >  2 files changed, 31 insertions(+), 6 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c
> > index 927825f..86adc11 100644
> > --- a/drivers/gpu/drm/i915/intel_mocs.c
> > +++ b/drivers/gpu/drm/i915/intel_mocs.c
> > @@ -97,7 +97,8 @@ struct drm_i915_mocs_table {
> >   *       end.
> >   */
> >  static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
> > -	{ /* 0x00000009 */
> > +	[I915_MOCS_UNCACHED] = {
> > +	  /* 0x00000009 */
> >  	  .control_value = LE_CACHEABILITY(LE_UC) |
> >  			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
> >  			   LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
> > @@ -106,7 +107,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
> >  	  /* 0x0010 */
> >  	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
> >  	},
> > -	{
> > +	[I915_MOCS_AUTO] = {
> >  	  /* 0x00000038 */
> >  	  .control_value = LE_CACHEABILITY(LE_PAGETABLE) |
> >  			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
> > @@ -115,7 +116,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
> >  	  /* 0x0030 */
> >  	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
> >  	},
> > -	{
> > +	[I915_MOCS_CACHED] = {
> >  	  /* 0x0000003b */
> >  	  .control_value = LE_CACHEABILITY(LE_WB) |
> >  			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
> > @@ -128,7 +129,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
> >  
> >  /* NOTE: the LE_TGT_CACHE is not used on Broxton */
> >  static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
> > -	{
> > +	[I915_MOCS_UNCACHED] = {
> >  	  /* 0x00000009 */
> >  	  .control_value = LE_CACHEABILITY(LE_UC) |
> >  			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
> > @@ -138,7 +139,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
> >  	  /* 0x0010 */
> >  	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
> >  	},
> > -	{
> > +	[I915_MOCS_AUTO] = {
> >  	  /* 0x00000038 */
> >  	  .control_value = LE_CACHEABILITY(LE_PAGETABLE) |
> >  			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
> > @@ -148,7 +149,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
> >  	  /* 0x0030 */
> >  	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
> >  	},
> > -	{
> > +	[I915_MOCS_CACHED] = {
> >  	  /* 0x00000039 */
> >  	  .control_value = LE_CACHEABILITY(LE_UC) |
> >  			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
> > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> > index c17d63d..a5d116f 100644
> > --- a/include/uapi/drm/i915_drm.h
> > +++ b/include/uapi/drm/i915_drm.h
> > @@ -62,6 +62,30 @@ extern "C" {
> >  #define I915_ERROR_UEVENT		"ERROR"
> >  #define I915_RESET_UEVENT		"RESET"
> >  
> > +/*
> > + * MOCS indexes used for GPU surfaces, defining the cacheability of the
> > + * surface data and the coherency for this data wrt. CPU vs. GPU accesses.
> > + */
> > +enum i915_mocs_table_index {
> > +	/*
> > +	 * Not cached anywhere, coherency between CPU and GPU accesses is
> > +	 * guaranteed.
> > +	 */
> > +	I915_MOCS_UNCACHED,
> > +	/*
> > +	 * Cacheability and coherency controlled by the kernel automatically
> > +	 * based on the DRM_I915_GEM_SET_CACHING IOCTL setting and the current
> > +	 * usage of the surface (used for display scanout or not).
> > +	 */
> > +	I915_MOCS_AUTO,
> 
> So PTE.

Can change it.

> > +	/*
> > +	 * Cached in all GPU caches available on the platform.
> > +	 * Coherency between CPU and GPU accesses to the surface is not
> > +	 * guaranteed without extra synchronization.
> > +	 */
> > +	I915_MOCS_CACHED,
> 
> So pretty useless for its current usage then.

This is how it's used in Mesa at the moment, where there is no need for
coherency. Beignet and Libva don't use this entry at the moment.

--Imre
Zhao, Yakui July 13, 2016, 2:22 a.m. UTC | #3
On 07/01/2016 09:40 PM, Deak, Imre wrote:
> The purpose for each MOCS entry isn't well defined atm. Defining these
> is important to remove any uncertainty about the use of these entries
> for example in terms of performance and GPU/CPU coherency.
>
> Suggested by Ville.
>
> CC: Rong R Yang<rong.r.yang@intel.com>
> CC: Yakui Zhao<yakui.zhao@intel.com>
> CC: Ville Syrjälä<ville.syrjala@linux.intel.com>
> CC: Chris Wilson<chris@chris-wilson.co.uk>
> Signed-off-by: Imre Deak<imre.deak@intel.com>

This looks more readable and meaningful after giving proper names to the
MOCS entry indexes.

But I'm not sure whether the comment for I915_MOCS_CACHED has a typo?

> ---
>   drivers/gpu/drm/i915/intel_mocs.c | 13 +++++++------
>   include/uapi/drm/i915_drm.h       | 24 ++++++++++++++++++++++++
>   2 files changed, 31 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c
> index 927825f..86adc11 100644
> --- a/drivers/gpu/drm/i915/intel_mocs.c
> +++ b/drivers/gpu/drm/i915/intel_mocs.c
> @@ -97,7 +97,8 @@ struct drm_i915_mocs_table {
>    *       end.
>    */
>   static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
> -	{ /* 0x00000009 */
> +	[I915_MOCS_UNCACHED] = {
> +	  /* 0x00000009 */
>   	  .control_value = LE_CACHEABILITY(LE_UC) |
>   			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
>   			   LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
> @@ -106,7 +107,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
>   	  /* 0x0010 */
>   	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
>   	},
> -	{
> +	[I915_MOCS_AUTO] = {
>   	  /* 0x00000038 */
>   	  .control_value = LE_CACHEABILITY(LE_PAGETABLE) |
>   			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
> @@ -115,7 +116,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
>   	  /* 0x0030 */
>   	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
>   	},
> -	{
> +	[I915_MOCS_CACHED] = {
>   	  /* 0x0000003b */
>   	  .control_value = LE_CACHEABILITY(LE_WB) |
>   			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
> @@ -128,7 +129,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
>
>   /* NOTE: the LE_TGT_CACHE is not used on Broxton */
>   static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
> -	{
> +	[I915_MOCS_UNCACHED] = {
>   	  /* 0x00000009 */
>   	  .control_value = LE_CACHEABILITY(LE_UC) |
>   			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
> @@ -138,7 +139,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
>   	  /* 0x0010 */
>   	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
>   	},
> -	{
> +	[I915_MOCS_AUTO] = {
>   	  /* 0x00000038 */
>   	  .control_value = LE_CACHEABILITY(LE_PAGETABLE) |
>   			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
> @@ -148,7 +149,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
>   	  /* 0x0030 */
>   	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
>   	},
> -	{
> +	[I915_MOCS_CACHED] = {
>   	  /* 0x00000039 */
>   	  .control_value = LE_CACHEABILITY(LE_UC) |
>   			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index c17d63d..a5d116f 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -62,6 +62,30 @@ extern "C" {
>   #define I915_ERROR_UEVENT		"ERROR"
>   #define I915_RESET_UEVENT		"RESET"
>
> +/*
> + * MOCS indexes used for GPU surfaces, defining the cacheability of the
> + * surface data and the coherency for this data wrt. CPU vs. GPU accesses.
> + */
> +enum i915_mocs_table_index {
> +	/*
> +	 * Not cached anywhere, coherency between CPU and GPU accesses is
> +	 * guaranteed.
> +	 */
> +	I915_MOCS_UNCACHED,
> +	/*
> +	 * Cacheability and coherency controlled by the kernel automatically
> +	 * based on the DRM_I915_GEM_SET_CACHING IOCTL setting and the current
> +	 * usage of the surface (used for display scanout or not).
> +	 */
> +	I915_MOCS_AUTO,
> +	/*
> +	 * Cached in all GPU caches available on the platform.
> +	 * Coherency between CPU and GPU accesses to the surface is not
> +	 * guaranteed without extra synchronization.
> +	 */

IMO coherency is guaranteed without extra synchronization for
I915_MOCS_CACHED.

> +	I915_MOCS_CACHED,
> +};
> +
>   /* Each region is a minimum of 16k, and there are at most 255 of them.
>    */
>   #define I915_NR_TEX_REGIONS 255	/* table size 2k - maximum due to use
Imre Deak July 13, 2016, 10:04 a.m. UTC | #4
Hi Yakui,

thanks for taking a look at these, see my comment below.

On ke, 2016-07-13 at 10:22 +0800, Zhao Yakui wrote:
> On 07/01/2016 09:40 PM, Deak, Imre wrote:
> > The purpose for each MOCS entry isn't well defined atm. Defining these
> > is important to remove any uncertainty about the use of these entries
> > for example in terms of performance and GPU/CPU coherency.
> > 
> > Suggested by Ville.
> > 
> > CC: Rong R Yang<rong.r.yang@intel.com>
> > CC: Yakui Zhao<yakui.zhao@intel.com>
> > CC: Ville Syrjälä<ville.syrjala@linux.intel.com>
> > CC: Chris Wilson<chris@chris-wilson.co.uk>
> > Signed-off-by: Imre Deak<imre.deak@intel.com>
> 
> This looks readable and meaningful after giving proper names to MOCS 
> entry index.
> 
> But I'm not sure whether the comment for I915_MOCS_CACHED has a typo?
> 
> > ---
> >   drivers/gpu/drm/i915/intel_mocs.c | 13 +++++++------
> >   include/uapi/drm/i915_drm.h       | 24 ++++++++++++++++++++++++
> >   2 files changed, 31 insertions(+), 6 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c
> > index 927825f..86adc11 100644
> > --- a/drivers/gpu/drm/i915/intel_mocs.c
> > +++ b/drivers/gpu/drm/i915/intel_mocs.c
> > @@ -97,7 +97,8 @@ struct drm_i915_mocs_table {
> >    *       end.
> >    */
> >   static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
> > -	{ /* 0x00000009 */
> > +	[I915_MOCS_UNCACHED] = {
> > +	  /* 0x00000009 */
> >   	  .control_value = LE_CACHEABILITY(LE_UC) |
> >   			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
> >   			   LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
> > @@ -106,7 +107,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
> >   	  /* 0x0010 */
> >   	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
> >   	},
> > -	{
> > +	[I915_MOCS_AUTO] = {
> >   	  /* 0x00000038 */
> >   	  .control_value = LE_CACHEABILITY(LE_PAGETABLE) |
> >   			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
> > @@ -115,7 +116,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
> >   	  /* 0x0030 */
> >   	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
> >   	},
> > -	{
> > +	[I915_MOCS_CACHED] = {
> >   	  /* 0x0000003b */
> >   	  .control_value = LE_CACHEABILITY(LE_WB) |
> >   			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
> > @@ -128,7 +129,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
> > 
> >   /* NOTE: the LE_TGT_CACHE is not used on Broxton */
> >   static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
> > -	{
> > +	[I915_MOCS_UNCACHED] = {
> >   	  /* 0x00000009 */
> >   	  .control_value = LE_CACHEABILITY(LE_UC) |
> >   			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
> > @@ -138,7 +139,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
> >   	  /* 0x0010 */
> >   	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
> >   	},
> > -	{
> > +	[I915_MOCS_AUTO] = {
> >   	  /* 0x00000038 */
> >   	  .control_value = LE_CACHEABILITY(LE_PAGETABLE) |
> >   			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
> > @@ -148,7 +149,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
> >   	  /* 0x0030 */
> >   	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
> >   	},
> > -	{
> > +	[I915_MOCS_CACHED] = {
> >   	  /* 0x00000039 */
> >   	  .control_value = LE_CACHEABILITY(LE_UC) |
> >   			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
> > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> > index c17d63d..a5d116f 100644
> > --- a/include/uapi/drm/i915_drm.h
> > +++ b/include/uapi/drm/i915_drm.h
> > @@ -62,6 +62,30 @@ extern "C" {
> >   #define I915_ERROR_UEVENT		"ERROR"
> >   #define I915_RESET_UEVENT		"RESET"
> > 
> > +/*
> > + * MOCS indexes used for GPU surfaces, defining the cacheability of the
> > + * surface data and the coherency for this data wrt. CPU vs. GPU accesses.
> > + */
> > +enum i915_mocs_table_index {
> > +	/*
> > +	 * Not cached anywhere, coherency between CPU and GPU accesses is
> > +	 * guaranteed.
> > +	 */
> > +	I915_MOCS_UNCACHED,
> > +	/*
> > +	 * Cacheability and coherency controlled by the kernel automatically
> > +	 * based on the DRM_I915_GEM_SET_CACHING IOCTL setting and the current
> > +	 * usage of the surface (used for display scanout or not).
> > +	 */
> > +	I915_MOCS_AUTO,
> > +	/*
> > +	 * Cached in all GPU caches available on the platform.
> > +	 * Coherency between CPU and GPU accesses to the surface is not
> > +	 * guaranteed without extra synchronization.
> > +	 */
> 
> IMO the coherency is guaranteed without extra synchronization for the 
> MOCS_CACHED.

No. On BXT it will make the data cached in GPU caches but will not keep
the data coherent between GPU and CPU without extra synchronization.
For that we would need to enable snooping, but that has considerable
overhead, so we turn it off in patch 2/3. On SKL, using this entry
happens to give you a coherent mapping, but that's just because the HW
doesn't allow us to turn off snooping on that platform (supposedly
because snooping there doesn't have considerable overhead thanks to
the LLC).

--Imre

> 
> > +	I915_MOCS_CACHED,
> > +};
> > +
> >   /* Each region is a minimum of 16k, and there are at most 255 of them.
> >    */
> >   #define I915_NR_TEX_REGIONS 255	/* table size 2k - maximum due to use
>
Zhao, Yakui July 14, 2016, 1:38 a.m. UTC | #5
On 07/13/2016 06:04 PM, Deak, Imre wrote:
> Hi Yakui,
>
> thanks for taking a look at these, see my comment below.
>
> On ke, 2016-07-13 at 10:22 +0800, Zhao Yakui wrote:
>> On 07/01/2016 09:40 PM, Deak, Imre wrote:
>>> The purpose for each MOCS entry isn't well defined atm. Defining these
>>> is important to remove any uncertainty about the use of these entries
>>> for example in terms of performance and GPU/CPU coherency.
>>>
>>> Suggested by Ville.
>>>
>>> CC: Rong R Yang<rong.r.yang@intel.com>
>>> CC: Yakui Zhao<yakui.zhao@intel.com>
>>> CC: Ville Syrjälä<ville.syrjala@linux.intel.com>
>>> CC: Chris Wilson<chris@chris-wilson.co.uk>
>>> Signed-off-by: Imre Deak<imre.deak@intel.com>
>>
>> This looks readable and meaningful after giving proper names to MOCS
>> entry index.
>>
>> But I'm not sure whether the comment for I915_MOCS_CACHED has a typo?
>>
>>> ---
>>>    drivers/gpu/drm/i915/intel_mocs.c | 13 +++++++------
>>>    include/uapi/drm/i915_drm.h       | 24 ++++++++++++++++++++++++
>>>    2 files changed, 31 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c
>>> index 927825f..86adc11 100644
>>> --- a/drivers/gpu/drm/i915/intel_mocs.c
>>> +++ b/drivers/gpu/drm/i915/intel_mocs.c
>>> @@ -97,7 +97,8 @@ struct drm_i915_mocs_table {
>>>     *       end.
>>>     */
>>>    static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
>>> -	{ /* 0x00000009 */
>>> +	[I915_MOCS_UNCACHED] = {
>>> +	  /* 0x00000009 */
>>>    	  .control_value = LE_CACHEABILITY(LE_UC) |
>>>    			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
>>>    			   LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
>>> @@ -106,7 +107,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
>>>    	  /* 0x0010 */
>>>    	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
>>>    	},
>>> -	{
>>> +	[I915_MOCS_AUTO] = {
>>>    	  /* 0x00000038 */
>>>    	  .control_value = LE_CACHEABILITY(LE_PAGETABLE) |
>>>    			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
>>> @@ -115,7 +116,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
>>>    	  /* 0x0030 */
>>>    	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
>>>    	},
>>> -	{
>>> +	[I915_MOCS_CACHED] = {
>>>    	  /* 0x0000003b */
>>>    	  .control_value = LE_CACHEABILITY(LE_WB) |
>>>    			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
>>> @@ -128,7 +129,7 @@ static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
>>>
>>>    /* NOTE: the LE_TGT_CACHE is not used on Broxton */
>>>    static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
>>> -	{
>>> +	[I915_MOCS_UNCACHED] = {
>>>    	  /* 0x00000009 */
>>>    	  .control_value = LE_CACHEABILITY(LE_UC) |
>>>    			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
>>> @@ -138,7 +139,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
>>>    	  /* 0x0010 */
>>>    	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
>>>    	},
>>> -	{
>>> +	[I915_MOCS_AUTO] = {
>>>    	  /* 0x00000038 */
>>>    	  .control_value = LE_CACHEABILITY(LE_PAGETABLE) |
>>>    			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
>>> @@ -148,7 +149,7 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
>>>    	  /* 0x0030 */
>>>    	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
>>>    	},
>>> -	{
>>> +	[I915_MOCS_CACHED] = {
>>>    	  /* 0x00000039 */
>>>    	  .control_value = LE_CACHEABILITY(LE_UC) |
>>>    			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
>>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>>> index c17d63d..a5d116f 100644
>>> --- a/include/uapi/drm/i915_drm.h
>>> +++ b/include/uapi/drm/i915_drm.h
>>> @@ -62,6 +62,30 @@ extern "C" {
>>>    #define I915_ERROR_UEVENT		"ERROR"
>>>    #define I915_RESET_UEVENT		"RESET"
>>>
>>> +/*
>>> + * MOCS indexes used for GPU surfaces, defining the cacheability of the
>>> + * surface data and the coherency for this data wrt. CPU vs. GPU accesses.
>>> + */
>>> +enum i915_mocs_table_index {
>>> +	/*
>>> +	 * Not cached anywhere, coherency between CPU and GPU accesses is
>>> +	 * guaranteed.
>>> +	 */
>>> +	I915_MOCS_UNCACHED,
>>> +	/*
>>> +	 * Cacheability and coherency controlled by the kernel automatically
>>> +	 * based on the DRM_I915_GEM_SET_CACHING IOCTL setting and the current
>>> +	 * usage of the surface (used for display scanout or not).
>>> +	 */
>>> +	I915_MOCS_AUTO,
>>> +	/*
>>> +	 * Cached in all GPU caches available on the platform.
>>> +	 * Coherency between CPU and GPU accesses to the surface is not
>>> +	 * guaranteed without extra synchronization.
>>> +	 */
>>
>> IMO the coherency is guaranteed without extra synchronization for the
>> MOCS_CACHED.
>
> No. On BXT it will make the data cached in GPU caches but will not keep
> the data coherent between GPU and CPU without extra synchronization.
> For that we would need to enable snooping, but that has considerable
> overhead, so we turn that off in patch 2/3. On SKL using this entry
> happens to give you a coherent mapping, but that's just because the HW
> doesn't allow us to turn off snooping on that platform (supposedly
> because there snooping doesn't have a considerable overhead thanks to
> LLC).

thanks for the detailed explanation.
Now it is clear to me.

Thanks
    Yakui
>
> --Imre
>
>>
>>> +	I915_MOCS_CACHED,
>>> +};
>>> +
>>>    /* Each region is a minimum of 16k, and there are at most 255 of them.
>>>     */
>>>    #define I915_NR_TEX_REGIONS 255	/* table size 2k - maximum due to use
>>
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c
index 927825f..86adc11 100644
--- a/drivers/gpu/drm/i915/intel_mocs.c
+++ b/drivers/gpu/drm/i915/intel_mocs.c
@@ -97,7 +97,8 @@  struct drm_i915_mocs_table {
  *       end.
  */
 static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
-	{ /* 0x00000009 */
+	[I915_MOCS_UNCACHED] = {
+	  /* 0x00000009 */
 	  .control_value = LE_CACHEABILITY(LE_UC) |
 			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
 			   LE_LRUM(0) | LE_AOM(0) | LE_RSC(0) | LE_SCC(0) |
@@ -106,7 +107,7 @@  static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
 	  /* 0x0010 */
 	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
 	},
-	{
+	[I915_MOCS_AUTO] = {
 	  /* 0x00000038 */
 	  .control_value = LE_CACHEABILITY(LE_PAGETABLE) |
 			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
@@ -115,7 +116,7 @@  static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
 	  /* 0x0030 */
 	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
 	},
-	{
+	[I915_MOCS_CACHED] = {
 	  /* 0x0000003b */
 	  .control_value = LE_CACHEABILITY(LE_WB) |
 			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
@@ -128,7 +129,7 @@  static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
 
 /* NOTE: the LE_TGT_CACHE is not used on Broxton */
 static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
-	{
+	[I915_MOCS_UNCACHED] = {
 	  /* 0x00000009 */
 	  .control_value = LE_CACHEABILITY(LE_UC) |
 			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
@@ -138,7 +139,7 @@  static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
 	  /* 0x0010 */
 	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_UC),
 	},
-	{
+	[I915_MOCS_AUTO] = {
 	  /* 0x00000038 */
 	  .control_value = LE_CACHEABILITY(LE_PAGETABLE) |
 			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
@@ -148,7 +149,7 @@  static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
 	  /* 0x0030 */
 	  .l3cc_value =    L3_ESC(0) | L3_SCC(0) | L3_CACHEABILITY(L3_WB),
 	},
-	{
+	[I915_MOCS_CACHED] = {
 	  /* 0x00000039 */
 	  .control_value = LE_CACHEABILITY(LE_UC) |
 			   LE_TGT_CACHE(LE_TC_LLC_ELLC) |
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index c17d63d..a5d116f 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -62,6 +62,30 @@  extern "C" {
 #define I915_ERROR_UEVENT		"ERROR"
 #define I915_RESET_UEVENT		"RESET"
 
+/*
+ * MOCS indexes used for GPU surfaces, defining the cacheability of the
+ * surface data and the coherency for this data wrt. CPU vs. GPU accesses.
+ */
+enum i915_mocs_table_index {
+	/*
+	 * Not cached anywhere, coherency between CPU and GPU accesses is
+	 * guaranteed.
+	 */
+	I915_MOCS_UNCACHED,
+	/*
+	 * Cacheability and coherency controlled by the kernel automatically
+	 * based on the DRM_I915_GEM_SET_CACHING IOCTL setting and the current
+	 * usage of the surface (used for display scanout or not).
+	 */
+	I915_MOCS_AUTO,
+	/*
+	 * Cached in all GPU caches available on the platform.
+	 * Coherency between CPU and GPU accesses to the surface is not
+	 * guaranteed without extra synchronization.
+	 */
+	I915_MOCS_CACHED,
+};
+
 /* Each region is a minimum of 16k, and there are at most 255 of them.
  */
 #define I915_NR_TEX_REGIONS 255	/* table size 2k - maximum due to use