diff mbox series

[7/7] drm/i915: Allow user to set cache at BO creation

Message ID 20230401063830.438127-8-fei.yang@intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915/mtl: Define MOCS and PAT tables for MTL | expand

Commit Message

Yang, Fei April 1, 2023, 6:38 a.m. UTC
From: Fei Yang <fei.yang@intel.com>

To comply with the design that buffer objects shall have immutable
cache setting through out its life cycle, {set, get}_caching ioctl's
are no longer supported from MTL onward. With that change caching
policy can only be set at object creation time. The current code
applies a default (platform dependent) cache setting for all objects.
However this is not optimal for performance tuning. The patch extends
the existing gem_create uAPI to let user set PAT index for the object
at creation time.
The new extension is platform independent, so UMD's can switch to using
this extension for older platforms as well, while {set, get}_caching are
still supported on these legacy paltforms for compatibility reason.

Cc: Chris Wilson <chris.p.wilson@linux.intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Fei Yang <fei.yang@intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_create.c | 33 ++++++++++++++++++++
 include/uapi/drm/i915_drm.h                | 36 ++++++++++++++++++++++
 tools/include/uapi/drm/i915_drm.h          | 36 ++++++++++++++++++++++
 3 files changed, 105 insertions(+)

Comments

Ville Syrjälä April 3, 2023, 4:02 p.m. UTC | #1
On Fri, Mar 31, 2023 at 11:38:30PM -0700, fei.yang@intel.com wrote:
> From: Fei Yang <fei.yang@intel.com>
> 
> To comply with the design that buffer objects shall have immutable
> cache setting through out its life cycle, {set, get}_caching ioctl's
> are no longer supported from MTL onward. With that change caching
> policy can only be set at object creation time. The current code
> applies a default (platform dependent) cache setting for all objects.
> However this is not optimal for performance tuning. The patch extends
> the existing gem_create uAPI to let user set PAT index for the object
> at creation time.

This is missing the whole justification for the new uapi.
Why is MOCS not sufficient?

> The new extension is platform independent, so UMD's can switch to using
> this extension for older platforms as well, while {set, get}_caching are
> still supported on these legacy paltforms for compatibility reason.
> 
> Cc: Chris Wilson <chris.p.wilson@linux.intel.com>
> Cc: Matt Roper <matthew.d.roper@intel.com>
> Signed-off-by: Fei Yang <fei.yang@intel.com>
> Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_create.c | 33 ++++++++++++++++++++
>  include/uapi/drm/i915_drm.h                | 36 ++++++++++++++++++++++
>  tools/include/uapi/drm/i915_drm.h          | 36 ++++++++++++++++++++++
>  3 files changed, 105 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c
> index e76c9703680e..1c6e2034d28e 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
> @@ -244,6 +244,7 @@ struct create_ext {
>  	unsigned int n_placements;
>  	unsigned int placement_mask;
>  	unsigned long flags;
> +	unsigned int pat_index;
>  };
>  
>  static void repr_placements(char *buf, size_t size,
> @@ -393,11 +394,39 @@ static int ext_set_protected(struct i915_user_extension __user *base, void *data
>  	return 0;
>  }
>  
> +static int ext_set_pat(struct i915_user_extension __user *base, void *data)
> +{
> +	struct create_ext *ext_data = data;
> +	struct drm_i915_private *i915 = ext_data->i915;
> +	struct drm_i915_gem_create_ext_set_pat ext;
> +	unsigned int max_pat_index;
> +
> +	BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) !=
> +		     offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd));
> +
> +	if (copy_from_user(&ext, base, sizeof(ext)))
> +		return -EFAULT;
> +
> +	max_pat_index = INTEL_INFO(i915)->max_pat_index;
> +
> +	if (ext.pat_index > max_pat_index) {
> +		drm_dbg(&i915->drm, "PAT index is invalid: %u\n",
> +			ext.pat_index);
> +		return -EINVAL;
> +	}
> +
> +	ext_data->pat_index = ext.pat_index;
> +
> +	return 0;
> +}
> +
>  static const i915_user_extension_fn create_extensions[] = {
>  	[I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements,
>  	[I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected,
> +	[I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat,
>  };
>  
> +#define PAT_INDEX_NOT_SET	0xffff
>  /**
>   * Creates a new mm object and returns a handle to it.
>   * @dev: drm device pointer
> @@ -417,6 +446,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data,
>  	if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS)
>  		return -EINVAL;
>  
> +	ext_data.pat_index = PAT_INDEX_NOT_SET;
>  	ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
>  				   create_extensions,
>  				   ARRAY_SIZE(create_extensions),
> @@ -453,5 +483,8 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data,
>  	if (IS_ERR(obj))
>  		return PTR_ERR(obj);
>  
> +	if (ext_data.pat_index != PAT_INDEX_NOT_SET)
> +		i915_gem_object_set_pat_index(obj, ext_data.pat_index);
> +
>  	return i915_gem_publish(obj, file, &args->size, &args->handle);
>  }
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index dba7c5a5b25e..03c5c314846e 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -3630,9 +3630,13 @@ struct drm_i915_gem_create_ext {
>  	 *
>  	 * For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see
>  	 * struct drm_i915_gem_create_ext_protected_content.
> +	 *
> +	 * For I915_GEM_CREATE_EXT_SET_PAT usage see
> +	 * struct drm_i915_gem_create_ext_set_pat.
>  	 */
>  #define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0
>  #define I915_GEM_CREATE_EXT_PROTECTED_CONTENT 1
> +#define I915_GEM_CREATE_EXT_SET_PAT 2
>  	__u64 extensions;
>  };
>  
> @@ -3747,6 +3751,38 @@ struct drm_i915_gem_create_ext_protected_content {
>  	__u32 flags;
>  };
>  
> +/**
> + * struct drm_i915_gem_create_ext_set_pat - The
> + * I915_GEM_CREATE_EXT_SET_PAT extension.
> + *
> + * If this extension is provided, the specified caching policy (PAT index) is
> + * applied to the buffer object.
> + *
> + * Below is an example on how to create an object with specific caching policy:
> + *
> + * .. code-block:: C
> + *
> + *      struct drm_i915_gem_create_ext_set_pat set_pat_ext = {
> + *              .base = { .name = I915_GEM_CREATE_EXT_SET_PAT },
> + *              .pat_index = 0,
> + *      };
> + *      struct drm_i915_gem_create_ext create_ext = {
> + *              .size = PAGE_SIZE,
> + *              .extensions = (uintptr_t)&set_pat_ext,
> + *      };
> + *
> + *      int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext);
> + *      if (err) ...
> + */
> +struct drm_i915_gem_create_ext_set_pat {
> +	/** @base: Extension link. See struct i915_user_extension. */
> +	struct i915_user_extension base;
> +	/** @pat_index: PAT index to be set */
> +	__u32 pat_index;
> +	/** @rsvd: reserved for future use */
> +	__u32 rsvd;
> +};
> +
>  /* ID of the protected content session managed by i915 when PXP is active */
>  #define I915_PROTECTED_CONTENT_DEFAULT_SESSION 0xf
>  
> diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
> index 8df261c5ab9b..8cdcdb5fac26 100644
> --- a/tools/include/uapi/drm/i915_drm.h
> +++ b/tools/include/uapi/drm/i915_drm.h
> @@ -3607,9 +3607,13 @@ struct drm_i915_gem_create_ext {
>  	 *
>  	 * For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see
>  	 * struct drm_i915_gem_create_ext_protected_content.
> +	 *
> +	 * For I915_GEM_CREATE_EXT_SET_PAT usage see
> +	 * struct drm_i915_gem_create_ext_set_pat.
>  	 */
>  #define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0
>  #define I915_GEM_CREATE_EXT_PROTECTED_CONTENT 1
> +#define I915_GEM_CREATE_EXT_SET_PAT 2
>  	__u64 extensions;
>  };
>  
> @@ -3724,6 +3728,38 @@ struct drm_i915_gem_create_ext_protected_content {
>  	__u32 flags;
>  };
>  
> +/**
> + * struct drm_i915_gem_create_ext_set_pat - The
> + * I915_GEM_CREATE_EXT_SET_PAT extension.
> + *
> + * If this extension is provided, the specified caching policy (PAT index) is
> + * applied to the buffer object.
> + *
> + * Below is an example on how to create an object with specific caching policy:
> + *
> + * .. code-block:: C
> + *
> + *      struct drm_i915_gem_create_ext_set_pat set_pat_ext = {
> + *              .base = { .name = I915_GEM_CREATE_EXT_SET_PAT },
> + *              .pat_index = 0,
> + *      };
> + *      struct drm_i915_gem_create_ext create_ext = {
> + *              .size = PAGE_SIZE,
> + *              .extensions = (uintptr_t)&set_pat_ext,
> + *      };
> + *
> + *      int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext);
> + *      if (err) ...
> + */
> +struct drm_i915_gem_create_ext_set_pat {
> +	/** @base: Extension link. See struct i915_user_extension. */
> +	struct i915_user_extension base;
> +	/** @pat_index: PAT index to be set */
> +	__u32 pat_index;
> +	/** @rsvd: reserved for future use */
> +	__u32 rsvd;
> +};
> +
>  /* ID of the protected content session managed by i915 when PXP is active */
>  #define I915_PROTECTED_CONTENT_DEFAULT_SESSION 0xf
>  
> -- 
> 2.25.1
Matt Roper April 3, 2023, 4:35 p.m. UTC | #2
On Mon, Apr 03, 2023 at 07:02:08PM +0300, Ville Syrjälä wrote:
> On Fri, Mar 31, 2023 at 11:38:30PM -0700, fei.yang@intel.com wrote:
> > From: Fei Yang <fei.yang@intel.com>
> > 
> > To comply with the design that buffer objects shall have immutable
> > cache setting through out its life cycle, {set, get}_caching ioctl's
> > are no longer supported from MTL onward. With that change caching
> > policy can only be set at object creation time. The current code
> > applies a default (platform dependent) cache setting for all objects.
> > However this is not optimal for performance tuning. The patch extends
> > the existing gem_create uAPI to let user set PAT index for the object
> > at creation time.
> 
> This is missing the whole justification for the new uapi.
> Why is MOCS not sufficient?

PAT and MOCS are somewhat related, but they're not the same thing.  The
general direction of the hardware architecture recently has been to
slowly dumb down MOCS and move more of the important memory/cache
control over to the PAT instead.  On current platforms there is some
overlap (and MOCS has an "ignore PAT" setting that makes the MOCS "win"
for the specific fields that both can control), but MOCS doesn't have a
way to express things like snoop/coherency mode (on MTL), or class of
service (on PVC).  And if you check some of the future platforms, the
hardware design starts packing even more stuff into the PAT (not just
cache behavior) which will never be handled by MOCS.

Also keep in mind that MOCS generally applies at the GPU instruction
level; although a lot of instructions have a field to provide a MOCS
index, or can use a MOCS already associated with a surface state, there
are still some that don't.  PAT is the source of memory access
characteristics for anything that can't provide a MOCS directly.


Matt

> 
> > The new extension is platform independent, so UMD's can switch to using
> > this extension for older platforms as well, while {set, get}_caching are
> > still supported on these legacy paltforms for compatibility reason.
> > 
> > Cc: Chris Wilson <chris.p.wilson@linux.intel.com>
> > Cc: Matt Roper <matthew.d.roper@intel.com>
> > Signed-off-by: Fei Yang <fei.yang@intel.com>
> > Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
> > ---
> >  drivers/gpu/drm/i915/gem/i915_gem_create.c | 33 ++++++++++++++++++++
> >  include/uapi/drm/i915_drm.h                | 36 ++++++++++++++++++++++
> >  tools/include/uapi/drm/i915_drm.h          | 36 ++++++++++++++++++++++
> >  3 files changed, 105 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c
> > index e76c9703680e..1c6e2034d28e 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
> > @@ -244,6 +244,7 @@ struct create_ext {
> >  	unsigned int n_placements;
> >  	unsigned int placement_mask;
> >  	unsigned long flags;
> > +	unsigned int pat_index;
> >  };
> >  
> >  static void repr_placements(char *buf, size_t size,
> > @@ -393,11 +394,39 @@ static int ext_set_protected(struct i915_user_extension __user *base, void *data
> >  	return 0;
> >  }
> >  
> > +static int ext_set_pat(struct i915_user_extension __user *base, void *data)
> > +{
> > +	struct create_ext *ext_data = data;
> > +	struct drm_i915_private *i915 = ext_data->i915;
> > +	struct drm_i915_gem_create_ext_set_pat ext;
> > +	unsigned int max_pat_index;
> > +
> > +	BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) !=
> > +		     offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd));
> > +
> > +	if (copy_from_user(&ext, base, sizeof(ext)))
> > +		return -EFAULT;
> > +
> > +	max_pat_index = INTEL_INFO(i915)->max_pat_index;
> > +
> > +	if (ext.pat_index > max_pat_index) {
> > +		drm_dbg(&i915->drm, "PAT index is invalid: %u\n",
> > +			ext.pat_index);
> > +		return -EINVAL;
> > +	}
> > +
> > +	ext_data->pat_index = ext.pat_index;
> > +
> > +	return 0;
> > +}
> > +
> >  static const i915_user_extension_fn create_extensions[] = {
> >  	[I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements,
> >  	[I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected,
> > +	[I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat,
> >  };
> >  
> > +#define PAT_INDEX_NOT_SET	0xffff
> >  /**
> >   * Creates a new mm object and returns a handle to it.
> >   * @dev: drm device pointer
> > @@ -417,6 +446,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data,
> >  	if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS)
> >  		return -EINVAL;
> >  
> > +	ext_data.pat_index = PAT_INDEX_NOT_SET;
> >  	ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
> >  				   create_extensions,
> >  				   ARRAY_SIZE(create_extensions),
> > @@ -453,5 +483,8 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data,
> >  	if (IS_ERR(obj))
> >  		return PTR_ERR(obj);
> >  
> > +	if (ext_data.pat_index != PAT_INDEX_NOT_SET)
> > +		i915_gem_object_set_pat_index(obj, ext_data.pat_index);
> > +
> >  	return i915_gem_publish(obj, file, &args->size, &args->handle);
> >  }
> > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> > index dba7c5a5b25e..03c5c314846e 100644
> > --- a/include/uapi/drm/i915_drm.h
> > +++ b/include/uapi/drm/i915_drm.h
> > @@ -3630,9 +3630,13 @@ struct drm_i915_gem_create_ext {
> >  	 *
> >  	 * For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see
> >  	 * struct drm_i915_gem_create_ext_protected_content.
> > +	 *
> > +	 * For I915_GEM_CREATE_EXT_SET_PAT usage see
> > +	 * struct drm_i915_gem_create_ext_set_pat.
> >  	 */
> >  #define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0
> >  #define I915_GEM_CREATE_EXT_PROTECTED_CONTENT 1
> > +#define I915_GEM_CREATE_EXT_SET_PAT 2
> >  	__u64 extensions;
> >  };
> >  
> > @@ -3747,6 +3751,38 @@ struct drm_i915_gem_create_ext_protected_content {
> >  	__u32 flags;
> >  };
> >  
> > +/**
> > + * struct drm_i915_gem_create_ext_set_pat - The
> > + * I915_GEM_CREATE_EXT_SET_PAT extension.
> > + *
> > + * If this extension is provided, the specified caching policy (PAT index) is
> > + * applied to the buffer object.
> > + *
> > + * Below is an example on how to create an object with specific caching policy:
> > + *
> > + * .. code-block:: C
> > + *
> > + *      struct drm_i915_gem_create_ext_set_pat set_pat_ext = {
> > + *              .base = { .name = I915_GEM_CREATE_EXT_SET_PAT },
> > + *              .pat_index = 0,
> > + *      };
> > + *      struct drm_i915_gem_create_ext create_ext = {
> > + *              .size = PAGE_SIZE,
> > + *              .extensions = (uintptr_t)&set_pat_ext,
> > + *      };
> > + *
> > + *      int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext);
> > + *      if (err) ...
> > + */
> > +struct drm_i915_gem_create_ext_set_pat {
> > +	/** @base: Extension link. See struct i915_user_extension. */
> > +	struct i915_user_extension base;
> > +	/** @pat_index: PAT index to be set */
> > +	__u32 pat_index;
> > +	/** @rsvd: reserved for future use */
> > +	__u32 rsvd;
> > +};
> > +
> >  /* ID of the protected content session managed by i915 when PXP is active */
> >  #define I915_PROTECTED_CONTENT_DEFAULT_SESSION 0xf
> >  
> > diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
> > index 8df261c5ab9b..8cdcdb5fac26 100644
> > --- a/tools/include/uapi/drm/i915_drm.h
> > +++ b/tools/include/uapi/drm/i915_drm.h
> > @@ -3607,9 +3607,13 @@ struct drm_i915_gem_create_ext {
> >  	 *
> >  	 * For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see
> >  	 * struct drm_i915_gem_create_ext_protected_content.
> > +	 *
> > +	 * For I915_GEM_CREATE_EXT_SET_PAT usage see
> > +	 * struct drm_i915_gem_create_ext_set_pat.
> >  	 */
> >  #define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0
> >  #define I915_GEM_CREATE_EXT_PROTECTED_CONTENT 1
> > +#define I915_GEM_CREATE_EXT_SET_PAT 2
> >  	__u64 extensions;
> >  };
> >  
> > @@ -3724,6 +3728,38 @@ struct drm_i915_gem_create_ext_protected_content {
> >  	__u32 flags;
> >  };
> >  
> > +/**
> > + * struct drm_i915_gem_create_ext_set_pat - The
> > + * I915_GEM_CREATE_EXT_SET_PAT extension.
> > + *
> > + * If this extension is provided, the specified caching policy (PAT index) is
> > + * applied to the buffer object.
> > + *
> > + * Below is an example on how to create an object with specific caching policy:
> > + *
> > + * .. code-block:: C
> > + *
> > + *      struct drm_i915_gem_create_ext_set_pat set_pat_ext = {
> > + *              .base = { .name = I915_GEM_CREATE_EXT_SET_PAT },
> > + *              .pat_index = 0,
> > + *      };
> > + *      struct drm_i915_gem_create_ext create_ext = {
> > + *              .size = PAGE_SIZE,
> > + *              .extensions = (uintptr_t)&set_pat_ext,
> > + *      };
> > + *
> > + *      int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext);
> > + *      if (err) ...
> > + */
> > +struct drm_i915_gem_create_ext_set_pat {
> > +	/** @base: Extension link. See struct i915_user_extension. */
> > +	struct i915_user_extension base;
> > +	/** @pat_index: PAT index to be set */
> > +	__u32 pat_index;
> > +	/** @rsvd: reserved for future use */
> > +	__u32 rsvd;
> > +};
> > +
> >  /* ID of the protected content session managed by i915 when PXP is active */
> >  #define I915_PROTECTED_CONTENT_DEFAULT_SESSION 0xf
> >  
> > -- 
> > 2.25.1
> 
> -- 
> Ville Syrjälä
> Intel
Ville Syrjälä April 3, 2023, 4:48 p.m. UTC | #3
On Mon, Apr 03, 2023 at 09:35:32AM -0700, Matt Roper wrote:
> On Mon, Apr 03, 2023 at 07:02:08PM +0300, Ville Syrjälä wrote:
> > On Fri, Mar 31, 2023 at 11:38:30PM -0700, fei.yang@intel.com wrote:
> > > From: Fei Yang <fei.yang@intel.com>
> > > 
> > > To comply with the design that buffer objects shall have immutable
> > > cache setting through out its life cycle, {set, get}_caching ioctl's
> > > are no longer supported from MTL onward. With that change caching
> > > policy can only be set at object creation time. The current code
> > > applies a default (platform dependent) cache setting for all objects.
> > > However this is not optimal for performance tuning. The patch extends
> > > the existing gem_create uAPI to let user set PAT index for the object
> > > at creation time.
> > 
> > This is missing the whole justification for the new uapi.
> > Why is MOCS not sufficient?
> 
> PAT and MOCS are somewhat related, but they're not the same thing.  The
> general direction of the hardware architecture recently has been to
> slowly dumb down MOCS and move more of the important memory/cache
> control over to the PAT instead.  On current platforms there is some
> overlap (and MOCS has an "ignore PAT" setting that makes the MOCS "win"
> for the specific fields that both can control), but MOCS doesn't have a
> way to express things like snoop/coherency mode (on MTL), or class of
> service (on PVC).  And if you check some of the future platforms, the
> hardware design starts packing even more stuff into the PAT (not just
> cache behavior) which will never be handled by MOCS.

Sigh. So the hardware designers screwed up MOCS yet again and
instead of getting that fixed we are adding a new uapi to work
around it?

The IMO sane approach (which IIRC was the situation for a few
platform generations at least) is that you just shove the PAT
index into MOCS (or tell it to go look it up from the PTE).
Why the heck did they not just stick with that?

> 
> Also keep in mind that MOCS generally applies at the GPU instruction
> level; although a lot of instructions have a field to provide a MOCS
> index, or can use a MOCS already associated with a surface state, there
> are still some that don't. PAT is the source of memory access
> characteristics for anything that can't provide a MOCS directly.

So what are the things that don't have MOCS and where we need
some custom cache behaviour, and we already know all that at
buffer creation time?
Lionel Landwerlin April 4, 2023, 7:29 a.m. UTC | #4
On 01/04/2023 09:38, fei.yang@intel.com wrote:
> From: Fei Yang <fei.yang@intel.com>
>
> To comply with the design that buffer objects shall have immutable
> cache setting through out its life cycle, {set, get}_caching ioctl's
> are no longer supported from MTL onward. With that change caching
> policy can only be set at object creation time. The current code
> applies a default (platform dependent) cache setting for all objects.
> However this is not optimal for performance tuning. The patch extends
> the existing gem_create uAPI to let user set PAT index for the object
> at creation time.
> The new extension is platform independent, so UMD's can switch to using
> this extension for older platforms as well, while {set, get}_caching are
> still supported on these legacy paltforms for compatibility reason.
>
> Cc: Chris Wilson <chris.p.wilson@linux.intel.com>
> Cc: Matt Roper <matthew.d.roper@intel.com>
> Signed-off-by: Fei Yang <fei.yang@intel.com>
> Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>


Just like the protected content uAPI, there is no way for userspace to 
tell this feature is available other than trying using it.

Given the issues with protected content, is it not thing we could want 
to add?


Thanks,


-Lionel


> ---
>   drivers/gpu/drm/i915/gem/i915_gem_create.c | 33 ++++++++++++++++++++
>   include/uapi/drm/i915_drm.h                | 36 ++++++++++++++++++++++
>   tools/include/uapi/drm/i915_drm.h          | 36 ++++++++++++++++++++++
>   3 files changed, 105 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c
> index e76c9703680e..1c6e2034d28e 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
> @@ -244,6 +244,7 @@ struct create_ext {
>   	unsigned int n_placements;
>   	unsigned int placement_mask;
>   	unsigned long flags;
> +	unsigned int pat_index;
>   };
>   
>   static void repr_placements(char *buf, size_t size,
> @@ -393,11 +394,39 @@ static int ext_set_protected(struct i915_user_extension __user *base, void *data
>   	return 0;
>   }
>   
> +static int ext_set_pat(struct i915_user_extension __user *base, void *data)
> +{
> +	struct create_ext *ext_data = data;
> +	struct drm_i915_private *i915 = ext_data->i915;
> +	struct drm_i915_gem_create_ext_set_pat ext;
> +	unsigned int max_pat_index;
> +
> +	BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) !=
> +		     offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd));
> +
> +	if (copy_from_user(&ext, base, sizeof(ext)))
> +		return -EFAULT;
> +
> +	max_pat_index = INTEL_INFO(i915)->max_pat_index;
> +
> +	if (ext.pat_index > max_pat_index) {
> +		drm_dbg(&i915->drm, "PAT index is invalid: %u\n",
> +			ext.pat_index);
> +		return -EINVAL;
> +	}
> +
> +	ext_data->pat_index = ext.pat_index;
> +
> +	return 0;
> +}
> +
>   static const i915_user_extension_fn create_extensions[] = {
>   	[I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements,
>   	[I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected,
> +	[I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat,
>   };
>   
> +#define PAT_INDEX_NOT_SET	0xffff
>   /**
>    * Creates a new mm object and returns a handle to it.
>    * @dev: drm device pointer
> @@ -417,6 +446,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data,
>   	if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS)
>   		return -EINVAL;
>   
> +	ext_data.pat_index = PAT_INDEX_NOT_SET;
>   	ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
>   				   create_extensions,
>   				   ARRAY_SIZE(create_extensions),
> @@ -453,5 +483,8 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data,
>   	if (IS_ERR(obj))
>   		return PTR_ERR(obj);
>   
> +	if (ext_data.pat_index != PAT_INDEX_NOT_SET)
> +		i915_gem_object_set_pat_index(obj, ext_data.pat_index);
> +
>   	return i915_gem_publish(obj, file, &args->size, &args->handle);
>   }
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index dba7c5a5b25e..03c5c314846e 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -3630,9 +3630,13 @@ struct drm_i915_gem_create_ext {
>   	 *
>   	 * For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see
>   	 * struct drm_i915_gem_create_ext_protected_content.
> +	 *
> +	 * For I915_GEM_CREATE_EXT_SET_PAT usage see
> +	 * struct drm_i915_gem_create_ext_set_pat.
>   	 */
>   #define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0
>   #define I915_GEM_CREATE_EXT_PROTECTED_CONTENT 1
> +#define I915_GEM_CREATE_EXT_SET_PAT 2
>   	__u64 extensions;
>   };
>   
> @@ -3747,6 +3751,38 @@ struct drm_i915_gem_create_ext_protected_content {
>   	__u32 flags;
>   };
>   
> +/**
> + * struct drm_i915_gem_create_ext_set_pat - The
> + * I915_GEM_CREATE_EXT_SET_PAT extension.
> + *
> + * If this extension is provided, the specified caching policy (PAT index) is
> + * applied to the buffer object.
> + *
> + * Below is an example on how to create an object with specific caching policy:
> + *
> + * .. code-block:: C
> + *
> + *      struct drm_i915_gem_create_ext_set_pat set_pat_ext = {
> + *              .base = { .name = I915_GEM_CREATE_EXT_SET_PAT },
> + *              .pat_index = 0,
> + *      };
> + *      struct drm_i915_gem_create_ext create_ext = {
> + *              .size = PAGE_SIZE,
> + *              .extensions = (uintptr_t)&set_pat_ext,
> + *      };
> + *
> + *      int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext);
> + *      if (err) ...
> + */
> +struct drm_i915_gem_create_ext_set_pat {
> +	/** @base: Extension link. See struct i915_user_extension. */
> +	struct i915_user_extension base;
> +	/** @pat_index: PAT index to be set */
> +	__u32 pat_index;
> +	/** @rsvd: reserved for future use */
> +	__u32 rsvd;
> +};
> +
>   /* ID of the protected content session managed by i915 when PXP is active */
>   #define I915_PROTECTED_CONTENT_DEFAULT_SESSION 0xf
>   
> diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
> index 8df261c5ab9b..8cdcdb5fac26 100644
> --- a/tools/include/uapi/drm/i915_drm.h
> +++ b/tools/include/uapi/drm/i915_drm.h
> @@ -3607,9 +3607,13 @@ struct drm_i915_gem_create_ext {
>   	 *
>   	 * For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see
>   	 * struct drm_i915_gem_create_ext_protected_content.
> +	 *
> +	 * For I915_GEM_CREATE_EXT_SET_PAT usage see
> +	 * struct drm_i915_gem_create_ext_set_pat.
>   	 */
>   #define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0
>   #define I915_GEM_CREATE_EXT_PROTECTED_CONTENT 1
> +#define I915_GEM_CREATE_EXT_SET_PAT 2
>   	__u64 extensions;
>   };
>   
> @@ -3724,6 +3728,38 @@ struct drm_i915_gem_create_ext_protected_content {
>   	__u32 flags;
>   };
>   
> +/**
> + * struct drm_i915_gem_create_ext_set_pat - The
> + * I915_GEM_CREATE_EXT_SET_PAT extension.
> + *
> + * If this extension is provided, the specified caching policy (PAT index) is
> + * applied to the buffer object.
> + *
> + * Below is an example on how to create an object with specific caching policy:
> + *
> + * .. code-block:: C
> + *
> + *      struct drm_i915_gem_create_ext_set_pat set_pat_ext = {
> + *              .base = { .name = I915_GEM_CREATE_EXT_SET_PAT },
> + *              .pat_index = 0,
> + *      };
> + *      struct drm_i915_gem_create_ext create_ext = {
> + *              .size = PAGE_SIZE,
> + *              .extensions = (uintptr_t)&set_pat_ext,
> + *      };
> + *
> + *      int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext);
> + *      if (err) ...
> + */
> +struct drm_i915_gem_create_ext_set_pat {
> +	/** @base: Extension link. See struct i915_user_extension. */
> +	struct i915_user_extension base;
> +	/** @pat_index: PAT index to be set */
> +	__u32 pat_index;
> +	/** @rsvd: reserved for future use */
> +	__u32 rsvd;
> +};
> +
>   /* ID of the protected content session managed by i915 when PXP is active */
>   #define I915_PROTECTED_CONTENT_DEFAULT_SESSION 0xf
>
Yang, Fei April 4, 2023, 4:04 p.m. UTC | #5
> Subject: Re: [Intel-gfx] [PATCH 7/7] drm/i915: Allow user to set cache at BO creation
>
> On 01/04/2023 09:38, fei.yang@intel.com wrote:
>> From: Fei Yang <fei.yang@intel.com>
>>
>> To comply with the design that buffer objects shall have immutable
>> cache setting through out its life cycle, {set, get}_caching ioctl's
>> are no longer supported from MTL onward. With that change caching
>> policy can only be set at object creation time. The current code
>> applies a default (platform dependent) cache setting for all objects.
>> However this is not optimal for performance tuning. The patch extends
>> the existing gem_create uAPI to let user set PAT index for the object
>> at creation time.
>> The new extension is platform independent, so UMD's can switch to
>> using this extension for older platforms as well, while {set,
>> get}_caching are still supported on these legacy paltforms for compatibility reason.
>>
>> Cc: Chris Wilson <chris.p.wilson@linux.intel.com>
>> Cc: Matt Roper <matthew.d.roper@intel.com>
>> Signed-off-by: Fei Yang <fei.yang@intel.com>
>> Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
>
>
> Just like the protected content uAPI, there is no way for userspace to tell
> this feature is available other than trying using it.
>
> Given the issues with protected content, is it not thing we could want to add?

Sorry I'm not aware of the issues with protected content, could you elaborate?
There was a long discussion on teams uAPI channel, could you comment there if
any concerns?

https://teams.microsoft.com/l/message/19:f1767bda6734476ba0a9c7d147b928d1@thread.skype/1675860924675?tenantId=46c98d88-e344-4ed4-8496-4ed7712e255d&groupId=379f3ae1-d138-4205-bb65-d4c7d38cb481&parentMessageId=1675860924675&teamName=GSE%20OSGC&channelName=i915%20uAPI%20changes&createdTime=1675860924675&allowXTenantAccess=false

Thanks,
-Fei

>Thanks,
>
>-Lionel
>
>
>> ---
>>   drivers/gpu/drm/i915/gem/i915_gem_create.c | 33 ++++++++++++++++++++
>>   include/uapi/drm/i915_drm.h                | 36 ++++++++++++++++++++++
>>   tools/include/uapi/drm/i915_drm.h          | 36 ++++++++++++++++++++++
>>   3 files changed, 105 insertions(+)
>>
Kenneth Graunke April 4, 2023, 10:15 p.m. UTC | #6
On Monday, April 3, 2023 9:48:40 AM PDT Ville Syrjälä wrote:
> On Mon, Apr 03, 2023 at 09:35:32AM -0700, Matt Roper wrote:
> > On Mon, Apr 03, 2023 at 07:02:08PM +0300, Ville Syrjälä wrote:
> > > On Fri, Mar 31, 2023 at 11:38:30PM -0700, fei.yang@intel.com wrote:
> > > > From: Fei Yang <fei.yang@intel.com>
> > > > 
> > > > To comply with the design that buffer objects shall have immutable
> > > > cache setting through out its life cycle, {set, get}_caching ioctl's
> > > > are no longer supported from MTL onward. With that change caching
> > > > policy can only be set at object creation time. The current code
> > > > applies a default (platform dependent) cache setting for all objects.
> > > > However this is not optimal for performance tuning. The patch extends
> > > > the existing gem_create uAPI to let user set PAT index for the object
> > > > at creation time.
> > > 
> > > This is missing the whole justification for the new uapi.
> > > Why is MOCS not sufficient?
> > 
> > PAT and MOCS are somewhat related, but they're not the same thing.  The
> > general direction of the hardware architecture recently has been to
> > slowly dumb down MOCS and move more of the important memory/cache
> > control over to the PAT instead.  On current platforms there is some
> > overlap (and MOCS has an "ignore PAT" setting that makes the MOCS "win"
> > for the specific fields that both can control), but MOCS doesn't have a
> > way to express things like snoop/coherency mode (on MTL), or class of
> > service (on PVC).  And if you check some of the future platforms, the
> > hardware design starts packing even more stuff into the PAT (not just
> > cache behavior) which will never be handled by MOCS.
> 
> Sigh. So the hardware designers screwed up MOCS yet again and
> instead of getting that fixed we are adding a new uapi to work
> around it?
> 
> The IMO sane approach (which IIRC was the situation for a few
> platform generations at least) is that you just shove the PAT
> index into MOCS (or tell it to go look it up from the PTE).
> Why the heck did they not just stick with that?

There are actually some use cases in newer APIs where MOCS doesn't
work well.  For example, VK_KHR_buffer_device_address in Vulkan 1.2:

https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_KHR_buffer_device_address.html

It essentially adds "pointers to buffer memory in shaders", where apps
can just get a 64-bit pointer, and use it with an address.  On our EUs,
that turns into A64 data port messages which refer directly to memory.
Notably, there's no descriptor (i.e. SURFACE_STATE) where you could
stuff a MOCS value.  So, you get one single MOCS entry for all such
buffers...which is specified in STATE_BASE_ADDRESS.  Hope you wanted
all of them to have the same cache & coherency settings!

With PAT/PTE, we can at least specify settings for each buffer, rather
than one global setting.

Compression has also been moving towards virtual address-based solutions
and handling in the caches and memory controller, rather than in e.g.
the sampler reading SURFACE_STATE.  (It started evolving that way with
Tigerlake, really, but continues.)

> > Also keep in mind that MOCS generally applies at the GPU instruction
> > level; although a lot of instructions have a field to provide a MOCS
> > index, or can use a MOCS already associated with a surface state, there
> > are still some that don't. PAT is the source of memory access
> > characteristics for anything that can't provide a MOCS directly.
> 
> So what are the things that don't have MOCS and where we need
> some custom cache behaviour, and we already know all that at
> buffer creation time?

For Meteorlake...we have MOCS for cache settings.  We only need to use
PAT for coherency settings; I believe we can get away with deciding that
up-front at buffer creation time.  If we were doing full cacheability,
I'd be very nervous about deciding performance tuning at creation time.

--Ken
Lionel Landwerlin April 5, 2023, 7:45 a.m. UTC | #7
On 04/04/2023 19:04, Yang, Fei wrote:
>> Subject: Re: [Intel-gfx] [PATCH 7/7] drm/i915: Allow user to set cache at BO creation
>>
>> On 01/04/2023 09:38, fei.yang@intel.com wrote:
>>> From: Fei Yang <fei.yang@intel.com>
>>>
>>> To comply with the design that buffer objects shall have immutable
>>> cache setting through out its life cycle, {set, get}_caching ioctl's
>>> are no longer supported from MTL onward. With that change caching
>>> policy can only be set at object creation time. The current code
>>> applies a default (platform dependent) cache setting for all objects.
>>> However this is not optimal for performance tuning. The patch extends
>>> the existing gem_create uAPI to let user set PAT index for the object
>>> at creation time.
>>> The new extension is platform independent, so UMD's can switch to
>>> using this extension for older platforms as well, while {set,
>>> get}_caching are still supported on these legacy paltforms for compatibility reason.
>>>
>>> Cc: Chris Wilson <chris.p.wilson@linux.intel.com>
>>> Cc: Matt Roper <matthew.d.roper@intel.com>
>>> Signed-off-by: Fei Yang <fei.yang@intel.com>
>>> Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
>>
>> Just like the protected content uAPI, there is no way for userspace to tell
>> this feature is available other than trying using it.
>>
>> Given the issues with protected content, is it not thing we could want to add?
> Sorry I'm not aware of the issues with protected content, could you elaborate?
> There was a long discussion on teams uAPI channel, could you comment there if
> any concerns?
>
> https://teams.microsoft.com/l/message/19:f1767bda6734476ba0a9c7d147b928d1@thread.skype/1675860924675?tenantId=46c98d88-e344-4ed4-8496-4ed7712e255d&groupId=379f3ae1-d138-4205-bb65-d4c7d38cb481&parentMessageId=1675860924675&teamName=GSE%20OSGC&channelName=i915%20uAPI%20changes&createdTime=1675860924675&allowXTenantAccess=false
>
> Thanks,
> -Fei


We wanted to have a getparam to detect protected support and were told 
to detect it by trying to create a context with it.

Now it appears trying to create a protected context can block for 
several seconds.

Since we have to report capabilities to the user even before it creates 
protected contexts, any app is at risk of blocking.


-Lionel


>
>> Thanks,
>>
>> -Lionel
>>
>>
>>> ---
>>>    drivers/gpu/drm/i915/gem/i915_gem_create.c | 33 ++++++++++++++++++++
>>>    include/uapi/drm/i915_drm.h                | 36 ++++++++++++++++++++++
>>>    tools/include/uapi/drm/i915_drm.h          | 36 ++++++++++++++++++++++
>>>    3 files changed, 105 insertions(+)
>>>
Jordan Justen April 5, 2023, 8:26 p.m. UTC | #8
On 2023-04-05 00:45:24, Lionel Landwerlin wrote:
> On 04/04/2023 19:04, Yang, Fei wrote:
> >> Subject: Re: [Intel-gfx] [PATCH 7/7] drm/i915: Allow user to set cache at BO creation
> >>
> >> Just like the protected content uAPI, there is no way for userspace to tell
> >> this feature is available other than trying using it.
> >>
> >> Given the issues with protected content, is it not thing we could want to add?
> > Sorry I'm not aware of the issues with protected content, could you elaborate?
> > There was a long discussion on teams uAPI channel, could you comment there if
> > any concerns?
> >
> 
> We wanted to have a getparam to detect protected support and were told 
> to detect it by trying to create a context with it.
> 

An extensions system where the detection mechanism is "just try it",
and assume it's not supported if it fails. ??

This seem likely to get more and more problematic as a detection
mechanism as more extensions are added.

> 
> Now it appears trying to create a protected context can block for 
> several seconds.
> 
> Since we have to report capabilities to the user even before it creates 
> protected contexts, any app is at risk of blocking.
> 

This failure path is not causing any re-thinking about using this as
the extension detection mechanism?

Doesn't the ioctl# + input-struct-size + u64-extension# identify the
extension such that the kernel could indicate if it is supported or
not. (Or, perhaps return an array of the supported extensions so the
umd doesn't have to potentially make many ioctls for each extension of
interest.)

-Jordan
Yang, Fei April 5, 2023, 11:06 p.m. UTC | #9
>Subject: Re: [Intel-gfx] [PATCH 7/7] drm/i915: Allow user to set cache at BO creation
>
>On 04/04/2023 19:04, Yang, Fei wrote:
>>> Subject: Re: [Intel-gfx] [PATCH 7/7] drm/i915: Allow user to set
>>> cache at BO creation
>>>
>>> On 01/04/2023 09:38, fei.yang@intel.com wrote:
>>>> From: Fei Yang <fei.yang@intel.com>
>>>>
>>>> To comply with the design that buffer objects shall have immutable
>>>> cache setting through out its life cycle, {set, get}_caching ioctl's
>>>> are no longer supported from MTL onward. With that change caching
>>>> policy can only be set at object creation time. The current code
>>>> applies a default (platform dependent) cache setting for all objects.
>>>> However this is not optimal for performance tuning. The patch
>>>> extends the existing gem_create uAPI to let user set PAT index for
>>>> the object at creation time.
>>>> The new extension is platform independent, so UMD's can switch to
>>>> using this extension for older platforms as well, while {set,
>>>> get}_caching are still supported on these legacy paltforms for
>>>> compatibility reason.
>>>>
>>>> Cc: Chris Wilson <chris.p.wilson@linux.intel.com>
>>>> Cc: Matt Roper <matthew.d.roper@intel.com>
>>>> Signed-off-by: Fei Yang <fei.yang@intel.com>
>>>> Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
>>>
>>> Just like the protected content uAPI, there is no way for userspace
>>> to tell this feature is available other than trying using it.
>>>
>>> Given the issues with protected content, is it not thing we could want to add?
>> Sorry I'm not aware of the issues with protected content, could you elaborate?
>> There was a long discussion on teams uAPI channel, could you comment
>> there if any concerns?
>>
>> https://teams.microsoft.com/l/message/19:f1767bda6734476ba0a9c7d147b92
>> 8d1@thread.skype/1675860924675?tenantId=46c98d88-e344-4ed4-8496-4ed771
>> 2e255d&groupId=379f3ae1-d138-4205-bb65-d4c7d38cb481&parentMessageId=16
>> 75860924675&teamName=GSE%20OSGC&channelName=i915%20uAPI%20changes&crea
>> tedTime=1675860924675&allowXTenantAccess=false
>>
>> Thanks,
>> -Fei
>
>
> We wanted to have a getparam to detect protected support and were told
> to detect it by trying to create a context with it.
>
> Now it appears trying to create a protected context can block for several
> seconds.
>
> Since we have to report capabilities to the user even before it creates
> protected contexts, any app is at risk of blocking.

Can we detect this capability by creating a buffer object? This extension is
not blocking, it just provide a way to set caching policy, and should complete
very fast. There is a IGT test I created for this extension (not merged yet),
please take a look at http://intel-gfx-pw.fi.intel.com/series/19149/

I'm not familiar with getparam, will take a look there as well. But I think it
would be easier just create an object.

-Fei

>-Lionel
>
>
>>
>>> Thanks,
>>>
>>> -Lionel
>>>
>>>
>>>> ---
>>>>    drivers/gpu/drm/i915/gem/i915_gem_create.c | 33 ++++++++++++++++++++
>>>>    include/uapi/drm/i915_drm.h                | 36 ++++++++++++++++++++++
>>>>    tools/include/uapi/drm/i915_drm.h          | 36 ++++++++++++++++++++++
>>>>    3 files changed, 105 insertions(+)
>>>>
Matthew Auld April 6, 2023, 9:11 a.m. UTC | #10
On Sat, 1 Apr 2023 at 07:37, <fei.yang@intel.com> wrote:
>
> From: Fei Yang <fei.yang@intel.com>
>
> To comply with the design that buffer objects shall have immutable
> cache setting through out its life cycle, {set, get}_caching ioctl's
> are no longer supported from MTL onward. With that change caching
> policy can only be set at object creation time. The current code
> applies a default (platform dependent) cache setting for all objects.
> However this is not optimal for performance tuning. The patch extends
> the existing gem_create uAPI to let user set PAT index for the object
> at creation time.
> The new extension is platform independent, so UMD's can switch to using
> this extension for older platforms as well, while {set, get}_caching are
> still supported on these legacy paltforms for compatibility reason.

Do we forbid {set, get}_caching, when combined with this new extension
on the same BO? There is some documentation in @cache_dirty. The
concern is being able to subvert the flush-on-acquire for non-LLC.

>
> Cc: Chris Wilson <chris.p.wilson@linux.intel.com>
> Cc: Matt Roper <matthew.d.roper@intel.com>
> Signed-off-by: Fei Yang <fei.yang@intel.com>
> Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_create.c | 33 ++++++++++++++++++++
>  include/uapi/drm/i915_drm.h                | 36 ++++++++++++++++++++++
>  tools/include/uapi/drm/i915_drm.h          | 36 ++++++++++++++++++++++
>  3 files changed, 105 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c
> index e76c9703680e..1c6e2034d28e 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
> @@ -244,6 +244,7 @@ struct create_ext {
>         unsigned int n_placements;
>         unsigned int placement_mask;
>         unsigned long flags;
> +       unsigned int pat_index;
>  };
>
>  static void repr_placements(char *buf, size_t size,
> @@ -393,11 +394,39 @@ static int ext_set_protected(struct i915_user_extension __user *base, void *data
>         return 0;
>  }
>
> +static int ext_set_pat(struct i915_user_extension __user *base, void *data)
> +{
> +       struct create_ext *ext_data = data;
> +       struct drm_i915_private *i915 = ext_data->i915;
> +       struct drm_i915_gem_create_ext_set_pat ext;
> +       unsigned int max_pat_index;
> +
> +       BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) !=
> +                    offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd));
> +
> +       if (copy_from_user(&ext, base, sizeof(ext)))
> +               return -EFAULT;
> +
> +       max_pat_index = INTEL_INFO(i915)->max_pat_index;
> +
> +       if (ext.pat_index > max_pat_index) {
> +               drm_dbg(&i915->drm, "PAT index is invalid: %u\n",
> +                       ext.pat_index);
> +               return -EINVAL;
> +       }
> +
> +       ext_data->pat_index = ext.pat_index;
> +
> +       return 0;
> +}
> +
>  static const i915_user_extension_fn create_extensions[] = {
>         [I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements,
>         [I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected,
> +       [I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat,
>  };
>
> +#define PAT_INDEX_NOT_SET      0xffff
>  /**
>   * Creates a new mm object and returns a handle to it.
>   * @dev: drm device pointer
> @@ -417,6 +446,7 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data,
>         if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS)
>                 return -EINVAL;
>
> +       ext_data.pat_index = PAT_INDEX_NOT_SET;
>         ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
>                                    create_extensions,
>                                    ARRAY_SIZE(create_extensions),
> @@ -453,5 +483,8 @@ i915_gem_create_ext_ioctl(struct drm_device *dev, void *data,
>         if (IS_ERR(obj))
>                 return PTR_ERR(obj);
>
> +       if (ext_data.pat_index != PAT_INDEX_NOT_SET)
> +               i915_gem_object_set_pat_index(obj, ext_data.pat_index);
> +
>         return i915_gem_publish(obj, file, &args->size, &args->handle);
>  }
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index dba7c5a5b25e..03c5c314846e 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -3630,9 +3630,13 @@ struct drm_i915_gem_create_ext {
>          *
>          * For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see
>          * struct drm_i915_gem_create_ext_protected_content.
> +        *
> +        * For I915_GEM_CREATE_EXT_SET_PAT usage see
> +        * struct drm_i915_gem_create_ext_set_pat.
>          */
>  #define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0
>  #define I915_GEM_CREATE_EXT_PROTECTED_CONTENT 1
> +#define I915_GEM_CREATE_EXT_SET_PAT 2
>         __u64 extensions;
>  };
>
> @@ -3747,6 +3751,38 @@ struct drm_i915_gem_create_ext_protected_content {
>         __u32 flags;
>  };
>
> +/**
> + * struct drm_i915_gem_create_ext_set_pat - The
> + * I915_GEM_CREATE_EXT_SET_PAT extension.
> + *
> + * If this extension is provided, the specified caching policy (PAT index) is
> + * applied to the buffer object.
> + *
> + * Below is an example on how to create an object with specific caching policy:
> + *
> + * .. code-block:: C
> + *
> + *      struct drm_i915_gem_create_ext_set_pat set_pat_ext = {
> + *              .base = { .name = I915_GEM_CREATE_EXT_SET_PAT },
> + *              .pat_index = 0,
> + *      };
> + *      struct drm_i915_gem_create_ext create_ext = {
> + *              .size = PAGE_SIZE,
> + *              .extensions = (uintptr_t)&set_pat_ext,
> + *      };
> + *
> + *      int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext);
> + *      if (err) ...
> + */
> +struct drm_i915_gem_create_ext_set_pat {
> +       /** @base: Extension link. See struct i915_user_extension. */
> +       struct i915_user_extension base;
> +       /** @pat_index: PAT index to be set */
> +       __u32 pat_index;
> +       /** @rsvd: reserved for future use */
> +       __u32 rsvd;
> +};
> +
>  /* ID of the protected content session managed by i915 when PXP is active */
>  #define I915_PROTECTED_CONTENT_DEFAULT_SESSION 0xf
>
> diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
> index 8df261c5ab9b..8cdcdb5fac26 100644
> --- a/tools/include/uapi/drm/i915_drm.h
> +++ b/tools/include/uapi/drm/i915_drm.h
> @@ -3607,9 +3607,13 @@ struct drm_i915_gem_create_ext {
>          *
>          * For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see
>          * struct drm_i915_gem_create_ext_protected_content.
> +        *
> +        * For I915_GEM_CREATE_EXT_SET_PAT usage see
> +        * struct drm_i915_gem_create_ext_set_pat.
>          */
>  #define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0
>  #define I915_GEM_CREATE_EXT_PROTECTED_CONTENT 1
> +#define I915_GEM_CREATE_EXT_SET_PAT 2
>         __u64 extensions;
>  };
>
> @@ -3724,6 +3728,38 @@ struct drm_i915_gem_create_ext_protected_content {
>         __u32 flags;
>  };
>
> +/**
> + * struct drm_i915_gem_create_ext_set_pat - The
> + * I915_GEM_CREATE_EXT_SET_PAT extension.
> + *
> + * If this extension is provided, the specified caching policy (PAT index) is
> + * applied to the buffer object.
> + *
> + * Below is an example on how to create an object with specific caching policy:
> + *
> + * .. code-block:: C
> + *
> + *      struct drm_i915_gem_create_ext_set_pat set_pat_ext = {
> + *              .base = { .name = I915_GEM_CREATE_EXT_SET_PAT },
> + *              .pat_index = 0,
> + *      };
> + *      struct drm_i915_gem_create_ext create_ext = {
> + *              .size = PAGE_SIZE,
> + *              .extensions = (uintptr_t)&set_pat_ext,
> + *      };
> + *
> + *      int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext);
> + *      if (err) ...
> + */
> +struct drm_i915_gem_create_ext_set_pat {
> +       /** @base: Extension link. See struct i915_user_extension. */
> +       struct i915_user_extension base;
> +       /** @pat_index: PAT index to be set */
> +       __u32 pat_index;
> +       /** @rsvd: reserved for future use */
> +       __u32 rsvd;
> +};
> +
>  /* ID of the protected content session managed by i915 when PXP is active */
>  #define I915_PROTECTED_CONTENT_DEFAULT_SESSION 0xf
>
> --
> 2.25.1
>
Jordan Justen April 10, 2023, 8:23 a.m. UTC | #11
On 2023-04-05 13:26:43, Jordan Justen wrote:
> On 2023-04-05 00:45:24, Lionel Landwerlin wrote:
> > On 04/04/2023 19:04, Yang, Fei wrote:
> > >> Subject: Re: [Intel-gfx] [PATCH 7/7] drm/i915: Allow user to set cache at BO creation
> > >>
> > >> Just like the protected content uAPI, there is no way for userspace to tell
> > >> this feature is available other than trying using it.
> > >>
> > >> Given the issues with protected content, is it not thing we could want to add?
> > > Sorry I'm not aware of the issues with protected content, could you elaborate?
> > > There was a long discussion on teams uAPI channel, could you comment there if
> > > any concerns?
> > >
> > 
> > We wanted to have a getparam to detect protected support and were told 
> > to detect it by trying to create a context with it.
> > 
> 
> An extensions system where the detection mechanism is "just try it",
> and assume it's not supported if it fails. ??
> 

I guess no one wants to discuss the issues with this so-called
detection mechanism for i915 extensions. (Just try it and if it fails,
it must not be supported.)

I wonder how many ioctls we will be making a couple years down the
road just to see what the kernel supports.

Maybe we'll get more fun 8 second timeouts to deal with. Maybe these
probing ioctls failing or succeeding will alter the kmd's state in
some unexpected way.

It'll also be fun to debug cases where the driver is not starting up
with the noise of a bunch of probing ioctls flying by.

I thought about suggesting at least something like
I915_PARAM_CMD_PARSER_VERSION, but I don't know if that could have
prevented this 8 second timeout for creating a protected content
context. Maybe it's better than nothing though.

Of course, there was also the vague idea I threw out below for getting
a list of supported extentions.

-Jordan

> 
> This seem likely to get more and more problematic as a detection
> mechanism as more extensions are added.
> 
> > 
> > Now it appears trying to create a protected context can block for 
> > several seconds.
> > 
> > Since we have to report capabilities to the user even before it creates 
> > protected contexts, any app is at risk of blocking.
> > 
> 
> This failure path is not causing any re-thinking about using this as
> the extension detection mechanism?
> 
> Doesn't the ioctl# + input-struct-size + u64-extension# identify the
> extension such that the kernel could indicate if it is supported or
> not. (Or, perhaps return an array of the supported extensions so the
> umd doesn't have to potentially make many ioctls for each extension of
> interest.)
> 
> -Jordan
Yang, Fei April 13, 2023, 8:49 p.m. UTC | #12
> Subject: Re: [Intel-gfx] [PATCH 7/7] drm/i915: Allow user to set cache at BO creation
>
> On 2023-04-05 13:26:43, Jordan Justen wrote:
>> On 2023-04-05 00:45:24, Lionel Landwerlin wrote:
>>> On 04/04/2023 19:04, Yang, Fei wrote:
>>>>> Subject: Re: [Intel-gfx] [PATCH 7/7] drm/i915: Allow user to set 
>>>>> cache at BO creation
>>>>>
>>>>> Just like the protected content uAPI, there is no way for 
>>>>> userspace to tell this feature is available other than trying using it.
>>>>>
>>>>> Given the issues with protected content, is it not thing we could want to add?
>>>> Sorry I'm not aware of the issues with protected content, could you elaborate?
>>>> There was a long discussion on teams uAPI channel, could you 
>>>> comment there if any concerns?
>>>>
>>> 
>>> We wanted to have a getparam to detect protected support and were 
>>> told to detect it by trying to create a context with it.
>>> 
>> 
>> An extensions system where the detection mechanism is "just try it", 
>> and assume it's not supported if it fails. ??
>> 
>
> I guess no one wants to discuss the issues with this so-called detection
> mechanism for i915 extensions. (Just try it and if it fails, it must not
 be supported.)
>
> I wonder how many ioctls we will be making a couple years down the road
> just to see what the kernel supports.
>
> Maybe we'll get more fun 8 second timeouts to deal with. Maybe these probing
> ioctls failing or succeeding will alter the kmd's state in some unexpected way.

For this SET_PAT extension, I can assure you there is no 8 second wait :)
This is definitely a non-blocking call.

> It'll also be fun to debug cases where the driver is not starting up with the
> noise of a bunch of probing ioctls flying by.
>
> I thought about suggesting at least something like I915_PARAM_CMD_PARSER_VERSION,
> but I don't know if that could have prevented this 8 second timeout for creating
> a protected content context. Maybe it's better than nothing though.
>
> Of course, there was also the vague idea I threw out below for getting a list of
> supported extentions.

The detection mechanism itself is an uAPI change, I don't think it's a good idea to
combine that with this SET_PAT extension patch.
I suggest we start a discussion in the "i915 uAPI changes" teams channel just like
how we sorted out a solution for this setting cache policy issue. Would that work?

https://teams.microsoft.com/l/channel/19%3af1767bda6734476ba0a9c7d147b928d1%40thread.skype/i915%2520uAPI%2520changes?groupId=379f3ae1-d138-4205-bb65-d4c7d38cb481&tenantId=46c98d88-e344-4ed4-8496-4ed7712e255d

-Fei

> -Jordan
>
>> 
>> This seem likely to get more and more problematic as a detection 
>> mechanism as more extensions are added.
>> 
>> > 
>> > Now it appears trying to create a protected context can block for 
>> > several seconds.
>> > 
>> > Since we have to report capabilities to the user even before it 
>> > creates protected contexts, any app is at risk of blocking.
>> > 
>> 
>> This failure path is not causing any re-thinking about using this as 
>> the extension detection mechanism?
>> 
>> Doesn't the ioctl# + input-struct-size + u64-extension# identify the 
>> extension such that the kernel could indicate if it is supported or 
>> not. (Or, perhaps return an array of the supported extensions so the 
>> umd doesn't have to potentially make many ioctls for each extension of
>> interest.)
>> 
>> -Jordan
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c
index e76c9703680e..1c6e2034d28e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_create.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
@@ -244,6 +244,7 @@  struct create_ext {
 	unsigned int n_placements;
 	unsigned int placement_mask;
 	unsigned long flags;
+	unsigned int pat_index;
 };
 
 static void repr_placements(char *buf, size_t size,
@@ -393,11 +394,39 @@  static int ext_set_protected(struct i915_user_extension __user *base, void *data
 	return 0;
 }
 
+static int ext_set_pat(struct i915_user_extension __user *base, void *data)
+{
+	struct create_ext *ext_data = data;
+	struct drm_i915_private *i915 = ext_data->i915;
+	struct drm_i915_gem_create_ext_set_pat ext;
+	unsigned int max_pat_index;
+
+	BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) !=
+		     offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd));
+
+	if (copy_from_user(&ext, base, sizeof(ext)))
+		return -EFAULT;
+
+	max_pat_index = INTEL_INFO(i915)->max_pat_index;
+
+	if (ext.pat_index > max_pat_index) {
+		drm_dbg(&i915->drm, "PAT index is invalid: %u\n",
+			ext.pat_index);
+		return -EINVAL;
+	}
+
+	ext_data->pat_index = ext.pat_index;
+
+	return 0;
+}
+
 static const i915_user_extension_fn create_extensions[] = {
 	[I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements,
 	[I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected,
+	[I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat,
 };
 
+#define PAT_INDEX_NOT_SET	0xffff
 /**
  * Creates a new mm object and returns a handle to it.
  * @dev: drm device pointer
@@ -417,6 +446,7 @@  i915_gem_create_ext_ioctl(struct drm_device *dev, void *data,
 	if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS)
 		return -EINVAL;
 
+	ext_data.pat_index = PAT_INDEX_NOT_SET;
 	ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
 				   create_extensions,
 				   ARRAY_SIZE(create_extensions),
@@ -453,5 +483,8 @@  i915_gem_create_ext_ioctl(struct drm_device *dev, void *data,
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
 
+	if (ext_data.pat_index != PAT_INDEX_NOT_SET)
+		i915_gem_object_set_pat_index(obj, ext_data.pat_index);
+
 	return i915_gem_publish(obj, file, &args->size, &args->handle);
 }
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index dba7c5a5b25e..03c5c314846e 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -3630,9 +3630,13 @@  struct drm_i915_gem_create_ext {
 	 *
 	 * For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see
 	 * struct drm_i915_gem_create_ext_protected_content.
+	 *
+	 * For I915_GEM_CREATE_EXT_SET_PAT usage see
+	 * struct drm_i915_gem_create_ext_set_pat.
 	 */
 #define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0
 #define I915_GEM_CREATE_EXT_PROTECTED_CONTENT 1
+#define I915_GEM_CREATE_EXT_SET_PAT 2
 	__u64 extensions;
 };
 
@@ -3747,6 +3751,38 @@  struct drm_i915_gem_create_ext_protected_content {
 	__u32 flags;
 };
 
+/**
+ * struct drm_i915_gem_create_ext_set_pat - The
+ * I915_GEM_CREATE_EXT_SET_PAT extension.
+ *
+ * If this extension is provided, the specified caching policy (PAT index) is
+ * applied to the buffer object.
+ *
+ * Below is an example on how to create an object with specific caching policy:
+ *
+ * .. code-block:: C
+ *
+ *      struct drm_i915_gem_create_ext_set_pat set_pat_ext = {
+ *              .base = { .name = I915_GEM_CREATE_EXT_SET_PAT },
+ *              .pat_index = 0,
+ *      };
+ *      struct drm_i915_gem_create_ext create_ext = {
+ *              .size = PAGE_SIZE,
+ *              .extensions = (uintptr_t)&set_pat_ext,
+ *      };
+ *
+ *      int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext);
+ *      if (err) ...
+ */
+struct drm_i915_gem_create_ext_set_pat {
+	/** @base: Extension link. See struct i915_user_extension. */
+	struct i915_user_extension base;
+	/** @pat_index: PAT index to be set */
+	__u32 pat_index;
+	/** @rsvd: reserved for future use */
+	__u32 rsvd;
+};
+
 /* ID of the protected content session managed by i915 when PXP is active */
 #define I915_PROTECTED_CONTENT_DEFAULT_SESSION 0xf
 
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index 8df261c5ab9b..8cdcdb5fac26 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -3607,9 +3607,13 @@  struct drm_i915_gem_create_ext {
 	 *
 	 * For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see
 	 * struct drm_i915_gem_create_ext_protected_content.
+	 *
+	 * For I915_GEM_CREATE_EXT_SET_PAT usage see
+	 * struct drm_i915_gem_create_ext_set_pat.
 	 */
 #define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0
 #define I915_GEM_CREATE_EXT_PROTECTED_CONTENT 1
+#define I915_GEM_CREATE_EXT_SET_PAT 2
 	__u64 extensions;
 };
 
@@ -3724,6 +3728,38 @@  struct drm_i915_gem_create_ext_protected_content {
 	__u32 flags;
 };
 
+/**
+ * struct drm_i915_gem_create_ext_set_pat - The
+ * I915_GEM_CREATE_EXT_SET_PAT extension.
+ *
+ * If this extension is provided, the specified caching policy (PAT index) is
+ * applied to the buffer object.
+ *
+ * Below is an example on how to create an object with specific caching policy:
+ *
+ * .. code-block:: C
+ *
+ *      struct drm_i915_gem_create_ext_set_pat set_pat_ext = {
+ *              .base = { .name = I915_GEM_CREATE_EXT_SET_PAT },
+ *              .pat_index = 0,
+ *      };
+ *      struct drm_i915_gem_create_ext create_ext = {
+ *              .size = PAGE_SIZE,
+ *              .extensions = (uintptr_t)&set_pat_ext,
+ *      };
+ *
+ *      int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext);
+ *      if (err) ...
+ */
+struct drm_i915_gem_create_ext_set_pat {
+	/** @base: Extension link. See struct i915_user_extension. */
+	struct i915_user_extension base;
+	/** @pat_index: PAT index to be set */
+	__u32 pat_index;
+	/** @rsvd: reserved for future use */
+	__u32 rsvd;
+};
+
 /* ID of the protected content session managed by i915 when PXP is active */
 #define I915_PROTECTED_CONTENT_DEFAULT_SESSION 0xf