diff mbox

[3/3] drm/i915: Export ability of changing cache levels to userspace

Message ID 1341833679-11614-4-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson July 9, 2012, 11:34 a.m. UTC
By selecting the cache level (essentially whether or not the CPU snoops
any updates to the bo, and on more recent machines whether it resides
inside the CPU's last-level-cache) a userspace driver is able to then
manage all of its memory within buffer objects, if it so desires. This
enables the userspace driver to accelerate uploads and more importantly
downloads from the GPU and to able to mix CPU and GPU rendering/activity
efficiently.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_dma.c |    2 ++
 drivers/gpu/drm/i915/i915_drv.h |   11 ++++++---
 drivers/gpu/drm/i915/i915_gem.c |   50 +++++++++++++++++++++++++++++++++++++++
 include/drm/i915_drm.h          |   16 +++++++++++++
 4 files changed, 76 insertions(+), 3 deletions(-)

Comments

Daniel Vetter July 10, 2012, 8:54 a.m. UTC | #1
On Mon, Jul 09, 2012 at 12:34:39PM +0100, Chris Wilson wrote:
> By selecting the cache level (essentially whether or not the CPU snoops
> any updates to the bo, and on more recent machines whether it resides
> inside the CPU's last-level-cache) a userspace driver is able to then
> manage all of its memory within buffer objects, if it so desires. This
> enables the userspace driver to accelerate uploads and more importantly
> downloads from the GPU and to able to mix CPU and GPU rendering/activity
> efficiently.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_dma.c |    2 ++
>  drivers/gpu/drm/i915/i915_drv.h |   11 ++++++---
>  drivers/gpu/drm/i915/i915_gem.c |   50 +++++++++++++++++++++++++++++++++++++++
>  include/drm/i915_drm.h          |   16 +++++++++++++
>  4 files changed, 76 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index f64ef4b..2302e008 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1829,6 +1829,8 @@ struct drm_ioctl_desc i915_ioctls[] = {
>  	DRM_IOCTL_DEF_DRV(I915_GEM_PIN, i915_gem_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY|DRM_UNLOCKED),
>  	DRM_IOCTL_DEF_DRV(I915_GEM_UNPIN, i915_gem_unpin_ioctl, DRM_AUTH|DRM_ROOT_ONLY|DRM_UNLOCKED),
>  	DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED),
> +	DRM_IOCTL_DEF_DRV(I915_GEM_SET_CACHE_LEVEL, i915_gem_set_cache_level_ioctl, DRM_UNLOCKED),
> +	DRM_IOCTL_DEF_DRV(I915_GEM_GET_CACHE_LEVEL, i915_gem_get_cache_level_ioctl, DRM_UNLOCKED),
>  	DRM_IOCTL_DEF_DRV(I915_GEM_THROTTLE, i915_gem_throttle_ioctl, DRM_AUTH|DRM_UNLOCKED),
>  	DRM_IOCTL_DEF_DRV(I915_GEM_ENTERVT, i915_gem_entervt_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED),
>  	DRM_IOCTL_DEF_DRV(I915_GEM_LEAVEVT, i915_gem_leavevt_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED),
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 4fb358e..00a4cb1 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -40,6 +40,7 @@
>  #include <linux/backlight.h>
>  #include <linux/intel-iommu.h>
>  #include <linux/kref.h>
> +#include <drm/i915_drm.h>
>  
>  /* General customization:
>   */
> @@ -854,9 +855,9 @@ enum hdmi_force_audio {
>  };
>  
>  enum i915_cache_level {
> -	I915_CACHE_NONE,
> -	I915_CACHE_LLC,
> -	I915_CACHE_LLC_MLC, /* gen6+ */
> +	I915_CACHE_NONE = I915_CACHE_LEVEL_NONE,
> +	I915_CACHE_LLC = I915_CACHE_LEVEL_LLC,
> +	I915_CACHE_LLC_MLC = I915_CACHE_LEVEL_LLC_MLC, /* gen6+ */

LLC_MLC is a lie, it doesn't exist on gen6. And gen7 has something else
called l3$ cache, but that seems to be more special-purpose in nature
(hence I have a feeling it's better if userspace just sets the desired
caching in the surface state).

The other thing that's irking me is whether we want different names for
different kinds of caching or not, i.e. whether we should split out
pre-gen6 coherent mem from gen6+ coherent stuff. Also, on vlv we don't
have a llc cache, but we can still support coherent memory like on gen6+
with llc (it's just a bit slower for gpu-only use, hence not the default).

I guess I'd bikeshed less if we color this I915_CACHE_LEVEL_CPU_COHERENT
(and maybe add more specific variants in the future if we need them).
-Daniel

>  };
>  
>  struct drm_i915_gem_object {
> @@ -1249,6 +1250,10 @@ int i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
>  			 struct drm_file *file_priv);
>  int i915_gem_busy_ioctl(struct drm_device *dev, void *data,
>  			struct drm_file *file_priv);
> +int i915_gem_get_cache_level_ioctl(struct drm_device *dev, void *data,
> +				   struct drm_file *file);
> +int i915_gem_set_cache_level_ioctl(struct drm_device *dev, void *data,
> +				   struct drm_file *file);
>  int i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
>  			    struct drm_file *file_priv);
>  int i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index da89f13..75d67b8 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3102,6 +3102,56 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
>  	return 0;
>  }
>  
> +int i915_gem_get_cache_level_ioctl(struct drm_device *dev, void *data,
> +				   struct drm_file *file)
> +{
> +	struct drm_i915_gem_cache_level *args = data;
> +	struct drm_i915_gem_object *obj;
> +	int ret;
> +
> +	ret = i915_mutex_lock_interruptible(dev);
> +	if (ret)
> +		return ret;
> +
> +	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
> +	if (&obj->base == NULL) {
> +		ret = -ENOENT;
> +		goto unlock;
> +	}
> +
> +	args->cache_level = obj->cache_level;
> +
> +	drm_gem_object_unreference(&obj->base);
> +unlock:
> +	mutex_unlock(&dev->struct_mutex);
> +	return ret;
> +}
> +
> +int i915_gem_set_cache_level_ioctl(struct drm_device *dev, void *data,
> +				   struct drm_file *file)
> +{
> +	struct drm_i915_gem_cache_level *args = data;
> +	struct drm_i915_gem_object *obj;
> +	int ret;
> +
> +	ret = i915_mutex_lock_interruptible(dev);
> +	if (ret)
> +		return ret;
> +
> +	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
> +	if (&obj->base == NULL) {
> +		ret = -ENOENT;
> +		goto unlock;
> +	}
> +
> +	ret = i915_gem_object_set_cache_level(obj, args->cache_level);
> +
> +	drm_gem_object_unreference(&obj->base);
> +unlock:
> +	mutex_unlock(&dev->struct_mutex);
> +	return ret;
> +}
> +
>  /*
>   * Prepare buffer for display plane (scanout, cursors, etc).
>   * Can be called from an uninterruptible phase (modesetting) and allows
> diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
> index 564005e..058feba 100644
> --- a/include/drm/i915_drm.h
> +++ b/include/drm/i915_drm.h
> @@ -203,6 +203,8 @@ typedef struct _drm_i915_sarea {
>  #define DRM_I915_GEM_WAIT	0x2c
>  #define DRM_I915_GEM_CONTEXT_CREATE	0x2d
>  #define DRM_I915_GEM_CONTEXT_DESTROY	0x2e
> +#define DRM_I915_GEM_SET_CACHE_LEVEL	0x2f
> +#define DRM_I915_GEM_GET_CACHE_LEVEL	0x30
>  
>  #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
>  #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
> @@ -227,6 +229,8 @@ typedef struct _drm_i915_sarea {
>  #define DRM_IOCTL_I915_GEM_PIN		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin)
>  #define DRM_IOCTL_I915_GEM_UNPIN	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin)
>  #define DRM_IOCTL_I915_GEM_BUSY		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy)
> +#define DRM_IOCTL_I915_GEM_SET_CACHE_LEVEL		DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_SET_CACHE_LEVEL, struct drm_i915_gem_cache_level)
> +#define DRM_IOCTL_I915_GEM_GET_CACHE_LEVEL		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_GET_CACHE_LEVEL, struct drm_i915_gem_cache_level)
>  #define DRM_IOCTL_I915_GEM_THROTTLE	DRM_IO ( DRM_COMMAND_BASE + DRM_I915_GEM_THROTTLE)
>  #define DRM_IOCTL_I915_GEM_ENTERVT	DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_ENTERVT)
>  #define DRM_IOCTL_I915_GEM_LEAVEVT	DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_LEAVEVT)
> @@ -707,6 +711,18 @@ struct drm_i915_gem_busy {
>  	__u32 busy;
>  };
>  
> +#define I915_CACHE_LEVEL_NONE		0
> +#define I915_CACHE_LEVEL_LLC		1
> +#define I915_CACHE_LEVEL_LLC_MLC	2 /* gen6+ */
> +
> +struct drm_i915_gem_cache_level {
> +	/** Handle of the buffer to check for busy */
> +	__u32 handle;
> +
> +	/** Cache level to apply or return value */
> +	__u32 cache_level;
> +};
> +
>  #define I915_TILING_NONE	0
>  #define I915_TILING_X		1
>  #define I915_TILING_Y		2
> -- 
> 1.7.10
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Chris Wilson July 10, 2012, 9 a.m. UTC | #2
On Tue, 10 Jul 2012 10:54:02 +0200, Daniel Vetter <daniel@ffwll.ch> wrote:
> On Mon, Jul 09, 2012 at 12:34:39PM +0100, Chris Wilson wrote:
> >  enum i915_cache_level {
> > -	I915_CACHE_NONE,
> > -	I915_CACHE_LLC,
> > -	I915_CACHE_LLC_MLC, /* gen6+ */
> > +	I915_CACHE_NONE = I915_CACHE_LEVEL_NONE,
> > +	I915_CACHE_LLC = I915_CACHE_LEVEL_LLC,
> > +	I915_CACHE_LLC_MLC = I915_CACHE_LEVEL_LLC_MLC, /* gen6+ */
> 
> LLC_MLC is a lie, it doesn't exist on gen6. And gen7 has something else
> called l3$ cache, but that seems to be more special-purpose in nature
> (hence I have a feeling it's better if userspace just sets the desired
> caching in the surface state).
> 
> The other thing that's irking me is whether we want different names for
> different kinds of caching or not, i.e. whether we should split out
> pre-gen6 coherent mem from gen6+ coherent stuff. Also, on vlv we don't
> have a llc cache, but we can still support coherent memory like on gen6+
> with llc (it's just a bit slower for gpu-only use, hence not the default).

"Just a bit" will be an understatement judging by the snoopable
architectures upon which it is based. :-p

> I guess I'd bikeshed less if we color this I915_CACHE_LEVEL_CPU_COHERENT
> (and maybe add more specific variants in the future if we need them).

I was half thinking towards extensibility, but we are more likely to
need new ioctls rather than just add to this set of "cache levels".

I am happy with just having two levels in the userspace for this:
uncached, snoopable. They are generic enough to cover the last 7
generations, good enough for the next 3?
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index f64ef4b..2302e008 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1829,6 +1829,8 @@  struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_GEM_PIN, i915_gem_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY|DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_GEM_UNPIN, i915_gem_unpin_ioctl, DRM_AUTH|DRM_ROOT_ONLY|DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GEM_SET_CACHE_LEVEL, i915_gem_set_cache_level_ioctl, DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GEM_GET_CACHE_LEVEL, i915_gem_get_cache_level_ioctl, DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_GEM_THROTTLE, i915_gem_throttle_ioctl, DRM_AUTH|DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_GEM_ENTERVT, i915_gem_entervt_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_GEM_LEAVEVT, i915_gem_leavevt_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED),
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4fb358e..00a4cb1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -40,6 +40,7 @@ 
 #include <linux/backlight.h>
 #include <linux/intel-iommu.h>
 #include <linux/kref.h>
+#include <drm/i915_drm.h>
 
 /* General customization:
  */
@@ -854,9 +855,9 @@  enum hdmi_force_audio {
 };
 
 enum i915_cache_level {
-	I915_CACHE_NONE,
-	I915_CACHE_LLC,
-	I915_CACHE_LLC_MLC, /* gen6+ */
+	I915_CACHE_NONE = I915_CACHE_LEVEL_NONE,
+	I915_CACHE_LLC = I915_CACHE_LEVEL_LLC,
+	I915_CACHE_LLC_MLC = I915_CACHE_LEVEL_LLC_MLC, /* gen6+ */
 };
 
 struct drm_i915_gem_object {
@@ -1249,6 +1250,10 @@  int i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
 			 struct drm_file *file_priv);
 int i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file_priv);
+int i915_gem_get_cache_level_ioctl(struct drm_device *dev, void *data,
+				   struct drm_file *file);
+int i915_gem_set_cache_level_ioctl(struct drm_device *dev, void *data,
+				   struct drm_file *file);
 int i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *file_priv);
 int i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index da89f13..75d67b8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3102,6 +3102,56 @@  int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 	return 0;
 }
 
+int i915_gem_get_cache_level_ioctl(struct drm_device *dev, void *data,
+				   struct drm_file *file)
+{
+	struct drm_i915_gem_cache_level *args = data;
+	struct drm_i915_gem_object *obj;
+	int ret;
+
+	ret = i915_mutex_lock_interruptible(dev);
+	if (ret)
+		return ret;
+
+	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
+	if (&obj->base == NULL) {
+		ret = -ENOENT;
+		goto unlock;
+	}
+
+	args->cache_level = obj->cache_level;
+
+	drm_gem_object_unreference(&obj->base);
+unlock:
+	mutex_unlock(&dev->struct_mutex);
+	return ret;
+}
+
+int i915_gem_set_cache_level_ioctl(struct drm_device *dev, void *data,
+				   struct drm_file *file)
+{
+	struct drm_i915_gem_cache_level *args = data;
+	struct drm_i915_gem_object *obj;
+	int ret;
+
+	ret = i915_mutex_lock_interruptible(dev);
+	if (ret)
+		return ret;
+
+	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
+	if (&obj->base == NULL) {
+		ret = -ENOENT;
+		goto unlock;
+	}
+
+	ret = i915_gem_object_set_cache_level(obj, args->cache_level);
+
+	drm_gem_object_unreference(&obj->base);
+unlock:
+	mutex_unlock(&dev->struct_mutex);
+	return ret;
+}
+
 /*
  * Prepare buffer for display plane (scanout, cursors, etc).
  * Can be called from an uninterruptible phase (modesetting) and allows
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index 564005e..058feba 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -203,6 +203,8 @@  typedef struct _drm_i915_sarea {
 #define DRM_I915_GEM_WAIT	0x2c
 #define DRM_I915_GEM_CONTEXT_CREATE	0x2d
 #define DRM_I915_GEM_CONTEXT_DESTROY	0x2e
+#define DRM_I915_GEM_SET_CACHE_LEVEL	0x2f
+#define DRM_I915_GEM_GET_CACHE_LEVEL	0x30
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -227,6 +229,8 @@  typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_PIN		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin)
 #define DRM_IOCTL_I915_GEM_UNPIN	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin)
 #define DRM_IOCTL_I915_GEM_BUSY		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy)
+#define DRM_IOCTL_I915_GEM_SET_CACHE_LEVEL		DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_SET_CACHE_LEVEL, struct drm_i915_gem_cache_level)
+#define DRM_IOCTL_I915_GEM_GET_CACHE_LEVEL		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_GET_CACHE_LEVEL, struct drm_i915_gem_cache_level)
 #define DRM_IOCTL_I915_GEM_THROTTLE	DRM_IO ( DRM_COMMAND_BASE + DRM_I915_GEM_THROTTLE)
 #define DRM_IOCTL_I915_GEM_ENTERVT	DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_ENTERVT)
 #define DRM_IOCTL_I915_GEM_LEAVEVT	DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_LEAVEVT)
@@ -707,6 +711,18 @@  struct drm_i915_gem_busy {
 	__u32 busy;
 };
 
+#define I915_CACHE_LEVEL_NONE		0
+#define I915_CACHE_LEVEL_LLC		1
+#define I915_CACHE_LEVEL_LLC_MLC	2 /* gen6+ */
+
+struct drm_i915_gem_cache_level {
+	/** Handle of the buffer to check for busy */
+	__u32 handle;
+
+	/** Cache level to apply or return value */
+	__u32 cache_level;
+};
+
 #define I915_TILING_NONE	0
 #define I915_TILING_X		1
 #define I915_TILING_Y		2