diff mbox

[2/4] drm/i915: Support for creating Stolen memory backed objects

Message ID 1435742747-3782-3-git-send-email-ankitprasad.r.sharma@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

ankitprasad.r.sharma@intel.com July 1, 2015, 9:25 a.m. UTC
From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>

Extend the drm_i915_gem_create structure to add support for
creating Stolen memory backed objects. Added a new flag through
which user can specify the preference to allocate the object from
stolen memory, which if set, an attempt will be made to allocate
the object from stolen memory subject to the availability of
free space in the stolen region.

v2: Rebased to the latest drm-intel-nightly (Ankit)

testcase: igt/gem_stolen

Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
---
 drivers/gpu/drm/i915/i915_dma.c |  3 +++
 drivers/gpu/drm/i915/i915_gem.c | 31 +++++++++++++++++++++++++++----
 include/uapi/drm/i915_drm.h     | 15 +++++++++++++++
 3 files changed, 45 insertions(+), 4 deletions(-)

Comments

Tvrtko Ursulin July 1, 2015, 3:06 p.m. UTC | #1
On 07/01/2015 10:25 AM, ankitprasad.r.sharma@intel.com wrote:
> From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
>
> Extend the drm_i915_gem_create structure to add support for
> creating Stolen memory backed objects. Added a new flag through
> which user can specify the preference to allocate the object from
> stolen memory, which if set, an attempt will be made to allocate
> the object from stolen memory subject to the availability of
> free space in the stolen region.
>
> v2: Rebased to the latest drm-intel-nightly (Ankit)
>
> testcase: igt/gem_stolen
>
> Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_dma.c |  3 +++
>   drivers/gpu/drm/i915/i915_gem.c | 31 +++++++++++++++++++++++++++----
>   include/uapi/drm/i915_drm.h     | 15 +++++++++++++++
>   3 files changed, 45 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index c5349fa..6045749 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -167,6 +167,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
>   		value = i915.enable_hangcheck &&
>   			intel_has_gpu_reset(dev);
>   		break;
> +	case I915_PARAM_CREATE_VERSION:
> +		value = 1;

Shouldn't it be 2?

> +		break;
>   	default:
>   		DRM_DEBUG("Unknown parameter %d\n", param->param);
>   		return -EINVAL;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index a2a4a27..4acf331 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -391,7 +391,8 @@ static int
>   i915_gem_create(struct drm_file *file,
>   		struct drm_device *dev,
>   		uint64_t size,
> -		uint32_t *handle_p)
> +		uint32_t *handle_p,
> +		uint32_t flags)
>   {
>   	struct drm_i915_gem_object *obj;
>   	int ret;
> @@ -401,8 +402,29 @@ i915_gem_create(struct drm_file *file,
>   	if (size == 0)
>   		return -EINVAL;
>
> +	if (flags & ~(I915_CREATE_PLACEMENT_STOLEN))
> +		return -EINVAL;
> +
>   	/* Allocate the new object */
> -	obj = i915_gem_alloc_object(dev, size);
> +	if (flags & I915_CREATE_PLACEMENT_STOLEN) {
> +		mutex_lock(&dev->struct_mutex);

Probably need the interruptible variant so userspace can Ctrl-C if 
things get stuck in submission/waiting.

> +		obj = i915_gem_object_create_stolen(dev, size);
> +		if (!obj) {
> +			mutex_unlock(&dev->struct_mutex);
> +			return -ENOMEM;
> +		}
> +
> +		ret = i915_gem_exec_clear_object(obj, file->driver_priv);

I would put a comment here saying why it is important to clear stolen 
memory.

> +		if (ret) {
> +			i915_gem_object_free(obj);

This should probably be drm_gem_object_unreference.

> +			mutex_unlock(&dev->struct_mutex);
> +			return ret;
> +		}
> +
> +		mutex_unlock(&dev->struct_mutex);
> +	} else
> +		obj = i915_gem_alloc_object(dev, size);

Need curly braces on both branches.

>   	if (obj == NULL)
>   		return -ENOMEM;
>
> @@ -425,7 +447,7 @@ i915_gem_dumb_create(struct drm_file *file,
>   	args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
>   	args->size = args->pitch * args->height;
>   	return i915_gem_create(file, dev,
> -			       args->size, &args->handle);
> +			       args->size, &args->handle, 0);
>   }
>
>   /**
> @@ -438,7 +460,8 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data,
>   	struct drm_i915_gem_create *args = data;
>
>   	return i915_gem_create(file, dev,
> -			       args->size, &args->handle);
> +			       args->size, &args->handle,
> +			       args->flags);
>   }
>
>   static inline int
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index f88cc1c..87992d1 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -355,6 +355,7 @@ typedef struct drm_i915_irq_wait {
>   #define I915_PARAM_SUBSLICE_TOTAL	 33
>   #define I915_PARAM_EU_TOTAL		 34
>   #define I915_PARAM_HAS_GPU_RESET	 35
> +#define I915_PARAM_CREATE_VERSION	 36
>
>   typedef struct drm_i915_getparam {
>   	int param;
> @@ -450,6 +451,20 @@ struct drm_i915_gem_create {
>   	 */
>   	__u32 handle;
>   	__u32 pad;
> +	/**
> +	 * Requested flags (currently used for placement
> +	 * (which memory domain))
> +	 *
> +	 * You can request that the object be created from special memory
> +	 * rather than regular system pages using this parameter. Such
> +	 * irregular objects may have certain restrictions (such as CPU
> +	 * access to a stolen object is verboten).

I'd just use English all the way. :)

> +	 *
> +	 * This can be used in the future for other purposes too
> +	 * e.g. specifying tiling/caching/madvise
> +	 */
> +	__u32 flags;
> +#define I915_CREATE_PLACEMENT_STOLEN (1<<0) /* Cannot use CPU mmaps or pread/pwrite */
>   };
>
>   struct drm_i915_gem_pread {
>

Regards,

Tvrtko
Chris Wilson July 1, 2015, 4:19 p.m. UTC | #2
On Wed, Jul 01, 2015 at 04:06:49PM +0100, Tvrtko Ursulin wrote:
> 
> 
> On 07/01/2015 10:25 AM, ankitprasad.r.sharma@intel.com wrote:
> >From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> >
> >Extend the drm_i915_gem_create structure to add support for
> >creating Stolen memory backed objects. Added a new flag through
> >which user can specify the preference to allocate the object from
> >stolen memory, which if set, an attempt will be made to allocate
> >the object from stolen memory subject to the availability of
> >free space in the stolen region.
> >
> >v2: Rebased to the latest drm-intel-nightly (Ankit)
> >
> >testcase: igt/gem_stolen
> >
> >Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> >---
> >  drivers/gpu/drm/i915/i915_dma.c |  3 +++
> >  drivers/gpu/drm/i915/i915_gem.c | 31 +++++++++++++++++++++++++++----
> >  include/uapi/drm/i915_drm.h     | 15 +++++++++++++++
> >  3 files changed, 45 insertions(+), 4 deletions(-)
> >
> >diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> >index c5349fa..6045749 100644
> >--- a/drivers/gpu/drm/i915/i915_dma.c
> >+++ b/drivers/gpu/drm/i915/i915_dma.c
> >@@ -167,6 +167,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
> >  		value = i915.enable_hangcheck &&
> >  			intel_has_gpu_reset(dev);
> >  		break;
> >+	case I915_PARAM_CREATE_VERSION:
> >+		value = 1;
> 
> Shouldn't it be 2?

But 1 is the 2nd number, discounting all those pesky negative versions :)

> >  	/* Allocate the new object */
> >-	obj = i915_gem_alloc_object(dev, size);
> >+	if (flags & I915_CREATE_PLACEMENT_STOLEN) {
> >+		mutex_lock(&dev->struct_mutex);
> 
> Probably need the interruptible variant so userspace can Ctrl-C if
> things get stuck in submission/waiting.

Paulo has been working on removing this struct_mutex requirement, in
which case internally we will take a stolen_mutex around the drm_mm as
required.
 
> >+		obj = i915_gem_object_create_stolen(dev, size);
> >+		if (!obj) {
> >+			mutex_unlock(&dev->struct_mutex);
> >+			return -ENOMEM;
> >+		}
> >+

And pushes the struct_mutex to here (one day, one glorious day that will
be a vm->mutex or something!).

And yes, you will want, nay must use,

	ret = i915_mutex_interruptible(dev);

before thinking about using the GPU.

> >+		ret = i915_gem_exec_clear_object(obj, file->driver_priv);
> 
> I would put a comment here saying why it is important to clear
> stolen memory.

Userspace ABI (and kernel ABI in general) is that we do not hand back
uncleared buffers. Something to do with bank card details I guess.
So just:

/* always clear fresh buffers before handing to userspace */

An alternative is that I've been contemplating a private page pool to
reuse and not clear. It's a trade-off between having a large cache in
userspace, and a less flexible cache in the kernel with the supposed
advantage that the kernel cache could be more space efficient.

> >+		if (ret) {
> >+			i915_gem_object_free(obj);
> 
> This should probably be drm_gem_object_unreference.
> 
> >+			mutex_unlock(&dev->struct_mutex);
> >+			return ret;
> >+		}
> >+
> >+		mutex_unlock(&dev->struct_mutex);
> >+	} else
> >+		obj = i915_gem_alloc_object(dev, size);
> 
> Need curly braces on both branches.

I am sure someone hacked CODING_STYLE. Or I should.
 
> >@@ -355,6 +355,7 @@ typedef struct drm_i915_irq_wait {
> >  #define I915_PARAM_SUBSLICE_TOTAL	 33
> >  #define I915_PARAM_EU_TOTAL		 34
> >  #define I915_PARAM_HAS_GPU_RESET	 35
> >+#define I915_PARAM_CREATE_VERSION	 36
> >
> >  typedef struct drm_i915_getparam {
> >  	int param;
> >@@ -450,6 +451,20 @@ struct drm_i915_gem_create {
> >  	 */
> >  	__u32 handle;
> >  	__u32 pad;
> >+	/**
> >+	 * Requested flags (currently used for placement
> >+	 * (which memory domain))
> >+	 *
> >+	 * You can request that the object be created from special memory
> >+	 * rather than regular system pages using this parameter. Such
> >+	 * irregular objects may have certain restrictions (such as CPU
> >+	 * access to a stolen object is verboten).
> 
> I'd just use English all the way. :)

Heh!, English is highly adaptible language and steals good words all
the time!

> >+	 *
> >+	 * This can be used in the future for other purposes too
> >+	 * e.g. specifying tiling/caching/madvise
> >+	 */
> >+	__u32 flags;
> >+#define I915_CREATE_PLACEMENT_STOLEN (1<<0) /* Cannot use CPU mmaps or pread/pwrite */

Note that we dropped the pread/pwrite restriction.
-Chris
Tvrtko Ursulin July 1, 2015, 4:20 p.m. UTC | #3
On 07/01/2015 10:25 AM, ankitprasad.r.sharma@intel.com wrote:
> From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
>
> Extend the drm_i915_gem_create structure to add support for
> creating Stolen memory backed objects. Added a new flag through
> which user can specify the preference to allocate the object from
> stolen memory, which if set, an attempt will be made to allocate
> the object from stolen memory subject to the availability of
> free space in the stolen region.
>
> v2: Rebased to the latest drm-intel-nightly (Ankit)
>
> testcase: igt/gem_stolen
>
> Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_dma.c |  3 +++
>   drivers/gpu/drm/i915/i915_gem.c | 31 +++++++++++++++++++++++++++----
>   include/uapi/drm/i915_drm.h     | 15 +++++++++++++++
>   3 files changed, 45 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index c5349fa..6045749 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -167,6 +167,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
>   		value = i915.enable_hangcheck &&
>   			intel_has_gpu_reset(dev);
>   		break;
> +	case I915_PARAM_CREATE_VERSION:
> +		value = 1;
> +		break;
>   	default:
>   		DRM_DEBUG("Unknown parameter %d\n", param->param);
>   		return -EINVAL;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index a2a4a27..4acf331 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -391,7 +391,8 @@ static int
>   i915_gem_create(struct drm_file *file,
>   		struct drm_device *dev,
>   		uint64_t size,
> -		uint32_t *handle_p)
> +		uint32_t *handle_p,
> +		uint32_t flags)
>   {
>   	struct drm_i915_gem_object *obj;
>   	int ret;
> @@ -401,8 +402,29 @@ i915_gem_create(struct drm_file *file,
>   	if (size == 0)
>   		return -EINVAL;
>
> +	if (flags & ~(I915_CREATE_PLACEMENT_STOLEN))
> +		return -EINVAL;
> +
>   	/* Allocate the new object */
> -	obj = i915_gem_alloc_object(dev, size);
> +	if (flags & I915_CREATE_PLACEMENT_STOLEN) {
> +		mutex_lock(&dev->struct_mutex);
> +		obj = i915_gem_object_create_stolen(dev, size);

One more thing here, size is u64 in this function but 
i915_gem_object_create_stolen takes u32. Is compiler not noticing this?

(And i915_gem_alloc_object is size_t for a complete win!) :D

Regards,

Tvrtko
Tvrtko Ursulin July 2, 2015, 9:37 a.m. UTC | #4
On 07/01/2015 05:19 PM, Chris Wilson wrote:
> On Wed, Jul 01, 2015 at 04:06:49PM +0100, Tvrtko Ursulin wrote:
>>> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
>>> index c5349fa..6045749 100644
>>> --- a/drivers/gpu/drm/i915/i915_dma.c
>>> +++ b/drivers/gpu/drm/i915/i915_dma.c
>>> @@ -167,6 +167,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
>>>   		value = i915.enable_hangcheck &&
>>>   			intel_has_gpu_reset(dev);
>>>   		break;
>>> +	case I915_PARAM_CREATE_VERSION:
>>> +		value = 1;
>>
>> Shouldn't it be 2?
>
> But 1 is the 2nd number, discounting all those pesky negative versions :)

It would be more obvious I think, even though I915_PARAM_CREATE_VERSION 
which returns 1 would never exist.

>>> +		ret = i915_gem_exec_clear_object(obj, file->driver_priv);
>>
>> I would put a comment here saying why it is important to clear
>> stolen memory.
>
> Userspace ABI (and kernel ABI in general) is that we do not hand back
> uncleared buffers. Something to do with bank card details I guess.
> So just:

Yes thats obvious - but where it is done for normal objects? Can't find 
it... is it hidden in shmemfs somewhere? If so reinforces the need for a 
comment here.

Regards,

Tvrtko
Chris Wilson July 2, 2015, 9:43 a.m. UTC | #5
On Thu, Jul 02, 2015 at 10:37:56AM +0100, Tvrtko Ursulin wrote:
> 
> On 07/01/2015 05:19 PM, Chris Wilson wrote:
> >On Wed, Jul 01, 2015 at 04:06:49PM +0100, Tvrtko Ursulin wrote:
> >>>diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> >>>index c5349fa..6045749 100644
> >>>--- a/drivers/gpu/drm/i915/i915_dma.c
> >>>+++ b/drivers/gpu/drm/i915/i915_dma.c
> >>>@@ -167,6 +167,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
> >>>  		value = i915.enable_hangcheck &&
> >>>  			intel_has_gpu_reset(dev);
> >>>  		break;
> >>>+	case I915_PARAM_CREATE_VERSION:
> >>>+		value = 1;
> >>
> >>Shouldn't it be 2?
> >
> >But 1 is the 2nd number, discounting all those pesky negative versions :)
> 
> It would be more obvious I think, even though
> I915_PARAM_CREATE_VERSION which returns 1 would never exist.

I don't disagree. I was just used to always starting my versions at 1, 0
being the unversioned.
 
> >>>+		ret = i915_gem_exec_clear_object(obj, file->driver_priv);
> >>
> >>I would put a comment here saying why it is important to clear
> >>stolen memory.
> >
> >Userspace ABI (and kernel ABI in general) is that we do not hand back
> >uncleared buffers. Something to do with bank card details I guess.
> >So just:
> 
> Yes thats obvious - but where it is done for normal objects? Can't
> find it... is it hidden in shmemfs somewhere? If so reinforces the
> need for a comment here.

Yes, it is deep within shmemfs, shmem_getpage_gfp() will call
clear_highpage() on new pages. It is an unfortunate cost that we want to
avoid for frequently allocated internal objects (such as contexts, pdp)
where we just want a simple allocator rather than shmemfs.
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index c5349fa..6045749 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -167,6 +167,9 @@  static int i915_getparam(struct drm_device *dev, void *data,
 		value = i915.enable_hangcheck &&
 			intel_has_gpu_reset(dev);
 		break;
+	case I915_PARAM_CREATE_VERSION:
+		value = 1;
+		break;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", param->param);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a2a4a27..4acf331 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -391,7 +391,8 @@  static int
 i915_gem_create(struct drm_file *file,
 		struct drm_device *dev,
 		uint64_t size,
-		uint32_t *handle_p)
+		uint32_t *handle_p,
+		uint32_t flags)
 {
 	struct drm_i915_gem_object *obj;
 	int ret;
@@ -401,8 +402,29 @@  i915_gem_create(struct drm_file *file,
 	if (size == 0)
 		return -EINVAL;
 
+	if (flags & ~(I915_CREATE_PLACEMENT_STOLEN))
+		return -EINVAL;
+
 	/* Allocate the new object */
-	obj = i915_gem_alloc_object(dev, size);
+	if (flags & I915_CREATE_PLACEMENT_STOLEN) {
+		mutex_lock(&dev->struct_mutex);
+		obj = i915_gem_object_create_stolen(dev, size);
+		if (!obj) {
+			mutex_unlock(&dev->struct_mutex);
+			return -ENOMEM;
+		}
+
+		ret = i915_gem_exec_clear_object(obj, file->driver_priv);
+		if (ret) {
+			i915_gem_object_free(obj);
+			mutex_unlock(&dev->struct_mutex);
+			return ret;
+		}
+
+		mutex_unlock(&dev->struct_mutex);
+	} else
+		obj = i915_gem_alloc_object(dev, size);
+
 	if (obj == NULL)
 		return -ENOMEM;
 
@@ -425,7 +447,7 @@  i915_gem_dumb_create(struct drm_file *file,
 	args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
 	args->size = args->pitch * args->height;
 	return i915_gem_create(file, dev,
-			       args->size, &args->handle);
+			       args->size, &args->handle, 0);
 }
 
 /**
@@ -438,7 +460,8 @@  i915_gem_create_ioctl(struct drm_device *dev, void *data,
 	struct drm_i915_gem_create *args = data;
 
 	return i915_gem_create(file, dev,
-			       args->size, &args->handle);
+			       args->size, &args->handle,
+			       args->flags);
 }
 
 static inline int
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index f88cc1c..87992d1 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -355,6 +355,7 @@  typedef struct drm_i915_irq_wait {
 #define I915_PARAM_SUBSLICE_TOTAL	 33
 #define I915_PARAM_EU_TOTAL		 34
 #define I915_PARAM_HAS_GPU_RESET	 35
+#define I915_PARAM_CREATE_VERSION	 36
 
 typedef struct drm_i915_getparam {
 	int param;
@@ -450,6 +451,20 @@  struct drm_i915_gem_create {
 	 */
 	__u32 handle;
 	__u32 pad;
+	/**
+	 * Requested flags (currently used for placement
+	 * (which memory domain))
+	 *
+	 * You can request that the object be created from special memory
+	 * rather than regular system pages using this parameter. Such
+	 * irregular objects may have certain restrictions (such as CPU
+	 * access to a stolen object is verboten).
+	 *
+	 * This can be used in the future for other purposes too
+	 * e.g. specifying tiling/caching/madvise
+	 */
+	__u32 flags;
+#define I915_CREATE_PLACEMENT_STOLEN (1<<0) /* Cannot use CPU mmaps or pread/pwrite */
 };
 
 struct drm_i915_gem_pread {