Message ID | 1435742747-3782-3-git-send-email-ankitprasad.r.sharma@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 07/01/2015 10:25 AM, ankitprasad.r.sharma@intel.com wrote: > From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com> > > Extend the drm_i915_gem_create structure to add support for > creating Stolen memory backed objects. Added a new flag through > which user can specify the preference to allocate the object from > stolen memory, which if set, an attempt will be made to allocate > the object from stolen memory subject to the availability of > free space in the stolen region. > > v2: Rebased to the latest drm-intel-nightly (Ankit) > > testcase: igt/gem_stolen > > Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com> > --- > drivers/gpu/drm/i915/i915_dma.c | 3 +++ > drivers/gpu/drm/i915/i915_gem.c | 31 +++++++++++++++++++++++++++---- > include/uapi/drm/i915_drm.h | 15 +++++++++++++++ > 3 files changed, 45 insertions(+), 4 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c > index c5349fa..6045749 100644 > --- a/drivers/gpu/drm/i915/i915_dma.c > +++ b/drivers/gpu/drm/i915/i915_dma.c > @@ -167,6 +167,9 @@ static int i915_getparam(struct drm_device *dev, void *data, > value = i915.enable_hangcheck && > intel_has_gpu_reset(dev); > break; > + case I915_PARAM_CREATE_VERSION: > + value = 1; Shouldn't it be 2? > + break; > default: > DRM_DEBUG("Unknown parameter %d\n", param->param); > return -EINVAL; > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index a2a4a27..4acf331 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -391,7 +391,8 @@ static int > i915_gem_create(struct drm_file *file, > struct drm_device *dev, > uint64_t size, > - uint32_t *handle_p) > + uint32_t *handle_p, > + uint32_t flags) > { > struct drm_i915_gem_object *obj; > int ret; > @@ -401,8 +402,29 @@ i915_gem_create(struct drm_file *file, > if (size == 0) > return -EINVAL; > > + if (flags & ~(I915_CREATE_PLACEMENT_STOLEN)) > + return -EINVAL; > + > /* Allocate the new object */ > - obj = i915_gem_alloc_object(dev, size); > + if (flags & I915_CREATE_PLACEMENT_STOLEN) { > + mutex_lock(&dev->struct_mutex); Probably need the interruptible variant so userspace can Ctrl-C if things get stuck in submission/waiting. > + obj = i915_gem_object_create_stolen(dev, size); > + if (!obj) { > + mutex_unlock(&dev->struct_mutex); > + return -ENOMEM; > + } > + > + ret = i915_gem_exec_clear_object(obj, file->driver_priv); I would put a comment here saying why it is important to clear stolen memory. > + if (ret) { > + i915_gem_object_free(obj); This should probably be drm_gem_object_unreference. > + mutex_unlock(&dev->struct_mutex); > + return ret; > + } > + > + mutex_unlock(&dev->struct_mutex); > + } else > + obj = i915_gem_alloc_object(dev, size); Need curly braces on both branches. > if (obj == NULL) > return -ENOMEM; > > @@ -425,7 +447,7 @@ i915_gem_dumb_create(struct drm_file *file, > args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); > args->size = args->pitch * args->height; > return i915_gem_create(file, dev, > - args->size, &args->handle); > + args->size, &args->handle, 0); > } > > /** > @@ -438,7 +460,8 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data, > struct drm_i915_gem_create *args = data; > > return i915_gem_create(file, dev, > - args->size, &args->handle); > + args->size, &args->handle, > + args->flags); > } > > static inline int > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index f88cc1c..87992d1 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -355,6 +355,7 @@ typedef struct drm_i915_irq_wait { > #define I915_PARAM_SUBSLICE_TOTAL 33 > #define I915_PARAM_EU_TOTAL 34 > #define I915_PARAM_HAS_GPU_RESET 35 > +#define I915_PARAM_CREATE_VERSION 36 > > typedef struct drm_i915_getparam { > int param; > @@ -450,6 +451,20 @@ struct drm_i915_gem_create { > */ > __u32 handle; > __u32 pad; > + /** > + * Requested flags (currently used for placement > + * (which memory domain)) > + * > + * You can request that the object be created from special memory > + * rather than regular system pages using this parameter. Such > + * irregular objects may have certain restrictions (such as CPU > + * access to a stolen object is verboten). I'd just use English all the way. :) > + * > + * This can be used in the future for other purposes too > + * e.g. specifying tiling/caching/madvise > + */ > + __u32 flags; > +#define I915_CREATE_PLACEMENT_STOLEN (1<<0) /* Cannot use CPU mmaps or pread/pwrite */ > }; > > struct drm_i915_gem_pread { > Regards, Tvrtko
On Wed, Jul 01, 2015 at 04:06:49PM +0100, Tvrtko Ursulin wrote: > > > On 07/01/2015 10:25 AM, ankitprasad.r.sharma@intel.com wrote: > >From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com> > > > >Extend the drm_i915_gem_create structure to add support for > >creating Stolen memory backed objects. Added a new flag through > >which user can specify the preference to allocate the object from > >stolen memory, which if set, an attempt will be made to allocate > >the object from stolen memory subject to the availability of > >free space in the stolen region. > > > >v2: Rebased to the latest drm-intel-nightly (Ankit) > > > >testcase: igt/gem_stolen > > > >Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com> > >--- > > drivers/gpu/drm/i915/i915_dma.c | 3 +++ > > drivers/gpu/drm/i915/i915_gem.c | 31 +++++++++++++++++++++++++++---- > > include/uapi/drm/i915_drm.h | 15 +++++++++++++++ > > 3 files changed, 45 insertions(+), 4 deletions(-) > > > >diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c > >index c5349fa..6045749 100644 > >--- a/drivers/gpu/drm/i915/i915_dma.c > >+++ b/drivers/gpu/drm/i915/i915_dma.c > >@@ -167,6 +167,9 @@ static int i915_getparam(struct drm_device *dev, void *data, > > value = i915.enable_hangcheck && > > intel_has_gpu_reset(dev); > > break; > >+ case I915_PARAM_CREATE_VERSION: > >+ value = 1; > > Shouldn't it be 2? But 1 is the 2nd number, discounting all those pesky negative versions :) > > /* Allocate the new object */ > >- obj = i915_gem_alloc_object(dev, size); > >+ if (flags & I915_CREATE_PLACEMENT_STOLEN) { > >+ mutex_lock(&dev->struct_mutex); > > Probably need the interruptible variant so userspace can Ctrl-C if > things get stuck in submission/waiting. Paulo has been working on removing this struct_mutex requirement, in which case internally we will take a stolen_mutex around the drm_mm as required. > >+ obj = i915_gem_object_create_stolen(dev, size); > >+ if (!obj) { > >+ mutex_unlock(&dev->struct_mutex); > >+ return -ENOMEM; > >+ } > >+ And pushes the struct_mutex to here (one day, one glorious day that will be a vm->mutex or something!). And yes, you will want, nay must use, ret = i915_mutex_interruptible(dev); before thinking about using the GPU. > >+ ret = i915_gem_exec_clear_object(obj, file->driver_priv); > > I would put a comment here saying why it is important to clear > stolen memory. Userspace ABI (and kernel ABI in general) is that we do not hand back uncleared buffers. Something to do with bank card details I guess. So just: /* always clear fresh buffers before handing to userspace */ An alternative is that I've been contemplating a private page pool to reuse and not clear. It's a trade-off between having a large cache in userspace, and a less flexible cache in the kernel with the supposed advantage that the kernel cache could be more space efficient. > >+ if (ret) { > >+ i915_gem_object_free(obj); > > This should probably be drm_gem_object_unreference. > > >+ mutex_unlock(&dev->struct_mutex); > >+ return ret; > >+ } > >+ > >+ mutex_unlock(&dev->struct_mutex); > >+ } else > >+ obj = i915_gem_alloc_object(dev, size); > > Need curly braces on both branches. I am sure someone hacked CODING_STYLE. Or I should. > >@@ -355,6 +355,7 @@ typedef struct drm_i915_irq_wait { > > #define I915_PARAM_SUBSLICE_TOTAL 33 > > #define I915_PARAM_EU_TOTAL 34 > > #define I915_PARAM_HAS_GPU_RESET 35 > >+#define I915_PARAM_CREATE_VERSION 36 > > > > typedef struct drm_i915_getparam { > > int param; > >@@ -450,6 +451,20 @@ struct drm_i915_gem_create { > > */ > > __u32 handle; > > __u32 pad; > >+ /** > >+ * Requested flags (currently used for placement > >+ * (which memory domain)) > >+ * > >+ * You can request that the object be created from special memory > >+ * rather than regular system pages using this parameter. Such > >+ * irregular objects may have certain restrictions (such as CPU > >+ * access to a stolen object is verboten). > > I'd just use English all the way. :) Heh!, English is highly adaptible language and steals good words all the time! > >+ * > >+ * This can be used in the future for other purposes too > >+ * e.g. specifying tiling/caching/madvise > >+ */ > >+ __u32 flags; > >+#define I915_CREATE_PLACEMENT_STOLEN (1<<0) /* Cannot use CPU mmaps or pread/pwrite */ Note that we dropped the pread/pwrite restriction. -Chris
On 07/01/2015 10:25 AM, ankitprasad.r.sharma@intel.com wrote: > From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com> > > Extend the drm_i915_gem_create structure to add support for > creating Stolen memory backed objects. Added a new flag through > which user can specify the preference to allocate the object from > stolen memory, which if set, an attempt will be made to allocate > the object from stolen memory subject to the availability of > free space in the stolen region. > > v2: Rebased to the latest drm-intel-nightly (Ankit) > > testcase: igt/gem_stolen > > Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com> > --- > drivers/gpu/drm/i915/i915_dma.c | 3 +++ > drivers/gpu/drm/i915/i915_gem.c | 31 +++++++++++++++++++++++++++---- > include/uapi/drm/i915_drm.h | 15 +++++++++++++++ > 3 files changed, 45 insertions(+), 4 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c > index c5349fa..6045749 100644 > --- a/drivers/gpu/drm/i915/i915_dma.c > +++ b/drivers/gpu/drm/i915/i915_dma.c > @@ -167,6 +167,9 @@ static int i915_getparam(struct drm_device *dev, void *data, > value = i915.enable_hangcheck && > intel_has_gpu_reset(dev); > break; > + case I915_PARAM_CREATE_VERSION: > + value = 1; > + break; > default: > DRM_DEBUG("Unknown parameter %d\n", param->param); > return -EINVAL; > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index a2a4a27..4acf331 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -391,7 +391,8 @@ static int > i915_gem_create(struct drm_file *file, > struct drm_device *dev, > uint64_t size, > - uint32_t *handle_p) > + uint32_t *handle_p, > + uint32_t flags) > { > struct drm_i915_gem_object *obj; > int ret; > @@ -401,8 +402,29 @@ i915_gem_create(struct drm_file *file, > if (size == 0) > return -EINVAL; > > + if (flags & ~(I915_CREATE_PLACEMENT_STOLEN)) > + return -EINVAL; > + > /* Allocate the new object */ > - obj = i915_gem_alloc_object(dev, size); > + if (flags & I915_CREATE_PLACEMENT_STOLEN) { > + mutex_lock(&dev->struct_mutex); > + obj = i915_gem_object_create_stolen(dev, size); One more thing here, size is u64 in this function but i915_gem_object_create_stolen takes u32. Is compiler not noticing this? (And i915_gem_alloc_object is size_t for a complete win!) :D Regards, Tvrtko
On 07/01/2015 05:19 PM, Chris Wilson wrote: > On Wed, Jul 01, 2015 at 04:06:49PM +0100, Tvrtko Ursulin wrote: >>> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c >>> index c5349fa..6045749 100644 >>> --- a/drivers/gpu/drm/i915/i915_dma.c >>> +++ b/drivers/gpu/drm/i915/i915_dma.c >>> @@ -167,6 +167,9 @@ static int i915_getparam(struct drm_device *dev, void *data, >>> value = i915.enable_hangcheck && >>> intel_has_gpu_reset(dev); >>> break; >>> + case I915_PARAM_CREATE_VERSION: >>> + value = 1; >> >> Shouldn't it be 2? > > But 1 is the 2nd number, discounting all those pesky negative versions :) It would be more obvious I think, even though I915_PARAM_CREATE_VERSION which returns 1 would never exist. >>> + ret = i915_gem_exec_clear_object(obj, file->driver_priv); >> >> I would put a comment here saying why it is important to clear >> stolen memory. > > Userspace ABI (and kernel ABI in general) is that we do not hand back > uncleared buffers. Something to do with bank card details I guess. > So just: Yes thats obvious - but where it is done for normal objects? Can't find it... is it hidden in shmemfs somewhere? If so reinforces the need for a comment here. Regards, Tvrtko
On Thu, Jul 02, 2015 at 10:37:56AM +0100, Tvrtko Ursulin wrote: > > On 07/01/2015 05:19 PM, Chris Wilson wrote: > >On Wed, Jul 01, 2015 at 04:06:49PM +0100, Tvrtko Ursulin wrote: > >>>diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c > >>>index c5349fa..6045749 100644 > >>>--- a/drivers/gpu/drm/i915/i915_dma.c > >>>+++ b/drivers/gpu/drm/i915/i915_dma.c > >>>@@ -167,6 +167,9 @@ static int i915_getparam(struct drm_device *dev, void *data, > >>> value = i915.enable_hangcheck && > >>> intel_has_gpu_reset(dev); > >>> break; > >>>+ case I915_PARAM_CREATE_VERSION: > >>>+ value = 1; > >> > >>Shouldn't it be 2? > > > >But 1 is the 2nd number, discounting all those pesky negative versions :) > > It would be more obvious I think, even though > I915_PARAM_CREATE_VERSION which returns 1 would never exist. I don't disagree. I was just used to always starting my versions at 1, 0 being the unversioned. > >>>+ ret = i915_gem_exec_clear_object(obj, file->driver_priv); > >> > >>I would put a comment here saying why it is important to clear > >>stolen memory. > > > >Userspace ABI (and kernel ABI in general) is that we do not hand back > >uncleared buffers. Something to do with bank card details I guess. > >So just: > > Yes thats obvious - but where it is done for normal objects? Can't > find it... is it hidden in shmemfs somewhere? If so reinforces the > need for a comment here. Yes, it is deep within shmemfs, shmem_getpage_gfp() will call clear_highpage() on new pages. It is an unfortunate cost that we want to avoid for frequently allocated internal objects (such as contexts, pdp) where we just want a simple allocator rather than shmemfs. -Chris
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index c5349fa..6045749 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -167,6 +167,9 @@ static int i915_getparam(struct drm_device *dev, void *data, value = i915.enable_hangcheck && intel_has_gpu_reset(dev); break; + case I915_PARAM_CREATE_VERSION: + value = 1; + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a2a4a27..4acf331 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -391,7 +391,8 @@ static int i915_gem_create(struct drm_file *file, struct drm_device *dev, uint64_t size, - uint32_t *handle_p) + uint32_t *handle_p, + uint32_t flags) { struct drm_i915_gem_object *obj; int ret; @@ -401,8 +402,29 @@ i915_gem_create(struct drm_file *file, if (size == 0) return -EINVAL; + if (flags & ~(I915_CREATE_PLACEMENT_STOLEN)) + return -EINVAL; + /* Allocate the new object */ - obj = i915_gem_alloc_object(dev, size); + if (flags & I915_CREATE_PLACEMENT_STOLEN) { + mutex_lock(&dev->struct_mutex); + obj = i915_gem_object_create_stolen(dev, size); + if (!obj) { + mutex_unlock(&dev->struct_mutex); + return -ENOMEM; + } + + ret = i915_gem_exec_clear_object(obj, file->driver_priv); + if (ret) { + i915_gem_object_free(obj); + mutex_unlock(&dev->struct_mutex); + return ret; + } + + mutex_unlock(&dev->struct_mutex); + } else + obj = i915_gem_alloc_object(dev, size); + if (obj == NULL) return -ENOMEM; @@ -425,7 +447,7 @@ i915_gem_dumb_create(struct drm_file *file, args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); args->size = args->pitch * args->height; return i915_gem_create(file, dev, - args->size, &args->handle); + args->size, &args->handle, 0); } /** @@ -438,7 +460,8 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data, struct drm_i915_gem_create *args = data; return i915_gem_create(file, dev, - args->size, &args->handle); + args->size, &args->handle, + args->flags); } static inline int diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index f88cc1c..87992d1 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -355,6 +355,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_SUBSLICE_TOTAL 33 #define I915_PARAM_EU_TOTAL 34 #define I915_PARAM_HAS_GPU_RESET 35 +#define I915_PARAM_CREATE_VERSION 36 typedef struct drm_i915_getparam { int param; @@ -450,6 +451,20 @@ struct drm_i915_gem_create { */ __u32 handle; __u32 pad; + /** + * Requested flags (currently used for placement + * (which memory domain)) + * + * You can request that the object be created from special memory + * rather than regular system pages using this parameter. Such + * irregular objects may have certain restrictions (such as CPU + * access to a stolen object is verboten). + * + * This can be used in the future for other purposes too + * e.g. specifying tiling/caching/madvise + */ + __u32 flags; +#define I915_CREATE_PLACEMENT_STOLEN (1<<0) /* Cannot use CPU mmaps or pread/pwrite */ }; struct drm_i915_gem_pread {