Message ID | 1440417496-3175-2-git-send-email-ankitprasad.r.sharma@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Mon, Aug 24, 2015 at 05:28:14PM +0530, ankitprasad.r.sharma@intel.com wrote: > +static int > +__i915_gem_object_get_pages(struct drm_i915_gem_object *obj) > +{ > + const struct drm_i915_gem_object_ops *ops = obj->ops; > + int ret; > + > + WARN_ON(obj->pages); > + > + if (obj->madv != I915_MADV_WILLNEED) { > + DRM_DEBUG("Attempting to obtain a purgeable object\n"); > + return -EFAULT; > + } > + > + BUG_ON(obj->pages_pin_count); Put the parameter checking into i915_gem_object_get_pages(). The __i915 version is only allowed from strict contexts and we can place the burden of being correct on the caller. > + ret = ops->get_pages(obj); > + if (ret) > + return ret; > + > + obj->get_page.sg = obj->pages->sgl; > + obj->get_page.last = 0; > + > + return 0; > +} > + > /* Ensure that the associated pages are gathered from the backing storage > * and pinned into our object. i915_gem_object_get_pages() may be called > * multiple times before they are released by a single call to > @@ -2339,28 +2377,17 @@ int > i915_gem_object_get_pages(struct drm_i915_gem_object *obj) > { > struct drm_i915_private *dev_priv = obj->base.dev->dev_private; > - const struct drm_i915_gem_object_ops *ops = obj->ops; > int ret; > > if (obj->pages) > return 0; > > - if (obj->madv != I915_MADV_WILLNEED) { > - DRM_DEBUG("Attempting to obtain a purgeable object\n"); > - return -EFAULT; > - } > - > - BUG_ON(obj->pages_pin_count); > - > - ret = ops->get_pages(obj); > + ret = __i915_gem_object_get_pages(obj); > if (ret) > return ret; > > list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list); I am tempted to say this should be in a new __i915_gem_object_get_pages__tail_locked() so that we don't have to hunt down users if we ever need to modify the global lists. -Chris
On 24/08/2015 12:58, ankitprasad.r.sharma@intel.com wrote: > From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com> > > This patch provides support for the User to populate the object > with system pages at its creation time. Since this can be safely > performed without holding the 'struct_mutex', it would help to reduce > the time 'struct_mutex' is kept locked especially during the exec-buffer > path, where it is generally held for the longest time. > > Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com> > --- > drivers/gpu/drm/i915/i915_dma.c | 2 +- > drivers/gpu/drm/i915/i915_gem.c | 51 +++++++++++++++++++++++++++++++---------- > include/uapi/drm/i915_drm.h | 11 ++++----- > 3 files changed, 45 insertions(+), 19 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c > index 8319e07..955aa16 100644 > --- a/drivers/gpu/drm/i915/i915_dma.c > +++ b/drivers/gpu/drm/i915/i915_dma.c > @@ -171,7 +171,7 @@ static int i915_getparam(struct drm_device *dev, void *data, > value = HAS_RESOURCE_STREAMER(dev); > break; > case I915_PARAM_CREATE_VERSION: > - value = 2; > + value = 3; > break; > default: > DRM_DEBUG("Unknown parameter %d\n", param->param); > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index c44bd05..3904feb 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -46,6 +46,7 @@ static void > i915_gem_object_retire__write(struct drm_i915_gem_object *obj); > static void > i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring); > +static int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj); > > static bool cpu_cache_is_coherent(struct drm_device *dev, > enum i915_cache_level level) > @@ -414,6 +415,18 @@ i915_gem_create(struct drm_file *file, > if (obj == NULL) > return -ENOMEM; > > + if (flags & I915_CREATE_POPULATE) { > + struct drm_i915_private *dev_priv = dev->dev_private; > + > + ret = __i915_gem_object_get_pages(obj); > + if (ret) > + return ret; > + > + mutex_lock(&dev->struct_mutex); > + list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list); > + mutex_unlock(&dev->struct_mutex); > + } > + > ret = drm_gem_handle_create(file, &obj->base, &handle); If I915_CREATE_POPULATE is set, don't we have to release the pages when this call fails? regards Arun > /* drop reference from allocate - handle holds it now */ > drm_gem_object_unreference_unlocked(&obj->base); > @@ -2328,6 +2341,31 @@ err_pages: > return ret; > } > > +static int > +__i915_gem_object_get_pages(struct drm_i915_gem_object *obj) > +{ > + const struct drm_i915_gem_object_ops *ops = obj->ops; > + int ret; > + > + WARN_ON(obj->pages); > + > + if (obj->madv != I915_MADV_WILLNEED) { > + DRM_DEBUG("Attempting to obtain a purgeable object\n"); > + return -EFAULT; > + } > + > + BUG_ON(obj->pages_pin_count); > + > + ret = ops->get_pages(obj); > + if (ret) > + return ret; > + > + obj->get_page.sg = obj->pages->sgl; > + obj->get_page.last = 0; > + > + return 0; > +} > + > /* Ensure that the associated pages are gathered from the backing storage > * and pinned into our object. i915_gem_object_get_pages() may be called > * multiple times before they are released by a single call to > @@ -2339,28 +2377,17 @@ int > i915_gem_object_get_pages(struct drm_i915_gem_object *obj) > { > struct drm_i915_private *dev_priv = obj->base.dev->dev_private; > - const struct drm_i915_gem_object_ops *ops = obj->ops; > int ret; > > if (obj->pages) > return 0; > > - if (obj->madv != I915_MADV_WILLNEED) { > - DRM_DEBUG("Attempting to obtain a purgeable object\n"); > - return -EFAULT; > - } > - > - BUG_ON(obj->pages_pin_count); > - > - ret = ops->get_pages(obj); > + ret = __i915_gem_object_get_pages(obj); > if (ret) > return ret; > > list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list); > > - obj->get_page.sg = obj->pages->sgl; > - obj->get_page.last = 0; > - > return 0; > } > > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index f71f75c..26ea715 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -457,20 +457,19 @@ struct drm_i915_gem_create { > __u32 handle; > __u32 pad; > /** > - * Requested flags (currently used for placement > - * (which memory domain)) > + * Requested flags > * > * You can request that the object be created from special memory > * rather than regular system pages using this parameter. Such > * irregular objects may have certain restrictions (such as CPU > * access to a stolen object is verboten). > - * > - * This can be used in the future for other purposes too > - * e.g. specifying tiling/caching/madvise > + * Also using this parameter object can be pre-populated with system > + * pages. > */ > __u32 flags; > #define I915_CREATE_PLACEMENT_STOLEN (1<<0) /* Cannot use CPU mmaps */ > -#define __I915_CREATE_UNKNOWN_FLAGS -(I915_CREATE_PLACEMENT_STOLEN << 1) > +#define I915_CREATE_POPULATE (1<<1) /* Pre-populate object pages */ > +#define __I915_CREATE_UNKNOWN_FLAGS -(I915_CREATE_POPULATE << 1) > }; > > struct drm_i915_gem_pread { >
On Mon, 2015-08-24 at 13:35 +0100, Chris Wilson wrote: > On Mon, Aug 24, 2015 at 05:28:14PM +0530, ankitprasad.r.sharma@intel.com wrote: > > +static int > > +__i915_gem_object_get_pages(struct drm_i915_gem_object *obj) > > +{ > > + const struct drm_i915_gem_object_ops *ops = obj->ops; > > + int ret; > > + > > + WARN_ON(obj->pages); > > + > > + if (obj->madv != I915_MADV_WILLNEED) { > > + DRM_DEBUG("Attempting to obtain a purgeable object\n"); > > + return -EFAULT; > > + } > > + > > + BUG_ON(obj->pages_pin_count); > > Put the parameter checking into i915_gem_object_get_pages(). The __i915 > version is only allowed from strict contexts and we can place the burden > of being correct on the caller. > > > + ret = ops->get_pages(obj); > > + if (ret) > > + return ret; > > + > > + obj->get_page.sg = obj->pages->sgl; > > + obj->get_page.last = 0; > > + > > + return 0; > > +} > > + > > /* Ensure that the associated pages are gathered from the backing storage > > * and pinned into our object. i915_gem_object_get_pages() may be called > > * multiple times before they are released by a single call to > > @@ -2339,28 +2377,17 @@ int > > i915_gem_object_get_pages(struct drm_i915_gem_object *obj) > > { > > struct drm_i915_private *dev_priv = obj->base.dev->dev_private; > > - const struct drm_i915_gem_object_ops *ops = obj->ops; > > int ret; > > > > if (obj->pages) > > return 0; > > > > - if (obj->madv != I915_MADV_WILLNEED) { > > - DRM_DEBUG("Attempting to obtain a purgeable object\n"); > > - return -EFAULT; > > - } > > - > > - BUG_ON(obj->pages_pin_count); > > - > > - ret = ops->get_pages(obj); > > + ret = __i915_gem_object_get_pages(obj); > > if (ret) > > return ret; > > > > list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list); > > I am tempted to say this should be in a new > > __i915_gem_object_get_pages__tail_locked() > > so that we don't have to hunt down users if we ever need to modify the > global lists. Could not get you here. is it just to add list_add_tail in a separate function __i915_gem_object_get_pages__tail_locked(), or a new variant of __i915_gem_object_get_pages() that will also do the link list insertion Thanks, Ankit
On Tue, 2015-08-25 at 11:51 +0100, Siluvery, Arun wrote: > On 24/08/2015 12:58, ankitprasad.r.sharma@intel.com wrote: > > From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com> > > > > This patch provides support for the User to populate the object > > with system pages at its creation time. Since this can be safely > > performed without holding the 'struct_mutex', it would help to reduce > > the time 'struct_mutex' is kept locked especially during the exec-buffer > > path, where it is generally held for the longest time. > > > > Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com> > > --- > > drivers/gpu/drm/i915/i915_dma.c | 2 +- > > drivers/gpu/drm/i915/i915_gem.c | 51 +++++++++++++++++++++++++++++++---------- > > include/uapi/drm/i915_drm.h | 11 ++++----- > > 3 files changed, 45 insertions(+), 19 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c > > index 8319e07..955aa16 100644 > > --- a/drivers/gpu/drm/i915/i915_dma.c > > +++ b/drivers/gpu/drm/i915/i915_dma.c > > @@ -171,7 +171,7 @@ static int i915_getparam(struct drm_device *dev, void *data, > > value = HAS_RESOURCE_STREAMER(dev); > > break; > > case I915_PARAM_CREATE_VERSION: > > - value = 2; > > + value = 3; > > break; > > default: > > DRM_DEBUG("Unknown parameter %d\n", param->param); > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > > index c44bd05..3904feb 100644 > > --- a/drivers/gpu/drm/i915/i915_gem.c > > +++ b/drivers/gpu/drm/i915/i915_gem.c > > @@ -46,6 +46,7 @@ static void > > i915_gem_object_retire__write(struct drm_i915_gem_object *obj); > > static void > > i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring); > > +static int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj); > > > > static bool cpu_cache_is_coherent(struct drm_device *dev, > > enum i915_cache_level level) > > @@ -414,6 +415,18 @@ i915_gem_create(struct drm_file *file, > > if (obj == NULL) > > return -ENOMEM; > > > > + if (flags & I915_CREATE_POPULATE) { > > + struct drm_i915_private *dev_priv = dev->dev_private; > > + > > + ret = __i915_gem_object_get_pages(obj); > > + if (ret) > > + return ret; > > + > > + mutex_lock(&dev->struct_mutex); > > + list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list); > > + mutex_unlock(&dev->struct_mutex); > > + } > > + > > ret = drm_gem_handle_create(file, &obj->base, &handle); > > If I915_CREATE_POPULATE is set, don't we have to release the pages when > this call fails? I guess the i915_gem_object_get_pages_* takes care of releasing the pages when returning an error. One more thing, What can be preferred here when an error is returned by __i915_gem_object_get_pages? Shall we return error to the userspace after unreferencing the object or to continue without pre-populating pages (and returning object handle)? Thanks, Ankit
On Thu, Aug 27, 2015 at 12:04:37PM +0530, Ankitprasad Sharma wrote: > On Tue, 2015-08-25 at 11:51 +0100, Siluvery, Arun wrote: > > On 24/08/2015 12:58, ankitprasad.r.sharma@intel.com wrote: > > > From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com> > > > > > > This patch provides support for the User to populate the object > > > with system pages at its creation time. Since this can be safely > > > performed without holding the 'struct_mutex', it would help to reduce > > > the time 'struct_mutex' is kept locked especially during the exec-buffer > > > path, where it is generally held for the longest time. > > > > > > Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com> > > > --- > > > drivers/gpu/drm/i915/i915_dma.c | 2 +- > > > drivers/gpu/drm/i915/i915_gem.c | 51 +++++++++++++++++++++++++++++++---------- > > > include/uapi/drm/i915_drm.h | 11 ++++----- > > > 3 files changed, 45 insertions(+), 19 deletions(-) > > > > > > diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c > > > index 8319e07..955aa16 100644 > > > --- a/drivers/gpu/drm/i915/i915_dma.c > > > +++ b/drivers/gpu/drm/i915/i915_dma.c > > > @@ -171,7 +171,7 @@ static int i915_getparam(struct drm_device *dev, void *data, > > > value = HAS_RESOURCE_STREAMER(dev); > > > break; > > > case I915_PARAM_CREATE_VERSION: > > > - value = 2; > > > + value = 3; > > > break; > > > default: > > > DRM_DEBUG("Unknown parameter %d\n", param->param); > > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > > > index c44bd05..3904feb 100644 > > > --- a/drivers/gpu/drm/i915/i915_gem.c > > > +++ b/drivers/gpu/drm/i915/i915_gem.c > > > @@ -46,6 +46,7 @@ static void > > > i915_gem_object_retire__write(struct drm_i915_gem_object *obj); > > > static void > > > i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring); > > > +static int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj); > > > > > > static bool cpu_cache_is_coherent(struct drm_device *dev, > > > enum i915_cache_level level) > > > @@ -414,6 +415,18 @@ i915_gem_create(struct drm_file *file, > > > if (obj == NULL) > > > return -ENOMEM; > > > > > > + if (flags & I915_CREATE_POPULATE) { > > > + struct drm_i915_private *dev_priv = dev->dev_private; > > > + > > > + ret = __i915_gem_object_get_pages(obj); > > > + if (ret) > > > + return ret; > > > + > > > + mutex_lock(&dev->struct_mutex); > > > + list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list); > > > + mutex_unlock(&dev->struct_mutex); > > > + } > > > + > > > ret = drm_gem_handle_create(file, &obj->base, &handle); > > > > If I915_CREATE_POPULATE is set, don't we have to release the pages when > > this call fails? > I guess the i915_gem_object_get_pages_* takes care of releasing the > pages when returning an error. Oh, it should just be calling drm_gem_object_unreference_unlocked(). No need to worry about obj->pages as they will be reaped along with the object. > One more thing, > What can be preferred here when an error is returned by > __i915_gem_object_get_pages? > Shall we return error to the userspace after unreferencing the object or > to continue without pre-populating pages (and returning object handle)? Report the error. Userspace can then decide if it wants to allocate an object without prepulated pages, try searching/reaping its own caches harder for an available object, or do something different entirely to avoid excess memory usage. -Chris
On Thu, Aug 27, 2015 at 11:18:15AM +0530, Ankitprasad Sharma wrote: > On Mon, 2015-08-24 at 13:35 +0100, Chris Wilson wrote: > > I am tempted to say this should be in a new > > > > __i915_gem_object_get_pages__tail_locked() > > > > so that we don't have to hunt down users if we ever need to modify the > > global lists. > Could not get you here. > is it just to add list_add_tail in a separate function > __i915_gem_object_get_pages__tail_locked(), Yes. It's just a preventative maintenance step for when we need something other than just adding the object to the global list. I would rather not have core state tracking dotted around random callers. -Chris
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 8319e07..955aa16 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -171,7 +171,7 @@ static int i915_getparam(struct drm_device *dev, void *data, value = HAS_RESOURCE_STREAMER(dev); break; case I915_PARAM_CREATE_VERSION: - value = 2; + value = 3; break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c44bd05..3904feb 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -46,6 +46,7 @@ static void i915_gem_object_retire__write(struct drm_i915_gem_object *obj); static void i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring); +static int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj); static bool cpu_cache_is_coherent(struct drm_device *dev, enum i915_cache_level level) @@ -414,6 +415,18 @@ i915_gem_create(struct drm_file *file, if (obj == NULL) return -ENOMEM; + if (flags & I915_CREATE_POPULATE) { + struct drm_i915_private *dev_priv = dev->dev_private; + + ret = __i915_gem_object_get_pages(obj); + if (ret) + return ret; + + mutex_lock(&dev->struct_mutex); + list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list); + mutex_unlock(&dev->struct_mutex); + } + ret = drm_gem_handle_create(file, &obj->base, &handle); /* drop reference from allocate - handle holds it now */ drm_gem_object_unreference_unlocked(&obj->base); @@ -2328,6 +2341,31 @@ err_pages: return ret; } +static int +__i915_gem_object_get_pages(struct drm_i915_gem_object *obj) +{ + const struct drm_i915_gem_object_ops *ops = obj->ops; + int ret; + + WARN_ON(obj->pages); + + if (obj->madv != I915_MADV_WILLNEED) { + DRM_DEBUG("Attempting to obtain a purgeable object\n"); + return -EFAULT; + } + + BUG_ON(obj->pages_pin_count); + + ret = ops->get_pages(obj); + if (ret) + return ret; + + obj->get_page.sg = obj->pages->sgl; + obj->get_page.last = 0; + + return 0; +} + /* Ensure that the associated pages are gathered from the backing storage * and pinned into our object. i915_gem_object_get_pages() may be called * multiple times before they are released by a single call to @@ -2339,28 +2377,17 @@ int i915_gem_object_get_pages(struct drm_i915_gem_object *obj) { struct drm_i915_private *dev_priv = obj->base.dev->dev_private; - const struct drm_i915_gem_object_ops *ops = obj->ops; int ret; if (obj->pages) return 0; - if (obj->madv != I915_MADV_WILLNEED) { - DRM_DEBUG("Attempting to obtain a purgeable object\n"); - return -EFAULT; - } - - BUG_ON(obj->pages_pin_count); - - ret = ops->get_pages(obj); + ret = __i915_gem_object_get_pages(obj); if (ret) return ret; list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list); - obj->get_page.sg = obj->pages->sgl; - obj->get_page.last = 0; - return 0; } diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index f71f75c..26ea715 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -457,20 +457,19 @@ struct drm_i915_gem_create { __u32 handle; __u32 pad; /** - * Requested flags (currently used for placement - * (which memory domain)) + * Requested flags * * You can request that the object be created from special memory * rather than regular system pages using this parameter. Such * irregular objects may have certain restrictions (such as CPU * access to a stolen object is verboten). - * - * This can be used in the future for other purposes too - * e.g. specifying tiling/caching/madvise + * Also using this parameter object can be pre-populated with system + * pages. */ __u32 flags; #define I915_CREATE_PLACEMENT_STOLEN (1<<0) /* Cannot use CPU mmaps */ -#define __I915_CREATE_UNKNOWN_FLAGS -(I915_CREATE_PLACEMENT_STOLEN << 1) +#define I915_CREATE_POPULATE (1<<1) /* Pre-populate object pages */ +#define __I915_CREATE_UNKNOWN_FLAGS -(I915_CREATE_POPULATE << 1) }; struct drm_i915_gem_pread {