Message ID | 1461159055-11991-1-git-send-email-david.s.gordon@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 20/04/16 14:30, Dave Gordon wrote: > The recently-added i915_gem_object_pin_map() can be further optimised > for "small" objects. To facilitate this, and simplify the error paths > before adding the new code, this patch pulls out the "mapping" part of > the operation (involving local allocations which must be undone before > return) into its own subfunction. > > The next patch will then insert the new optimisation into the middle of > the now-separated subfunction. > > This reorganisation will probably not affect the generated code, as the > compiler will most likely inline it anyway, but it makes the logical > structure a bit clearer and easier to modify. > > v2: > Restructure loop-over-pages & error check (Chris Wilson) > > Signed-off-by: Dave Gordon <david.s.gordon@intel.com> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > Cc: Chris Wilson <chris@chris-wilson.co.uk> > --- > drivers/gpu/drm/i915/i915_gem.c | 58 ++++++++++++++++++++++++++--------------- > 1 file changed, 37 insertions(+), 21 deletions(-) [snip] > + for_each_sg_page(sg, &sg_iter, n_pages, 0) > + pages[i++] = sg_page_iter_page(&sg_iter); > + > + /* Check that we have the expected number of pages */ > + if (!WARN_ON(i != n_pages)) > + addr = vmap(pages, n_pages, 0, PAGE_KERNEL); Well actually the shorter loop and the subsequent check didn't turn out too ugly; the only thing I don't much like is if (!WARN_ON(...)), because I think in general that code should still function if WARN() and similar macros are #defined to empty loops. But WARN_ON() has to be defined to return (the truth-value of) its parameter even if it doesn't print anything, so it's not that bad. OTOH, while looking at this loop, I worked out a better page iterator, so I'll post that in a little while :) .Dave.
On ke, 2016-04-20 at 14:30 +0100, Dave Gordon wrote: > The recently-added i915_gem_object_pin_map() can be further optimised > for "small" objects. To facilitate this, and simplify the error paths > before adding the new code, this patch pulls out the "mapping" part of > the operation (involving local allocations which must be undone before > return) into its own subfunction. > > The next patch will then insert the new optimisation into the middle of > the now-separated subfunction. > > This reorganisation will probably not affect the generated code, as the > compiler will most likely inline it anyway, but it makes the logical > structure a bit clearer and easier to modify. > > v2: > Restructure loop-over-pages & error check (Chris Wilson) > > Signed-off-by: Dave Gordon <david.s.gordon@intel.com> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > Cc: Chris Wilson <chris@chris-wilson.co.uk> > --- > drivers/gpu/drm/i915/i915_gem.c | 58 ++++++++++++++++++++++++++--------------- > 1 file changed, 37 insertions(+), 21 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 6ce2c31..5344b6d 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -2396,6 +2396,42 @@ static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) > return 0; > } > > +/* The 'mapping' part of i915_gem_object_pin_map() below */ > +static void *i915_gem_object_map(const struct drm_i915_gem_object *obj) > +{ > + unsigned long n_pages = obj->base.size >> PAGE_SHIFT; > + struct scatterlist *sg = obj->pages->sgl; > + struct sg_page_iter sg_iter; > + struct page **pages; > + unsigned long i = 0; > + void *addr = NULL; > + > + /* A single page can always be kmapped */ > + if (n_pages == 1) > + return kmap(sg_page(sg)); > + > + pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY); > + if (pages == NULL) { > + DRM_DEBUG_DRIVER("Failed to get space for pages\n"); > + return NULL; > + } > + > + for_each_sg_page(sg, &sg_iter, n_pages, 0) > + pages[i++] = sg_page_iter_page(&sg_iter); > + > + /* Check that we have the expected number of pages */ > + if (!WARN_ON(i != n_pages)) > + addr = vmap(pages, n_pages, 0, PAGE_KERNEL); > + > + if (addr == NULL) > + DRM_DEBUG_DRIVER("Failed to vmap pages\n"); > + This kind of construct is used elsewhere, too. if (WARN_ON(i != n_pages)) { DRM_DEBUG_DRIVER("Failed to vmap pages\n"); goto out; } addr = vmap(pages, n_pages, 0, PAGE_KERNEL); out: > + drm_free_large(pages); > + > + return addr; > +} > + > +/* get, pin, and map the pages of the object into kernel space */ > void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj) > { > int ret; > @@ -2409,27 +2445,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj) > i915_gem_object_pin_pages(obj); > > if (obj->mapping == NULL) { > - struct page **pages; > - > - pages = NULL; > - if (obj->base.size == PAGE_SIZE) > - obj->mapping = kmap(sg_page(obj->pages->sgl)); > - else > - pages = drm_malloc_gfp(obj->base.size >> PAGE_SHIFT, > - sizeof(*pages), > - GFP_TEMPORARY); > - if (pages != NULL) { > - struct sg_page_iter sg_iter; > - int n; > - > - n = 0; > - for_each_sg_page(obj->pages->sgl, &sg_iter, > - obj->pages->nents, 0) > - pages[n++] = sg_page_iter_page(&sg_iter); > - > - obj->mapping = vmap(pages, n, 0, PAGE_KERNEL); > - drm_free_large(pages); > - } > + obj->mapping = i915_gem_object_map(obj); > if (obj->mapping == NULL) { > i915_gem_object_unpin_pages(obj); > return ERR_PTR(-ENOMEM);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6ce2c31..5344b6d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2396,6 +2396,42 @@ static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) return 0; } +/* The 'mapping' part of i915_gem_object_pin_map() below */ +static void *i915_gem_object_map(const struct drm_i915_gem_object *obj) +{ + unsigned long n_pages = obj->base.size >> PAGE_SHIFT; + struct scatterlist *sg = obj->pages->sgl; + struct sg_page_iter sg_iter; + struct page **pages; + unsigned long i = 0; + void *addr = NULL; + + /* A single page can always be kmapped */ + if (n_pages == 1) + return kmap(sg_page(sg)); + + pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY); + if (pages == NULL) { + DRM_DEBUG_DRIVER("Failed to get space for pages\n"); + return NULL; + } + + for_each_sg_page(sg, &sg_iter, n_pages, 0) + pages[i++] = sg_page_iter_page(&sg_iter); + + /* Check that we have the expected number of pages */ + if (!WARN_ON(i != n_pages)) + addr = vmap(pages, n_pages, 0, PAGE_KERNEL); + + if (addr == NULL) + DRM_DEBUG_DRIVER("Failed to vmap pages\n"); + + drm_free_large(pages); + + return addr; +} + +/* get, pin, and map the pages of the object into kernel space */ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj) { int ret; @@ -2409,27 +2445,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj) i915_gem_object_pin_pages(obj); if (obj->mapping == NULL) { - struct page **pages; - - pages = NULL; - if (obj->base.size == PAGE_SIZE) - obj->mapping = kmap(sg_page(obj->pages->sgl)); - else - pages = drm_malloc_gfp(obj->base.size >> PAGE_SHIFT, - sizeof(*pages), - GFP_TEMPORARY); - if (pages != NULL) { - struct sg_page_iter sg_iter; - int n; - - n = 0; - for_each_sg_page(obj->pages->sgl, &sg_iter, - obj->pages->nents, 0) - pages[n++] = sg_page_iter_page(&sg_iter); - - obj->mapping = vmap(pages, n, 0, PAGE_KERNEL); - drm_free_large(pages); - } + obj->mapping = i915_gem_object_map(obj); if (obj->mapping == NULL) { i915_gem_object_unpin_pages(obj); return ERR_PTR(-ENOMEM);
The recently-added i915_gem_object_pin_map() can be further optimised for "small" objects. To facilitate this, and simplify the error paths before adding the new code, this patch pulls out the "mapping" part of the operation (involving local allocations which must be undone before return) into its own subfunction. The next patch will then insert the new optimisation into the middle of the now-separated subfunction. This reorganisation will probably not affect the generated code, as the compiler will most likely inline it anyway, but it makes the logical structure a bit clearer and easier to modify. v2: Restructure loop-over-pages & error check (Chris Wilson) Signed-off-by: Dave Gordon <david.s.gordon@intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> --- drivers/gpu/drm/i915/i915_gem.c | 58 ++++++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 21 deletions(-)