[128/190] drm/i915: Extract i915_gem_obj_prepare_shmem_write()

Message ID	1452509174-16671-42-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State	New, archived
Headers	show Return-Path: <intel-gfx-bounces@lists.freedesktop.org> From: Chris Wilson <chris@chris-wilson.co.uk> To: intel-gfx@lists.freedesktop.org Date: Mon, 11 Jan 2016 10:45:12 +0000 Message-Id: <1452509174-16671-42-git-send-email-chris@chris-wilson.co.uk> In-Reply-To: <1452509174-16671-1-git-send-email-chris@chris-wilson.co.uk> References: <1452503961-14837-1-git-send-email-chris@chris-wilson.co.uk> <1452509174-16671-1-git-send-email-chris@chris-wilson.co.uk> Subject: [Intel-gfx] [PATCH 128/190] drm/i915: Extract i915_gem_obj_prepare_shmem_write() Precedence: list MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" <intel-gfx-bounces@lists.freedesktop.org>

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 814d894ed925..fae127166e2c 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -902,7 +902,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dest_obj, u32 batch_start_offset, u32 batch_len) { - int needs_clflush = 0; + unsigned needs_clflush; void *src_base, *src; void *dst = NULL; int ret; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 693f472bd604..45a0da0947cd 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2761,7 +2761,11 @@ void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv); void i915_gem_release_mmap(struct drm_i915_gem_object *obj); int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, - int *needs_clflush); + unsigned *needs_clflush); +int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, + unsigned *needs_clflush); +#define CLFLUSH_BEFORE 0x1 +#define CLFLUSH_AFTER 0x2 int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 01c20a336c04..e18c0d4d24ad 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -492,34 +492,92 @@ __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, * flush the object from the CPU cache. */ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, - int *needs_clflush) + unsigned *needs_clflush) { int ret; *needs_clflush = 0; - if (!obj->base.filp) return -EINVAL; if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) { + ret = i915_gem_object_wait_rendering(obj, true); + if (ret) + return ret; + /* If we're not in the cpu read domain, set ourself into the gtt * read domain and manually flush cachelines (if required). This * optimizes for the case when the gpu will dirty the data * anyway again before the next pread happens. */ *needs_clflush = !cpu_cache_is_coherent(obj->base.dev, obj->cache_level); - ret = i915_gem_object_wait_rendering(obj, true); + } + + ret = i915_gem_object_get_pages(obj); + if (ret) + return ret; + + i915_gem_object_pin_pages(obj); + + if (*needs_clflush && !cpu_has_clflush) { + ret = i915_gem_object_set_to_cpu_domain(obj, false); + if (ret) { + i915_gem_object_unpin_pages(obj); + return ret; + } + *needs_clflush = 0; + } + + return 0; +} + +int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, + unsigned *needs_clflush) +{ + int ret; + + *needs_clflush = 0; + if (!obj->base.filp) + return -EINVAL; + + if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { + ret = i915_gem_object_wait_rendering(obj, false); if (ret) return ret; + + /* If we're not in the cpu write domain, set ourself into the + * gtt write domain and manually flush cachelines (as required). + * This optimizes for the case when the gpu will use the data + * right away and we therefore have to clflush anyway. + */ + *needs_clflush |= cpu_write_needs_clflush(obj) << 1; } + /* Same trick applies to invalidate partially written cachelines read + * before writing. + */ + if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) + *needs_clflush |= !cpu_cache_is_coherent(obj->base.dev, + obj->cache_level); + ret = i915_gem_object_get_pages(obj); if (ret) return ret; i915_gem_object_pin_pages(obj); - return ret; + if (*needs_clflush && !cpu_has_clflush) { + ret = i915_gem_object_set_to_cpu_domain(obj, true); + if (ret) { + i915_gem_object_unpin_pages(obj); + return ret; + } + *needs_clflush = 0; + } + + intel_fb_obj_invalidate(obj, ORIGIN_CPU); + obj->dirty = 1; + return 0; } /* Per-page copy function for the shmem pread fastpath. @@ -916,41 +974,17 @@ i915_gem_shmem_pwrite(struct drm_device *dev, int shmem_page_offset, page_length, ret = 0; int obj_do_bit17_swizzling, page_do_bit17_swizzling; int hit_slowpath = 0; - int needs_clflush_after = 0; - int needs_clflush_before = 0; + unsigned needs_clflush; struct sg_page_iter sg_iter; - user_data = to_user_ptr(args->data_ptr); - remain = args->size; - - obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); - - if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { - /* If we're not in the cpu write domain, set ourself into the gtt - * write domain and manually flush cachelines (if required). This - * optimizes for the case when the gpu will use the data - * right away and we therefore have to clflush anyway. */ - needs_clflush_after = cpu_write_needs_clflush(obj); - ret = i915_gem_object_wait_rendering(obj, false); - if (ret) - return ret; - } - /* Same trick applies to invalidate partially written cachelines read - * before writing. */ - if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) - needs_clflush_before = - !cpu_cache_is_coherent(dev, obj->cache_level); - - ret = i915_gem_object_get_pages(obj); + ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush); if (ret) return ret; - intel_fb_obj_invalidate(obj, ORIGIN_CPU); - - i915_gem_object_pin_pages(obj); - + obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); + user_data = to_user_ptr(args->data_ptr); offset = args->offset; - obj->dirty = 1; + remain = args->size; for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, offset >> PAGE_SHIFT) { @@ -974,7 +1008,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev, /* If we don't overwrite a cacheline completely we need to be * careful to have up-to-date data by first clflushing. Don't * overcomplicate things and flush the entire patch. */ - partial_cacheline_write = needs_clflush_before && + partial_cacheline_write = needs_clflush & CLFLUSH_BEFORE && ((shmem_page_offset | page_length) & (boot_cpu_data.x86_clflush_size - 1)); @@ -984,7 +1018,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev, ret = shmem_pwrite_fast(page, shmem_page_offset, page_length, user_data, page_do_bit17_swizzling, partial_cacheline_write, - needs_clflush_after); + needs_clflush & CLFLUSH_AFTER); if (ret == 0) goto next_page; @@ -993,7 +1027,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev, ret = shmem_pwrite_slow(page, shmem_page_offset, page_length, user_data, page_do_bit17_swizzling, partial_cacheline_write, - needs_clflush_after); + needs_clflush & CLFLUSH_AFTER); mutex_lock(&dev->struct_mutex); @@ -1015,14 +1049,14 @@ out: * cachelines in-line while writing and the object moved * out of the cpu write domain while we've dropped the lock. */ - if (!needs_clflush_after && + if (!(needs_clflush & CLFLUSH_AFTER) && obj->base.write_domain != I915_GEM_DOMAIN_CPU) { if (i915_gem_clflush_object(obj, obj->pin_display)) - needs_clflush_after = true; + needs_clflush |= CLFLUSH_AFTER; } } - if (needs_clflush_after) + if (needs_clflush & CLFLUSH_AFTER) i915_gem_chipset_flush(dev); else obj->cache_dirty = true;

[128/190] drm/i915: Extract i915_gem_obj_prepare_shmem_write()

Commit Message

Patch