From patchwork Thu Jun 27 20:56:06 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Matthew Auld X-Patchwork-Id: 11020573 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id DE78014E5 for ; Thu, 27 Jun 2019 20:56:58 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id D5A1B28703 for ; Thu, 27 Jun 2019 20:56:58 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id C9F3528711; Thu, 27 Jun 2019 20:56:58 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-5.2 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_MED autolearn=ham version=3.3.1 Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher DHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.wl.linuxfoundation.org (Postfix) with ESMTPS id 307F92870F for ; Thu, 27 Jun 2019 20:56:58 +0000 (UTC) Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 1A0F26E841; Thu, 27 Jun 2019 20:56:54 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from mga06.intel.com (mga06.intel.com [134.134.136.31]) by gabe.freedesktop.org (Postfix) with ESMTPS id A4EC26E843 for ; Thu, 27 Jun 2019 20:56:48 +0000 (UTC) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga007.fm.intel.com ([10.253.24.52]) by orsmga104.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 27 Jun 2019 13:56:48 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.63,425,1557212400"; d="scan'208";a="164455627" Received: from unknown 
(HELO mwahaha-bdw.ger.corp.intel.com) ([10.252.4.227]) by fmsmga007.fm.intel.com with ESMTP; 27 Jun 2019 13:56:47 -0700 From: Matthew Auld To: intel-gfx@lists.freedesktop.org Date: Thu, 27 Jun 2019 21:56:06 +0100 Message-Id: <20190627205633.1143-11-matthew.auld@intel.com> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190627205633.1143-1-matthew.auld@intel.com> References: <20190627205633.1143-1-matthew.auld@intel.com> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH v2 10/37] drm/i915/blt: support copying objects X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" X-Virus-Scanned: ClamAV using ClamSMTP We can already clear an object with the blt, so try to do the same to support copying from one object backing store to another. Really this is just object -> object, which is not that useful yet, what we really want is two backing stores, but that will require some vma rework first, otherwise we are stuck with "tmp" objects. 
Signed-off-by: Matthew Auld
Cc: Joonas Lahtinen
Cc: Abdiel Janulgue i915);
+	u32 *cs;
+
+	GEM_BUG_ON(src->size != dst->size);
+
+	cs = intel_ring_begin(rq, 10);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	if (gen >= 9) {
+		*cs++ = GEN9_XY_FAST_COPY_BLT_CMD | (10-2);
+		*cs++ = BLT_DEPTH_32 | PAGE_SIZE;
+		*cs++ = 0;
+		*cs++ = src->size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
+		*cs++ = lower_32_bits(dst->node.start);
+		*cs++ = upper_32_bits(dst->node.start);
+		*cs++ = 0;
+		*cs++ = PAGE_SIZE;
+		*cs++ = lower_32_bits(src->node.start);
+		*cs++ = upper_32_bits(src->node.start);
+	} else if (gen >= 8) {
+		*cs++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10-2);
+		*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
+		*cs++ = 0;
+		*cs++ = src->size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
+		*cs++ = lower_32_bits(dst->node.start);
+		*cs++ = upper_32_bits(dst->node.start);
+		*cs++ = 0;
+		*cs++ = PAGE_SIZE;
+		*cs++ = lower_32_bits(src->node.start);
+		*cs++ = upper_32_bits(src->node.start);
+	} else {
+		*cs++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (8-2);
+		*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
+		*cs++ = 0;
+		*cs++ = src->size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
+		*cs++ = dst->node.start;
+		*cs++ = 0;
+		*cs++ = PAGE_SIZE;
+		*cs++ = src->node.start;
+		*cs++ = MI_NOOP;
+		*cs++ = MI_NOOP;
+	}
+
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+/**
+ * i915_gem_object_copy_blt - copy the contents of @src into @dst with the blitter
+ * @src: object to copy from
+ * @dst: object to copy to
+ * @ce: context on which the copy request is created
+ *
+ * Pins a vma for each object in the context's address space (falling back
+ * to the GGTT address space when the context has none), creates a request
+ * on @ce, marks both vmas active on that request (@dst as a write) and
+ * emits the copy commands via intel_emit_vma_copy_blt(). Both vmas are
+ * unpinned again before returning.
+ *
+ * Returns: 0 on success, or a negative error code.
+ */
+int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
+			     struct drm_i915_gem_object *dst,
+			     struct intel_context *ce)
+{
+	struct drm_i915_private *i915 = to_i915(src->base.dev);
+	struct i915_gem_context *ctx = ce->gem_context;
+	struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
+	struct drm_gem_object *objs[] = { &src->base, &dst->base };
+	struct ww_acquire_ctx acquire;
+	struct i915_vma *vma_src, *vma_dst;
+	struct i915_request *rq;
+	int err;
+
+	vma_src = i915_vma_instance(src, vm, NULL);
+	if (IS_ERR(vma_src))
+		return PTR_ERR(vma_src);
+
+	err = i915_vma_pin(vma_src, 0, 0, PIN_USER);
+	if (unlikely(err))
+		return err;
+
+	vma_dst = i915_vma_instance(dst, vm, NULL);
+	if (IS_ERR(vma_dst)) {
+		/* err still holds 0 from the pin above; propagate the failure */
+		err = PTR_ERR(vma_dst);
+		goto out_unpin_src;
+	}
+
+	err = i915_vma_pin(vma_dst, 0, 0, PIN_USER);
+	if (unlikely(err))
+		goto out_unpin_src;
+
+	rq = i915_request_create(ce);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto out_unpin_dst;
+	}
+
+	err = drm_gem_lock_reservations(objs, ARRAY_SIZE(objs), &acquire);
+	if (unlikely(err))
+		goto out_request;
+
+	if (src->cache_dirty & ~src->cache_coherent)
+		i915_gem_clflush_object(src, 0);
+
+	if (dst->cache_dirty & ~dst->cache_coherent)
+		i915_gem_clflush_object(dst, 0);
+
+	err = i915_request_await_object(rq, src, false);
+	if (unlikely(err))
+		goto out_unlock;
+
+	err = i915_vma_move_to_active(vma_src, rq, 0);
+	if (unlikely(err))
+		goto out_unlock;
+
+	err = i915_request_await_object(rq, dst, true);
+	if (unlikely(err))
+		goto out_unlock;
+
+	err = i915_vma_move_to_active(vma_dst, rq, EXEC_OBJECT_WRITE);
+	if (unlikely(err))
+		goto out_unlock;
+
+	if (ce->engine->emit_init_breadcrumb) {
+		err = ce->engine->emit_init_breadcrumb(rq);
+		if (unlikely(err))
+			goto out_unlock;
+	}
+
+	err = intel_emit_vma_copy_blt(rq, vma_src, vma_dst);
+out_unlock:
+	drm_gem_unlock_reservations(objs, ARRAY_SIZE(objs), &acquire);
+out_request:
+	/* The request already exists, so it is always added; on error it is skipped. */
+	if (unlikely(err))
+		i915_request_skip(rq, err);
+
+	i915_request_add(rq);
+out_unpin_dst:
+	i915_vma_unpin(vma_dst);
+out_unpin_src:
+	i915_vma_unpin(vma_src);
+	return err;
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/i915_gem_object_blt.c"
 #endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h
index 7ec7de6ac0c0..17fac835f391 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h
@@ -21,4 +21,12 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
 			     struct intel_context *ce,
 			     u32 value);
 
+int intel_emit_vma_copy_blt(struct i915_request *rq,
+			    struct i915_vma *src,
+			    struct i915_vma *dst);
+
+int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
+			     struct drm_i915_gem_object *dst,
+			     struct intel_context *ce);
+
 #endif
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
index e23d8c9e9298..1f28a12f7bb4 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
@@ -94,10 +94,122 @@ static int igt_fill_blt(void *arg)
 	return err;
 }
 
+/*
+ * Live selftest for i915_gem_object_copy_blt(): until the IGT timeout
+ * expires, fill a source object with a random value and a destination
+ * object with a different pattern, copy source to destination, then check
+ * that every dword of the destination reads back as the source value.
+ * -ENOMEM is not treated as a test failure.
+ */
+static int igt_copy_blt(void *arg)
+{
+	struct intel_context *ce = arg;
+	struct drm_i915_private *i915 = ce->gem_context->i915;
+	struct drm_i915_gem_object *src, *dst;
+	struct rnd_state prng;
+	IGT_TIMEOUT(end);
+	u32 *vaddr;
+	int err = 0;
+
+	prandom_seed_state(&prng, i915_selftest.random_seed);
+
+	do {
+		u32 sz = prandom_u32_state(&prng) % SZ_32M;
+		u32 val = prandom_u32_state(&prng);
+		u32 i;
+
+		sz = round_up(sz, PAGE_SIZE);
+
+		pr_debug("%s with sz=%x, val=%x\n", __func__, sz, val);
+
+		src = i915_gem_object_create_internal(i915, sz);
+		if (IS_ERR(src)) {
+			err = PTR_ERR(src);
+			goto err_flush;
+		}
+
+		vaddr = i915_gem_object_pin_map(src, I915_MAP_WB);
+		if (IS_ERR(vaddr)) {
+			err = PTR_ERR(vaddr);
+			goto err_put_src;
+		}
+
+		memset32(vaddr, val, src->base.size / sizeof(u32));
+
+		i915_gem_object_unpin_map(src);
+
+		if (!(src->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
+			src->cache_dirty = true;
+
+		dst = i915_gem_object_create_internal(i915, sz);
+		if (IS_ERR(dst)) {
+			err = PTR_ERR(dst);
+			goto err_put_src;
+		}
+
+		vaddr = i915_gem_object_pin_map(dst, I915_MAP_WB);
+		if (IS_ERR(vaddr)) {
+			err = PTR_ERR(vaddr);
+			goto err_put_dst;
+		}
+
+		memset32(vaddr, val ^ 0xdeadbeaf, dst->base.size / sizeof(u32));
+
+		if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
+			dst->cache_dirty = true;
+
+		mutex_lock(&i915->drm.struct_mutex);
+		err = i915_gem_object_copy_blt(src, dst, ce);
+		mutex_unlock(&i915->drm.struct_mutex);
+		if (err)
+			goto err_unpin;
+
+		i915_gem_object_lock(dst);
+		err = i915_gem_object_set_to_cpu_domain(dst, false);
+		i915_gem_object_unlock(dst);
+		if (err)
+			goto err_unpin;
+
+		for (i = 0; i < dst->base.size / sizeof(u32); ++i) {
+			if (vaddr[i] != val) {
+				pr_err("vaddr[%u]=%x, expected=%x\n", i,
+				       vaddr[i], val);
+				err = -EINVAL;
+				goto err_unpin;
+			}
+		}
+
+		i915_gem_object_unpin_map(dst);
+
+		i915_gem_object_put(src);
+		i915_gem_object_put(dst);
+	} while (!time_after(jiffies, end));
+
+	goto err_flush;
+
+err_unpin:
+	i915_gem_object_unpin_map(dst);
+err_put_dst:
+	i915_gem_object_put(dst);
+err_put_src:
+	i915_gem_object_put(src);
+err_flush:
+	mutex_lock(&i915->drm.struct_mutex);
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		err = -EIO;
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	if (err == -ENOMEM)
+		err = 0;
+
+	return err;
+}
+
 int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(igt_fill_blt),
+		SUBTEST(igt_copy_blt),
 	};
 
 	if (i915_terminally_wedged(i915))
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index eec31e36aca7..e3b23351669c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -182,7 +182,8 @@
 #define COLOR_BLT_CMD			(2<<29 | 0x40<<22 | (5-2))
 #define XY_COLOR_BLT_CMD		(2 << 29 | 0x50 << 22)
 #define SRC_COPY_BLT_CMD		((2<<29)|(0x43<<22)|4)
-#define XY_SRC_COPY_BLT_CMD		((2<<29)|(0x53<<22)|6)
+#define GEN9_XY_FAST_COPY_BLT_CMD	((2<<29)|(0x42<<22))
+#define XY_SRC_COPY_BLT_CMD		((2<<29)|(0x53<<22))
 #define XY_MONO_SRC_COPY_IMM_BLT	((2<<29)|(0x71<<22)|5)
 #define BLT_WRITE_A			(2<<20)
 #define BLT_WRITE_RGB			(1<<20)