From patchwork Thu Mar 11 15:59:00 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Daniel Vetter X-Patchwork-Id: 84946 Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) by demeter.kernel.org (8.14.3/8.14.3) with ESMTP id o2BFxJCQ010737 for ; Thu, 11 Mar 2010 15:59:59 GMT Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id D45379F7CD; Thu, 11 Mar 2010 07:59:21 -0800 (PST) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from mail.ffwll.ch (cable-static-49-187.intergga.ch [157.161.49.187]) by gabe.freedesktop.org (Postfix) with ESMTP id 94FD89F799 for ; Thu, 11 Mar 2010 07:59:18 -0800 (PST) Received: by mail.ffwll.ch (Postfix, from userid 1000) id CCDC320C22B; Thu, 11 Mar 2010 16:59:17 +0100 (CET) X-Spam-ASN: X-Spam-Checker-Version: SpamAssassin 3.2.5 (2008-06-10) on orange.ffwll.ch X-Spam-Level: X-Spam-Hammy: 0.000-+--struct, 0.000-+--100644, 0.000-+--signed-off-by X-Spam-Status: No, score=-4.4 required=6.0 tests=ALL_TRUSTED,BAYES_00 autolearn=ham version=3.2.5 X-Spam-Spammy: 0.997-1--Official, 0.995-1--4th, 0.993-1--screws Received: from biene (unknown [192.168.23.129]) by mail.ffwll.ch (Postfix) with ESMTP id 8CF6F20C22C; Thu, 11 Mar 2010 16:58:37 +0100 (CET) Received: from daniel by biene with local (Exim 4.71) (envelope-from ) id 1Npkmw-00039A-9m; Thu, 11 Mar 2010 16:59:06 +0100 From: Daniel Vetter To: intel-gfx@lists.freedesktop.org Date: Thu, 11 Mar 2010 16:59:00 +0100 Message-Id: <1268323140-12006-16-git-send-email-daniel.vetter@ffwll.ch> X-Mailer: git-send-email 1.6.6.1 In-Reply-To: <1268323140-12006-1-git-send-email-daniel.vetter@ffwll.ch> References: <1268323140-12006-1-git-send-email-daniel.vetter@ffwll.ch> Cc: Daniel Vetter Subject: [Intel-gfx] [PATCH 15/15] drm/i915: pipelined fencing, part 2: fence setup X-BeenThere: 
intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.11 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , MIME-Version: 1.0 Sender: intel-gfx-bounces@lists.freedesktop.org Errors-To: intel-gfx-bounces@lists.freedesktop.org X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.3 (demeter.kernel.org [140.211.167.41]); Thu, 11 Mar 2010 15:59:59 +0000 (UTC) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a8209df..07e73fc 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2348,7 +2348,8 @@ i915_gem_object_get_pages(struct drm_gem_object *obj, return 0; } -static void sandybridge_write_fence_reg(struct drm_i915_fence_reg *reg) +static void sandybridge_write_fence_reg(struct drm_i915_fence_reg *reg, + int pipelined) { struct drm_gem_object *obj = reg->obj; struct drm_device *dev = obj->dev; @@ -2356,6 +2357,7 @@ static void sandybridge_write_fence_reg(struct drm_i915_fence_reg *reg) struct drm_i915_gem_object *obj_priv = obj->driver_private; int regnum = obj_priv->fence_reg; uint64_t val; + RING_LOCALS; val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) & 0xfffff000) << 32; @@ -2367,10 +2369,20 @@ static void sandybridge_write_fence_reg(struct drm_i915_fence_reg *reg) val |= 1 << I965_FENCE_TILING_Y_SHIFT; val |= I965_FENCE_REG_VALID; - I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (regnum * 8), val); + if (pipelined) { + BEGIN_LP_RING(6); + OUT_RING(MI_NOOP); + OUT_RING(MI_LOAD_REGISTER_IMM(2)); + OUT_RING(FENCE_REG_SANDYBRIDGE_0 + (regnum * 8)); + OUT_RING((uint32_t) val); + OUT_RING(FENCE_REG_SANDYBRIDGE_0 + (regnum * 8) + 4); + OUT_RING((uint32_t) (val >> 32)); + ADVANCE_LP_RING(); + } else + I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (regnum * 8), val); } -static void i965_write_fence_reg(struct drm_i915_fence_reg *reg) +static void i965_write_fence_reg(struct 
drm_i915_fence_reg *reg, int pipelined) { struct drm_gem_object *obj = reg->obj; struct drm_device *dev = obj->dev; @@ -2378,6 +2390,7 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg *reg) struct drm_i915_gem_object *obj_priv = obj->driver_private; int regnum = obj_priv->fence_reg; uint64_t val; + RING_LOCALS; val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) & 0xfffff000) << 32; @@ -2387,10 +2400,20 @@ static void i965_write_fence_reg(struct drm_i915_fence_reg *reg) val |= 1 << I965_FENCE_TILING_Y_SHIFT; val |= I965_FENCE_REG_VALID; - I915_WRITE64(FENCE_REG_965_0 + (regnum * 8), val); + if (pipelined) { + BEGIN_LP_RING(6); + OUT_RING(MI_NOOP); + OUT_RING(MI_LOAD_REGISTER_IMM(2)); + OUT_RING(FENCE_REG_965_0 + (regnum * 8)); + OUT_RING((uint32_t) val); + OUT_RING(FENCE_REG_965_0 + (regnum * 8) + 4); + OUT_RING((uint32_t) (val >> 32)); + ADVANCE_LP_RING(); + } else + I915_WRITE64(FENCE_REG_965_0 + (regnum * 8), val); } -static void i915_write_fence_reg(struct drm_i915_fence_reg *reg) +static void i915_write_fence_reg(struct drm_i915_fence_reg *reg, int pipelined) { struct drm_gem_object *obj = reg->obj; struct drm_device *dev = obj->dev; @@ -2400,6 +2423,7 @@ static void i915_write_fence_reg(struct drm_i915_fence_reg *reg) int tile_width; uint32_t fence_reg, val; uint32_t pitch_val; + RING_LOCALS; if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) || (obj_priv->gtt_offset & (obj->size - 1))) { @@ -2429,10 +2453,19 @@ static void i915_write_fence_reg(struct drm_i915_fence_reg *reg) fence_reg = FENCE_REG_830_0 + (regnum * 4); else fence_reg = FENCE_REG_945_8 + ((regnum - 8) * 4); - I915_WRITE(fence_reg, val); + + if (pipelined) { + BEGIN_LP_RING(4); + OUT_RING(MI_NOOP); + OUT_RING(MI_LOAD_REGISTER_IMM(1)); + OUT_RING(fence_reg); + OUT_RING(val); + ADVANCE_LP_RING(); + } else + I915_WRITE(fence_reg, val); } -static void i830_write_fence_reg(struct drm_i915_fence_reg *reg) +static void i830_write_fence_reg(struct drm_i915_fence_reg *reg, int 
pipelined) { struct drm_gem_object *obj = reg->obj; struct drm_device *dev = obj->dev; @@ -2442,6 +2475,7 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *reg) uint32_t val; uint32_t pitch_val; uint32_t fence_size_bits; + RING_LOCALS; if ((obj_priv->gtt_offset & ~I830_FENCE_START_MASK) || (obj_priv->gtt_offset & (obj->size - 1))) { @@ -2463,7 +2497,15 @@ static void i830_write_fence_reg(struct drm_i915_fence_reg *reg) val |= pitch_val << I830_FENCE_PITCH_SHIFT; val |= I830_FENCE_REG_VALID; - I915_WRITE(FENCE_REG_830_0 + (regnum * 4), val); + if (pipelined) { + BEGIN_LP_RING(4); + OUT_RING(MI_NOOP); + OUT_RING(MI_LOAD_REGISTER_IMM(1)); + OUT_RING(FENCE_REG_830_0 + (regnum * 4)); + OUT_RING(val); + ADVANCE_LP_RING(); + } else + I915_WRITE(FENCE_REG_830_0 + (regnum * 4), val); } static int i915_find_fence_reg(struct drm_device *dev, int pipelined) @@ -2593,21 +2635,14 @@ i915_gem_object_get_fence_reg(struct drm_gem_object *obj, int pipelined) else reg->setup_seqno = 0; - /* WIP: Synchronize again for the not-yet-pipeplined stuff */ - if (pipelined && reg->last_rendering_seqno != 0) { - ret = i915_wait_request(dev, reg->last_rendering_seqno); - if (ret != 0) - return ret; - } - if (IS_GEN6(dev)) - sandybridge_write_fence_reg(reg); + sandybridge_write_fence_reg(reg, pipelined); else if (IS_I965G(dev)) - i965_write_fence_reg(reg); + i965_write_fence_reg(reg, pipelined); else if (IS_I9XX(dev)) - i915_write_fence_reg(reg); + i915_write_fence_reg(reg, pipelined); else - i830_write_fence_reg(reg); + i830_write_fence_reg(reg, pipelined); trace_i915_gem_object_get_fence(obj, obj_priv->fence_reg, obj_priv->tiling_mode); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 3d59862..851a0bd 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -183,7 +183,12 @@ #define MI_MEM_VIRTUAL (1 << 22) /* 965+ only */ #define MI_STORE_DWORD_INDEX MI_INSTR(0x21, 1) #define MI_STORE_DWORD_INDEX_SHIFT 2 -#define 
MI_LOAD_REGISTER_IMM MI_INSTR(0x22, 1) +/* Official intel docs are somewhat sloppy concerning MI_LOAD_REGISTER_IMM: + * - Always issue a MI_NOOP _before_ the MI_LOAD_REGISTER_IMM - otherwise hw + * simply ignores the register load under certain conditions. + * - One can actually load arbitrarily many arbitrary registers: Simply issue x + * address/value pairs. Don't overdo it, though, x <= 2^4 must hold! */ +#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x) - 1) #define MI_BATCH_BUFFER MI_INSTR(0x30, 1) #define MI_BATCH_NON_SECURE (1) #define MI_BATCH_NON_SECURE_I965 (1<<8) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index c01dadb..9a02ccf 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1271,8 +1271,7 @@ intel_pin_and_fence_fb_obj(struct drm_device *dev, struct drm_gem_object *obj) * framebuffer compression. For simplicity, we always install * a fence as the cost is not that onerous. */ - if (obj_priv->fence_reg == I915_FENCE_REG_NONE && - obj_priv->tiling_mode != I915_TILING_NONE) { + if (obj_priv->tiling_mode != I915_TILING_NONE) { /* FIXME: Check whether pipelined fencing makes * sense for the pageflip. */ ret = i915_gem_object_get_fence_reg(obj, 0);