diff mbox

[15/15] drm/i915: pipelined fencing, part 2: fence setup

Message ID 1268323140-12006-16-git-send-email-daniel.vetter@ffwll.ch (mailing list archive)
State Deferred, archived
Headers show

Commit Message

Daniel Vetter March 11, 2010, 3:59 p.m. UTC
None
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a8209df..07e73fc 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2348,7 +2348,8 @@  i915_gem_object_get_pages(struct drm_gem_object *obj,
 	return 0;
 }
 
-static void sandybridge_write_fence_reg(struct drm_i915_fence_reg *reg)
+static void sandybridge_write_fence_reg(struct drm_i915_fence_reg *reg,
+					int pipelined)
 {
 	struct drm_gem_object *obj = reg->obj;
 	struct drm_device *dev = obj->dev;
@@ -2356,6 +2357,7 @@  static void sandybridge_write_fence_reg(struct drm_i915_fence_reg *reg)
 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
 	int regnum = obj_priv->fence_reg;
 	uint64_t val;
+	RING_LOCALS;
 
 	val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) &
 		    0xfffff000) << 32;
@@ -2367,10 +2369,20 @@  static void sandybridge_write_fence_reg(struct drm_i915_fence_reg *reg)
 		val |= 1 << I965_FENCE_TILING_Y_SHIFT;
 	val |= I965_FENCE_REG_VALID;
 
-	I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (regnum * 8), val);
+	if (pipelined) {
+		BEGIN_LP_RING(6);
+		OUT_RING(MI_NOOP); /* must precede LRI, see i915_reg.h */
+		OUT_RING(MI_LOAD_REGISTER_IMM(2));
+		OUT_RING(FENCE_REG_SANDYBRIDGE_0 + (regnum * 8));
+		OUT_RING((uint32_t) val); /* low dword */
+		OUT_RING(FENCE_REG_SANDYBRIDGE_0 + (regnum * 8) + 4);
+		OUT_RING((uint32_t) (val >> 32)); /* high dword */
+		ADVANCE_LP_RING();
+	} else
+		I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (regnum * 8), val);
 }
 
-static void i965_write_fence_reg(struct drm_i915_fence_reg *reg)
+static void i965_write_fence_reg(struct drm_i915_fence_reg *reg, int pipelined)
 {
 	struct drm_gem_object *obj = reg->obj;
 	struct drm_device *dev = obj->dev;
@@ -2378,6 +2390,7 @@  static void i965_write_fence_reg(struct drm_i915_fence_reg *reg)
 	struct drm_i915_gem_object *obj_priv = obj->driver_private;
 	int regnum = obj_priv->fence_reg;
 	uint64_t val;
+	RING_LOCALS;
 
 	val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) &
 		    0xfffff000) << 32;
@@ -2387,10 +2400,20 @@  static void i965_write_fence_reg(struct drm_i915_fence_reg *reg)
 		val |= 1 << I965_FENCE_TILING_Y_SHIFT;
 	val |= I965_FENCE_REG_VALID;
 
-	I915_WRITE64(FENCE_REG_965_0 + (regnum * 8), val);
+	if (pipelined) {
+		BEGIN_LP_RING(6);
+		OUT_RING(MI_NOOP);
+		OUT_RING(MI_LOAD_REGISTER_IMM(2));
+		OUT_RING(FENCE_REG_965_0 + (regnum * 8));
+		OUT_RING((uint32_t) val);
+		OUT_RING(FENCE_REG_965_0 + (regnum * 8) + 4);
+		OUT_RING((uint32_t) (val >> 32));
+		ADVANCE_LP_RING();
+	} else
+		I915_WRITE64(FENCE_REG_965_0 + (regnum * 8), val);
 }
 
-static void i915_write_fence_reg(struct drm_i915_fence_reg *reg)
+static void i915_write_fence_reg(struct drm_i915_fence_reg *reg, int pipelined)
 {
 	struct drm_gem_object *obj = reg->obj;
 	struct drm_device *dev = obj->dev;
@@ -2400,6 +2423,7 @@  static void i915_write_fence_reg(struct drm_i915_fence_reg *reg)
 	int tile_width;
 	uint32_t fence_reg, val;
 	uint32_t pitch_val;
+	RING_LOCALS;
 
 	if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) ||
 	    (obj_priv->gtt_offset & (obj->size - 1))) {
@@ -2429,10 +2453,19 @@  static void i915_write_fence_reg(struct drm_i915_fence_reg *reg)
 		fence_reg = FENCE_REG_830_0 + (regnum * 4);
 	else
 		fence_reg = FENCE_REG_945_8 + ((regnum - 8) * 4);
-	I915_WRITE(fence_reg, val);
+
+	if (pipelined) {
+		BEGIN_LP_RING(4);
+		OUT_RING(MI_NOOP);
+		OUT_RING(MI_LOAD_REGISTER_IMM(1));
+		OUT_RING(fence_reg);
+		OUT_RING(val);
+		ADVANCE_LP_RING();
+	} else
+		I915_WRITE(fence_reg, val);
 }
 
-static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
+static void i830_write_fence_reg(struct drm_i915_fence_reg *reg, int pipelined)
 {
 	struct drm_gem_object *obj = reg->obj;
 	struct drm_device *dev = obj->dev;
@@ -2442,6 +2475,7 @@  static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
 	uint32_t val;
 	uint32_t pitch_val;
 	uint32_t fence_size_bits;
+	RING_LOCALS;
 
 	if ((obj_priv->gtt_offset & ~I830_FENCE_START_MASK) ||
 	    (obj_priv->gtt_offset & (obj->size - 1))) {
@@ -2463,7 +2497,15 @@  static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
 	val |= pitch_val << I830_FENCE_PITCH_SHIFT;
 	val |= I830_FENCE_REG_VALID;
 
-	I915_WRITE(FENCE_REG_830_0 + (regnum * 4), val);
+	if (pipelined) {
+		BEGIN_LP_RING(4);
+		OUT_RING(MI_NOOP);
+		OUT_RING(MI_LOAD_REGISTER_IMM(1));
+		OUT_RING(FENCE_REG_830_0 + (regnum * 4));
+		OUT_RING(val);
+		ADVANCE_LP_RING();
+	} else
+		I915_WRITE(FENCE_REG_830_0 + (regnum * 4), val);
 }
 
 static int i915_find_fence_reg(struct drm_device *dev, int pipelined)
@@ -2593,21 +2635,14 @@  i915_gem_object_get_fence_reg(struct drm_gem_object *obj, int pipelined)
 	else
 		reg->setup_seqno = 0;
 
-	/* WIP: Synchronize again for the not-yet-pipeplined stuff */
-	if (pipelined && reg->last_rendering_seqno != 0) {
-		ret = i915_wait_request(dev, reg->last_rendering_seqno);
-		if (ret != 0)
-			return ret;
-	}
-
 	if (IS_GEN6(dev))
-		sandybridge_write_fence_reg(reg);
+		sandybridge_write_fence_reg(reg, pipelined);
 	else if (IS_I965G(dev))
-		i965_write_fence_reg(reg);
+		i965_write_fence_reg(reg, pipelined);
 	else if (IS_I9XX(dev))
-		i915_write_fence_reg(reg);
+		i915_write_fence_reg(reg, pipelined);
 	else
-		i830_write_fence_reg(reg);
+		i830_write_fence_reg(reg, pipelined);
 
 	trace_i915_gem_object_get_fence(obj, obj_priv->fence_reg,
 			obj_priv->tiling_mode);
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 3d59862..851a0bd 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -183,7 +183,12 @@ 
 #define   MI_MEM_VIRTUAL	(1 << 22) /* 965+ only */
 #define MI_STORE_DWORD_INDEX	MI_INSTR(0x21, 1)
 #define   MI_STORE_DWORD_INDEX_SHIFT 2
-#define MI_LOAD_REGISTER_IMM	MI_INSTR(0x22, 1)
+/* Official intel docs are somewhat sloppy concerning MI_LOAD_REGISTER_IMM:
+ * - Always issue a MI_NOOP _before_ the MI_LOAD_REGISTER_IMM - otherwise hw
+ *   simply ignores the register load under certain conditions.
+ * - One can actually load arbitrary many arbitrary registers: Simply issue x
+ *   address/value pairs. Don't overdo it, though, x <= 2^4 must hold! */
+#define MI_LOAD_REGISTER_IMM(x)	MI_INSTR(0x22, 2*(x) - 1)
 #define MI_BATCH_BUFFER		MI_INSTR(0x30, 1)
 #define   MI_BATCH_NON_SECURE	(1)
 #define   MI_BATCH_NON_SECURE_I965 (1<<8)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index c01dadb..9a02ccf 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1271,8 +1271,7 @@  intel_pin_and_fence_fb_obj(struct drm_device *dev, struct drm_gem_object *obj)
 	 * framebuffer compression.  For simplicity, we always install
 	 * a fence as the cost is not that onerous.
 	 */
-	if (obj_priv->fence_reg == I915_FENCE_REG_NONE &&
-	    obj_priv->tiling_mode != I915_TILING_NONE) {
+	if (obj_priv->tiling_mode != I915_TILING_NONE) {
 		/* FIXME: Check whether pipelined fencing makes
 		 * sense for the pageflip. */
 		ret = i915_gem_object_get_fence_reg(obj, 0);