diff mbox

drm/i915: Add intel implementation of the pageflip ioctl

Message ID 20091119104753.19dd6b47@jbarnes-piketon (mailing list archive)
State Accepted
Headers show

Commit Message

Jesse Barnes Nov. 19, 2009, 6:47 p.m. UTC
None
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 835625b..75acb5d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -536,6 +536,10 @@  typedef struct drm_i915_private {
 	/* indicate whether the LVDS_BORDER should be enabled or not */
 	unsigned int lvds_border_bits;
 
+	struct drm_crtc *plane_to_crtc_mapping[2];
+	struct drm_crtc *pipe_to_crtc_mapping[2];
+	wait_queue_head_t pending_flip_queue;
+
 	/* Reclocking support */
 	bool render_reclock_avail;
 	bool lvds_downclock_avail;
@@ -635,6 +639,13 @@  struct drm_i915_gem_object {
 	 * Advice: are the backing pages purgeable?
 	 */
 	int madv;
+
+	/**
+	 * Number of crtcs where this object is currently the fb, but
+	 * will be page flipped away on the next vblank.  When it
+	 * reaches 0, dev_priv->pending_flip_queue will be woken up.
+	 */
+	atomic_t pending_flip;
 };
 
 /**
@@ -826,6 +837,7 @@  void i915_gem_free_all_phys_object(struct drm_device *dev);
 int i915_gem_object_get_pages(struct drm_gem_object *obj);
 void i915_gem_object_put_pages(struct drm_gem_object *obj);
 void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv);
+void i915_gem_object_flush_write_domain(struct drm_gem_object *obj);
 
 void i915_gem_shrinker_init(void);
 void i915_gem_shrinker_exit(void);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 2065b8f..55ed06f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2771,6 +2771,22 @@  i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj)
 					    old_write_domain);
 }
 
+/*
+ * Dispatch on obj->write_domain to the matching flush helper: the GTT or
+ * CPU helper when it equals that domain, and the GPU helper for every
+ * other value.
+ */
+void
+i915_gem_object_flush_write_domain(struct drm_gem_object *obj)
+{
+	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
+		i915_gem_object_flush_gtt_write_domain(obj);
+	else if (obj->write_domain == I915_GEM_DOMAIN_CPU)
+		i915_gem_object_flush_cpu_write_domain(obj);
+	else
+		i915_gem_object_flush_gpu_write_domain(obj);
+}
+
 /**
  * Moves a single object to the GTT read, and possibly write domain.
  *
@@ -3536,6 +3552,41 @@  i915_gem_check_execbuffer (struct drm_i915_gem_execbuffer *exec,
 	return 0;
 }
 
+/*
+ * Sleep interruptibly until no object in object_list has pending_flip > 0.
+ * dev->struct_mutex is dropped around schedule(), so it is expected to be
+ * held on entry.  Returns 0, or -ERESTARTSYS if a signal is pending first.
+ */
+static int
+i915_gem_wait_for_pending_flip(struct drm_device *dev,
+			       struct drm_gem_object **object_list,
+			       int count)
+{
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	wait_queue_head_t *queue = &dev_priv->pending_flip_queue;
+	struct drm_i915_gem_object *priv;
+	DEFINE_WAIT(wait);
+	int n, err = 0;
+
+	for (;;) {
+		prepare_to_wait(queue, &wait, TASK_INTERRUPTIBLE);
+		for (n = 0; n < count; n++) {
+			priv = object_list[n]->driver_private;
+			if (atomic_read(&priv->pending_flip) > 0)
+				break;
+		}
+		if (n < count && signal_pending(current))
+			err = -ERESTARTSYS;
+		if (n == count || err)
+			break;
+		mutex_unlock(&dev->struct_mutex);
+		schedule();
+		mutex_lock(&dev->struct_mutex);
+	}
+	finish_wait(queue, &wait);
+	return err;
+}
+
 int
 i915_gem_execbuffer(struct drm_device *dev, void *data,
 		    struct drm_file *file_priv)
@@ -3551,7 +3602,7 @@  i915_gem_execbuffer(struct drm_device *dev, void *data,
 	int ret, ret2, i, pinned = 0;
 	uint64_t exec_offset;
 	uint32_t seqno, flush_domains, reloc_index;
-	int pin_tries;
+	int pin_tries, flips;
 
 #if WATCH_EXEC
 	DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
@@ -3623,6 +3674,7 @@  i915_gem_execbuffer(struct drm_device *dev, void *data,
 	}
 
 	/* Look up object handles */
+	flips = 0;
 	for (i = 0; i < args->buffer_count; i++) {
 		object_list[i] = drm_gem_object_lookup(dev, file_priv,
 						       exec_list[i].handle);
@@ -3641,6 +3693,14 @@  i915_gem_execbuffer(struct drm_device *dev, void *data,
 			goto err;
 		}
 		obj_priv->in_execbuffer = true;
+		flips += atomic_read(&obj_priv->pending_flip);
+	}
+
+	if (flips > 0) {
+		ret = i915_gem_wait_for_pending_flip(dev, object_list,
+						     args->buffer_count);
+		if (ret)
+			goto err;
 	}
 
 	/* Pin and relocate */
@@ -4625,8 +4685,8 @@  i915_gem_load(struct drm_device *dev)
 			for (i = 0; i < 8; i++)
 				I915_WRITE(FENCE_REG_945_8 + (i * 4), 0);
 	}
-
 	i915_gem_detect_bit_6_swizzle(dev);
+	init_waitqueue_head(&dev_priv->pending_flip_queue);
 }
 
 /*
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 024fb95..86abd6b 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -46,6 +46,8 @@ 
 #define I915_INTERRUPT_ENABLE_FIX (I915_ASLE_INTERRUPT |		 \
 				   I915_DISPLAY_PIPE_A_EVENT_INTERRUPT | \
 				   I915_DISPLAY_PIPE_B_EVENT_INTERRUPT | \
+				   I915_DISPLAY_PLANE_A_FLIP_PENDING_INTERRUPT | \
+				   I915_DISPLAY_PLANE_B_FLIP_PENDING_INTERRUPT | \
 				   I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT)
 
 /** Interrupts that we mask and unmask at runtime. */
@@ -664,14 +666,22 @@  irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)
 			mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD);
 		}
 
+		if (iir & I915_DISPLAY_PLANE_A_FLIP_PENDING_INTERRUPT)
+			intel_prepare_page_flip(dev, 0);
+
+		if (iir & I915_DISPLAY_PLANE_B_FLIP_PENDING_INTERRUPT)
+			intel_prepare_page_flip(dev, 1);
+
 		if (pipea_stats & vblank_status) {
 			vblank++;
 			drm_handle_vblank(dev, 0);
+			intel_finish_page_flip(dev, 0);
 		}
 
 		if (pipeb_stats & vblank_status) {
 			vblank++;
 			drm_handle_vblank(dev, 1);
+			intel_finish_page_flip(dev, 1);
 		}
 
 		if ((pipeb_stats & I915_LEGACY_BLC_EVENT_STATUS) ||
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index b11a682..210fe85 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -157,6 +157,8 @@ 
 #define   MI_OVERLAY_ON		(0x1<<21)
 #define   MI_OVERLAY_OFF	(0x2<<21)
 #define MI_LOAD_SCAN_LINES_INCL MI_INSTR(0x12, 0)
+#define MI_DISPLAY_FLIP		MI_INSTR(0x14, 2)
+#define   MI_DISPLAY_FLIP_PLANE(n) ((n) << 20)
 #define MI_STORE_DWORD_IMM	MI_INSTR(0x20, 1)
 #define   MI_MEM_VIRTUAL	(1 << 22) /* 965+ only */
 #define MI_STORE_DWORD_INDEX	MI_INSTR(0x21, 1)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 33113c7..4cb3294 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1190,6 +1190,50 @@  out_disable:
 }
 
 static int
+intel_pin_and_fence_fb_obj(struct drm_device *dev, struct drm_gem_object *obj)
+{
+	struct drm_i915_gem_object *priv = obj->driver_private;
+	u32 align;
+	int err;
+
+	/*
+	 * Pin obj for scan-out and, when it is tiled, make sure it holds a
+	 * fence register.  Returns 0 with obj pinned, -EINVAL for Y tiling,
+	 * or the pin/fence error (obj is unpinned again if the fence failed).
+	 */
+	if (priv->tiling_mode == I915_TILING_Y) {
+		/* FIXME: Is this true? */
+		DRM_ERROR("Y tiled not allowed for scan out buffers\n");
+		return -EINVAL;
+	}
+
+	if (priv->tiling_mode == I915_TILING_NONE)
+		align = 64 * 1024;
+	else if (priv->tiling_mode == I915_TILING_X)
+		align = 0;	/* pin() aligns as the fence requires */
+	else
+		BUG();
+
+	err = i915_gem_object_pin(obj, align);
+	if (err)
+		return err;
+
+	/* Install a fence for tiled scan-out. Pre-i965 always needs a
+	 * fence, whereas 965+ only requires a fence if using
+	 * framebuffer compression.  For simplicity, we always install
+	 * a fence as the cost is not that onerous.
+	 */
+	if (priv->tiling_mode == I915_TILING_NONE ||
+	    priv->fence_reg != I915_FENCE_REG_NONE)
+		return 0;
+
+	err = i915_gem_object_get_fence_reg(obj);
+	if (err)
+		i915_gem_object_unpin(obj);
+	return err;
+}
+
+static int
 intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
 		    struct drm_framebuffer *old_fb)
 {
@@ -1208,7 +1252,7 @@  intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
 	int dspstride = (plane == 0) ? DSPASTRIDE : DSPBSTRIDE;
 	int dsptileoff = (plane == 0 ? DSPATILEOFF : DSPBTILEOFF);
 	int dspcntr_reg = (plane == 0) ? DSPACNTR : DSPBCNTR;
-	u32 dspcntr, alignment;
+	u32 dspcntr;
 	int ret;
 
 	/* no fb bound */
@@ -1230,24 +1274,8 @@  intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
 	obj = intel_fb->obj;
 	obj_priv = obj->driver_private;
 
-	switch (obj_priv->tiling_mode) {
-	case I915_TILING_NONE:
-		alignment = 64 * 1024;
-		break;
-	case I915_TILING_X:
-		/* pin() will align the object as required by fence */
-		alignment = 0;
-		break;
-	case I915_TILING_Y:
-		/* FIXME: Is this true? */
-		DRM_ERROR("Y tiled not allowed for scan out buffers\n");
-		return -EINVAL;
-	default:
-		BUG();
-	}
-
 	mutex_lock(&dev->struct_mutex);
-	ret = i915_gem_object_pin(obj, alignment);
+	ret = intel_pin_and_fence_fb_obj(dev, obj);
 	if (ret != 0) {
 		mutex_unlock(&dev->struct_mutex);
 		return ret;
@@ -1260,20 +1288,6 @@  intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
 		return ret;
 	}
 
-	/* Install a fence for tiled scan-out. Pre-i965 always needs a fence,
-	 * whereas 965+ only requires a fence if using framebuffer compression.
-	 * For simplicity, we always install a fence as the cost is not that onerous.
-	 */
-	if (obj_priv->fence_reg == I915_FENCE_REG_NONE &&
-	    obj_priv->tiling_mode != I915_TILING_NONE) {
-		ret = i915_gem_object_get_fence_reg(obj);
-		if (ret != 0) {
-			i915_gem_object_unpin(obj);
-			mutex_unlock(&dev->struct_mutex);
-			return ret;
-		}
-	}
-
 	dspcntr = I915_READ(dspcntr_reg);
 	/* Mask out pixel format bits in case we change it */
 	dspcntr &= ~DISPPLANE_PIXFORMAT_MASK;
@@ -4034,6 +4048,158 @@  static void intel_crtc_destroy(struct drm_crtc *crtc)
 	kfree(intel_crtc);
 }
 
+struct intel_unpin_work {
+	struct work_struct work;	/* executes intel_unpin_work_fn() */
+	struct drm_device *dev;		/* for dev->struct_mutex in the worker */
+	struct drm_gem_object *obj;	/* old fb object to unpin + unreference */
+	struct drm_pending_vblank_event *event;	/* may be NULL */
+	int pending;			/* set by intel_prepare_page_flip() */
+};
+
+static void intel_unpin_work_fn(struct work_struct *__work)
+{
+	struct intel_unpin_work *unpin;
+	/* Queued by intel_finish_page_flip(): drop the old fb's pin and ref. */
+	unpin = container_of(__work, struct intel_unpin_work, work);
+	mutex_lock(&unpin->dev->struct_mutex);
+	i915_gem_object_unpin(unpin->obj);
+	drm_gem_object_unreference(unpin->obj);
+	mutex_unlock(&unpin->dev->struct_mutex);
+	kfree(unpin);
+}
+
+/*
+ * Vblank irq hook: complete a flip on this pipe that was marked pending --
+ * queue its event, drop the vblank ref and pending_flip, schedule the unpin.
+ */
+void intel_finish_page_flip(struct drm_device *dev, int pipe)
+{
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe];
+	struct intel_crtc *intel_crtc;
+	struct intel_unpin_work *work;
+	struct drm_i915_gem_object *obj_priv;
+	struct drm_pending_vblank_event *e;
+	struct timeval now;
+	unsigned long flags;
+
+	/* Ignore early vblank irqs; test crtc itself, not a container_of() */
+	if (crtc == NULL)
+		return;
+	intel_crtc = to_intel_crtc(crtc);
+
+	spin_lock_irqsave(&dev->event_lock, flags);
+	work = intel_crtc->unpin_work;
+	if (work == NULL || !work->pending) {
+		spin_unlock_irqrestore(&dev->event_lock, flags);
+		return;
+	}
+	intel_crtc->unpin_work = NULL;
+	drm_vblank_put(dev, intel_crtc->pipe);
+	e = work->event;
+	if (e) {
+		do_gettimeofday(&now);
+		e->event.sequence = drm_vblank_count(dev, intel_crtc->pipe);
+		e->event.tv_sec = now.tv_sec;
+		e->event.tv_usec = now.tv_usec;
+		list_add_tail(&e->base.link, &e->base.file_priv->event_list);
+		wake_up_interruptible(&e->base.file_priv->event_wait);
+	}
+	spin_unlock_irqrestore(&dev->event_lock, flags);
+
+	obj_priv = work->obj->driver_private;
+	if (atomic_dec_and_test(&obj_priv->pending_flip))
+		DRM_WAKEUP(&dev_priv->pending_flip_queue);
+	schedule_work(&work->work);
+}
+
+/* Flip-pending irq hook: flag the plane's queued flip, if any, as pending. */
+void intel_prepare_page_flip(struct drm_device *dev, int plane)
+{
+	drm_i915_private_t *i915 = dev->dev_private;
+	struct drm_crtc *crtc = i915->plane_to_crtc_mapping[plane];
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&dev->event_lock, irqflags);
+	if (to_intel_crtc(crtc)->unpin_work != NULL)
+		to_intel_crtc(crtc)->unpin_work->pending = 1;
+	spin_unlock_irqrestore(&dev->event_lock, irqflags);
+}
+
+/*
+ * Queue an MI_DISPLAY_FLIP of crtc to fb.  Returns 0, -ENOMEM, -EBUSY if a
+ * flip is already queued on this crtc, or the pin/fence or vblank_get error.
+ */
+static int intel_crtc_page_flip(struct drm_crtc *crtc,
+				struct drm_framebuffer *fb,
+				struct drm_pending_vblank_event *event)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_gem_object *obj = to_intel_framebuffer(fb)->obj;
+	struct drm_i915_gem_object *obj_priv = obj->driver_private, *old_priv;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct intel_unpin_work *work;
+	unsigned long flags;
+	int ret;
+	RING_LOCALS;
+
+	work = kzalloc(sizeof *work, GFP_KERNEL);
+	if (work == NULL)
+		return -ENOMEM;
+
+	mutex_lock(&dev->struct_mutex);
+	work->event = event;
+	work->dev = dev;
+	work->obj = to_intel_framebuffer(crtc->fb)->obj;
+	INIT_WORK(&work->work, intel_unpin_work_fn);
+	/* We borrow the event spin lock for protecting unpin_work */
+	spin_lock_irqsave(&dev->event_lock, flags);
+	ret = intel_crtc->unpin_work ? -EBUSY : 0;
+	if (ret == 0)
+		intel_crtc->unpin_work = work;
+	spin_unlock_irqrestore(&dev->event_lock, flags);
+	if (ret)
+		goto out_free;
+
+	ret = intel_pin_and_fence_fb_obj(dev, obj);
+	if (ret)
+		goto out_release;
+	ret = drm_vblank_get(dev, intel_crtc->pipe);
+	if (ret)
+		goto out_unpin;	/* else finish's vblank_put() is unbalanced */
+
+	/* Ref the old fb object for the unpin work; count the flip on it too. */
+	drm_gem_object_reference(work->obj);
+	old_priv = work->obj->driver_private;
+	atomic_inc(&old_priv->pending_flip);
+	crtc->fb = fb;
+	i915_gem_object_flush_write_domain(obj);
+	BEGIN_LP_RING(4);
+	OUT_RING(MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
+	OUT_RING(fb->pitch);
+	if (IS_I965G(dev)) {
+		OUT_RING(obj_priv->gtt_offset | obj_priv->tiling_mode);
+		OUT_RING((fb->width << 16) | fb->height);
+	} else {
+		OUT_RING(obj_priv->gtt_offset);
+		OUT_RING(MI_NOOP);
+	}
+	ADVANCE_LP_RING();
+	mutex_unlock(&dev->struct_mutex);
+	return 0;
+
+out_unpin:
+	i915_gem_object_unpin(obj);
+out_release:
+	spin_lock_irqsave(&dev->event_lock, flags);
+	intel_crtc->unpin_work = NULL;	/* never leave freed work installed */
+	spin_unlock_irqrestore(&dev->event_lock, flags);
+out_free:
+	mutex_unlock(&dev->struct_mutex);
+	kfree(work);
+	return ret;
+}
+
 static const struct drm_crtc_helper_funcs intel_helper_funcs = {
 	.dpms = intel_crtc_dpms,
 	.mode_fixup = intel_crtc_mode_fixup,
@@ -4050,12 +4216,14 @@  static const struct drm_crtc_funcs intel_crtc_funcs = {
 	.gamma_set = intel_crtc_gamma_set,
 	.set_config = drm_crtc_helper_set_config,
 	.destroy = intel_crtc_destroy,
+	.page_flip = intel_crtc_page_flip,
 };
 
 
 static void intel_crtc_init(struct drm_device *dev, int pipe)
 {
 	struct intel_crtc *intel_crtc;
+	struct drm_i915_private *dev_priv = dev->dev_private;
 	int i;
 
 	intel_crtc = kzalloc(sizeof(struct intel_crtc) + (INTELFB_CONN_LIMIT * sizeof(struct drm_connector *)), GFP_KERNEL);
@@ -4081,6 +4249,11 @@  static void intel_crtc_init(struct drm_device *dev, int pipe)
 		intel_crtc->plane = ((pipe == 0) ? 1 : 0);
 	}
 
+	BUG_ON(pipe >= ARRAY_SIZE(dev_priv->plane_to_crtc_mapping) ||
+	       dev_priv->plane_to_crtc_mapping[intel_crtc->plane] != NULL);
+	dev_priv->plane_to_crtc_mapping[intel_crtc->plane] = &intel_crtc->base;
+	dev_priv->pipe_to_crtc_mapping[intel_crtc->pipe] = &intel_crtc->base;
+
 	intel_crtc->cursor_addr = 0;
 	intel_crtc->dpms_mode = DRM_MODE_DPMS_OFF;
 	drm_crtc_helper_add(&intel_crtc->base, &intel_helper_funcs);
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 4972405..3547bba 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -148,6 +148,7 @@  struct intel_crtc {
 	struct timer_list idle_timer;
 	bool lowfreq_avail;
 	struct intel_overlay *overlay;
+	struct intel_unpin_work *unpin_work;
 };
 
 #define to_intel_crtc(x) container_of(x, struct intel_crtc, base)
@@ -211,6 +212,8 @@  extern int intel_framebuffer_create(struct drm_device *dev,
 				    struct drm_framebuffer **fb,
 				    struct drm_gem_object *obj);
 
+extern void intel_prepare_page_flip(struct drm_device *dev, int plane);
+extern void intel_finish_page_flip(struct drm_device *dev, int pipe);
 extern void intel_setup_overlay(struct drm_device *dev);
 extern void intel_cleanup_overlay(struct drm_device *dev);
 extern int intel_overlay_switch_off(struct intel_overlay *overlay);