qxl: rewrite framebuffer support
diff mbox

Message ID 1430826769-302-1-git-send-email-kraxel@redhat.com
State New
Headers show

Commit Message

Gerd Hoffmann May 5, 2015, 11:52 a.m. UTC
Completely different approach:  Instead of encoding each and every
framebuffer update as spice operation simply update the shadow
framebuffer and maintain a dirty rectangle.  Also schedule a worker
to push an update for the dirty rectangle as spice operation.  Usually
a bunch of dirty rectangle updates are collected before the worker
actually runs.

What changes:  Updates get batched now.  Instead of sending tons of
small updates a few large ones are sent.  When the same region is
updated multiple times within a short timeframe (scrolling multiple
lines for example) we send a single update only.  Spice server has an
easier job now:  The dependency tree for display operations which spice
server maintains for lazy rendering is alot smaller now.  Spice server's
image compression probably works better too with the larger image blits.

Net effect:  framebuffer console @ qxldrmfb is an order of magnitude
faster now.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
 drivers/gpu/drm/qxl/qxl_fb.c | 275 +++++++++----------------------------------
 1 file changed, 57 insertions(+), 218 deletions(-)

Comments

Dave Airlie May 7, 2015, 3:09 a.m. UTC | #1
On 5 May 2015 at 21:52, Gerd Hoffmann <kraxel@redhat.com> wrote:
> Completely different approach:  Instead of encoding each and every
> framebuffer update as spice operation simply update the shadow
> framebuffer and maintain a dirty rectangle.  Also schedule a worker
> to push an update for the dirty rectangle as spice operation.  Usually
> a bunch of dirty rectangle updates are collected before the worker
> actually runs.
>
> What changes:  Updates get batched now.  Instead of sending tons of
> small updates a few large ones are sent.  When the same region is
> updated multiple times within a short timeframe (scrolling multiple
> lines for example) we send a single update only.  Spice server has an
> easier job now:  The dependency tree for display operations which spice
> server maintains for lazy rendering is alot smaller now.  Spice server's
> image compression probably works better too with the larger image blits.
>
> Net effect:  framebuffer console @ qxldrmfb is an order of magnitude
> faster now.
>
> Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>

Looks good, I often wondered if we should have done this, spice
protocol overhead
seemed quite high.

I'll merge this into drm-next.

Dave.

Patch
diff mbox

diff --git a/drivers/gpu/drm/qxl/qxl_fb.c b/drivers/gpu/drm/qxl/qxl_fb.c
index f778c0e..6b6e57e 100644
--- a/drivers/gpu/drm/qxl/qxl_fb.c
+++ b/drivers/gpu/drm/qxl/qxl_fb.c
@@ -37,21 +37,6 @@ 
 
 #define QXL_DIRTY_DELAY (HZ / 30)
 
-#define QXL_FB_OP_FILLRECT 1
-#define QXL_FB_OP_COPYAREA 2
-#define QXL_FB_OP_IMAGEBLIT 3
-
-struct qxl_fb_op {
-	struct list_head head;
-	int op_type;
-	union {
-		struct fb_fillrect fr;
-		struct fb_copyarea ca;
-		struct fb_image ib;
-	} op;
-	void *img_data;
-};
-
 struct qxl_fbdev {
 	struct drm_fb_helper helper;
 	struct qxl_framebuffer	qfb;
@@ -66,7 +51,6 @@  struct qxl_fbdev {
 	/* dirty memory logging */
 	struct {
 		spinlock_t lock;
-		bool active;
 		unsigned x1;
 		unsigned y1;
 		unsigned x2;
@@ -105,23 +89,33 @@  static void qxl_fb_dirty_flush(struct fb_info *info)
 	struct qxl_device *qdev = qfbdev->qdev;
 	struct qxl_fb_image qxl_fb_image;
 	struct fb_image *image = &qxl_fb_image.fb_image;
+	unsigned long flags;
 	u32 x1, x2, y1, y2;
 
 	/* TODO: hard coding 32 bpp */
 	int stride = qfbdev->qfb.base.pitches[0];
 
+	spin_lock_irqsave(&qfbdev->dirty.lock, flags);
+
 	x1 = qfbdev->dirty.x1;
 	x2 = qfbdev->dirty.x2;
 	y1 = qfbdev->dirty.y1;
 	y2 = qfbdev->dirty.y2;
+	qfbdev->dirty.x1 = 0;
+	qfbdev->dirty.x2 = 0;
+	qfbdev->dirty.y1 = 0;
+	qfbdev->dirty.y2 = 0;
+
+	spin_unlock_irqrestore(&qfbdev->dirty.lock, flags);
+
 	/*
 	 * we are using a shadow draw buffer, at qdev->surface0_shadow
 	 */
 	qxl_io_log(qdev, "dirty x[%d, %d], y[%d, %d]", x1, x2, y1, y2);
 	image->dx = x1;
 	image->dy = y1;
-	image->width = x2 - x1;
-	image->height = y2 - y1;
+	image->width = x2 - x1 + 1;
+	image->height = y2 - y1 + 1;
 	image->fg_color = 0xffffffff; /* unused, just to avoid uninitialized
 					 warnings */
 	image->bg_color = 0;
@@ -136,10 +130,37 @@  static void qxl_fb_dirty_flush(struct fb_info *info)
 
 	qxl_fb_image_init(&qxl_fb_image, qdev, info, NULL);
 	qxl_draw_opaque_fb(&qxl_fb_image, stride);
-	qfbdev->dirty.x1 = 0;
-	qfbdev->dirty.x2 = 0;
-	qfbdev->dirty.y1 = 0;
-	qfbdev->dirty.y2 = 0;
+}
+
+static void qxl_dirty_update(struct qxl_fbdev *qfbdev,
+			     int x, int y, int width, int height)
+{
+	struct qxl_device *qdev = qfbdev->qdev;
+	unsigned long flags;
+	int x2, y2;
+
+	x2 = x + width - 1;
+	y2 = y + height - 1;
+
+	spin_lock_irqsave(&qfbdev->dirty.lock, flags);
+
+	if (qfbdev->dirty.y1 < y)
+		y = qfbdev->dirty.y1;
+	if (qfbdev->dirty.y2 > y2)
+		y2 = qfbdev->dirty.y2;
+	if (qfbdev->dirty.x1 < x)
+		x = qfbdev->dirty.x1;
+	if (qfbdev->dirty.x2 > x2)
+		x2 = qfbdev->dirty.x2;
+
+	qfbdev->dirty.x1 = x;
+	qfbdev->dirty.x2 = x2;
+	qfbdev->dirty.y1 = y;
+	qfbdev->dirty.y2 = y2;
+
+	spin_unlock_irqrestore(&qfbdev->dirty.lock, flags);
+
+	schedule_work(&qdev->fb_work);
 }
 
 static void qxl_deferred_io(struct fb_info *info,
@@ -162,234 +183,51 @@  static void qxl_deferred_io(struct fb_info *info,
 	if (min < max) {
 		y1 = min / info->fix.line_length;
 		y2 = (max / info->fix.line_length) + 1;
-
-		/* TODO: add spin lock? */
-		/* spin_lock_irqsave(&qfbdev->dirty.lock, flags); */
-		qfbdev->dirty.x1 = 0;
-		qfbdev->dirty.y1 = y1;
-		qfbdev->dirty.x2 = info->var.xres;
-		qfbdev->dirty.y2 = y2;
-		/* spin_unlock_irqrestore(&qfbdev->dirty.lock, flags); */
+		qxl_dirty_update(qfbdev, 0, y1, info->var.xres, y2 - y1);
 	}
-
-	qxl_fb_dirty_flush(info);
 };
 
-
 static struct fb_deferred_io qxl_defio = {
 	.delay		= QXL_DIRTY_DELAY,
 	.deferred_io	= qxl_deferred_io,
 };
 
-static void qxl_fb_delayed_fillrect(struct qxl_fbdev *qfbdev,
-				    const struct fb_fillrect *fb_rect)
-{
-	struct qxl_fb_op *op;
-	unsigned long flags;
-
-	op = kmalloc(sizeof(struct qxl_fb_op), GFP_ATOMIC | __GFP_NOWARN);
-	if (!op)
-		return;
-
-	op->op.fr = *fb_rect;
-	op->img_data = NULL;
-	op->op_type = QXL_FB_OP_FILLRECT;
-
-	spin_lock_irqsave(&qfbdev->delayed_ops_lock, flags);
-	list_add_tail(&op->head, &qfbdev->delayed_ops);
-	spin_unlock_irqrestore(&qfbdev->delayed_ops_lock, flags);
-}
-
-static void qxl_fb_delayed_copyarea(struct qxl_fbdev *qfbdev,
-				    const struct fb_copyarea *fb_copy)
-{
-	struct qxl_fb_op *op;
-	unsigned long flags;
-
-	op = kmalloc(sizeof(struct qxl_fb_op), GFP_ATOMIC | __GFP_NOWARN);
-	if (!op)
-		return;
-
-	op->op.ca = *fb_copy;
-	op->img_data = NULL;
-	op->op_type = QXL_FB_OP_COPYAREA;
-
-	spin_lock_irqsave(&qfbdev->delayed_ops_lock, flags);
-	list_add_tail(&op->head, &qfbdev->delayed_ops);
-	spin_unlock_irqrestore(&qfbdev->delayed_ops_lock, flags);
-}
-
-static void qxl_fb_delayed_imageblit(struct qxl_fbdev *qfbdev,
-				     const struct fb_image *fb_image)
-{
-	struct qxl_fb_op *op;
-	unsigned long flags;
-	uint32_t size = fb_image->width * fb_image->height * (fb_image->depth >= 8 ? fb_image->depth / 8 : 1);
-
-	op = kmalloc(sizeof(struct qxl_fb_op) + size, GFP_ATOMIC | __GFP_NOWARN);
-	if (!op)
-		return;
-
-	op->op.ib = *fb_image;
-	op->img_data = (void *)(op + 1);
-	op->op_type = QXL_FB_OP_IMAGEBLIT;
-
-	memcpy(op->img_data, fb_image->data, size);
-
-	op->op.ib.data = op->img_data;
-	spin_lock_irqsave(&qfbdev->delayed_ops_lock, flags);
-	list_add_tail(&op->head, &qfbdev->delayed_ops);
-	spin_unlock_irqrestore(&qfbdev->delayed_ops_lock, flags);
-}
-
-static void qxl_fb_fillrect_internal(struct fb_info *info,
-				     const struct fb_fillrect *fb_rect)
-{
-	struct qxl_fbdev *qfbdev = info->par;
-	struct qxl_device *qdev = qfbdev->qdev;
-	struct qxl_rect rect;
-	uint32_t color;
-	int x = fb_rect->dx;
-	int y = fb_rect->dy;
-	int width = fb_rect->width;
-	int height = fb_rect->height;
-	uint16_t rop;
-	struct qxl_draw_fill qxl_draw_fill_rec;
-
-	if (info->fix.visual == FB_VISUAL_TRUECOLOR ||
-	    info->fix.visual == FB_VISUAL_DIRECTCOLOR)
-		color = ((u32 *) (info->pseudo_palette))[fb_rect->color];
-	else
-		color = fb_rect->color;
-	rect.left = x;
-	rect.right = x + width;
-	rect.top = y;
-	rect.bottom = y + height;
-	switch (fb_rect->rop) {
-	case ROP_XOR:
-		rop = SPICE_ROPD_OP_XOR;
-		break;
-	case ROP_COPY:
-		rop = SPICE_ROPD_OP_PUT;
-		break;
-	default:
-		pr_err("qxl_fb_fillrect(): unknown rop, "
-		       "defaulting to SPICE_ROPD_OP_PUT\n");
-		rop = SPICE_ROPD_OP_PUT;
-	}
-	qxl_draw_fill_rec.qdev = qdev;
-	qxl_draw_fill_rec.rect = rect;
-	qxl_draw_fill_rec.color = color;
-	qxl_draw_fill_rec.rop = rop;
-
-	qxl_draw_fill(&qxl_draw_fill_rec);
-}
-
 static void qxl_fb_fillrect(struct fb_info *info,
-			    const struct fb_fillrect *fb_rect)
+			    const struct fb_fillrect *rect)
 {
 	struct qxl_fbdev *qfbdev = info->par;
-	struct qxl_device *qdev = qfbdev->qdev;
 
-	if (!drm_can_sleep()) {
-		qxl_fb_delayed_fillrect(qfbdev, fb_rect);
-		schedule_work(&qdev->fb_work);
-		return;
-	}
-	/* make sure any previous work is done */
-	flush_work(&qdev->fb_work);
-	qxl_fb_fillrect_internal(info, fb_rect);
-}
-
-static void qxl_fb_copyarea_internal(struct fb_info *info,
-				     const struct fb_copyarea *region)
-{
-	struct qxl_fbdev *qfbdev = info->par;
-
-	qxl_draw_copyarea(qfbdev->qdev,
-			  region->width, region->height,
-			  region->sx, region->sy,
-			  region->dx, region->dy);
+	sys_fillrect(info, rect);
+	qxl_dirty_update(qfbdev, rect->dx, rect->dy, rect->width,
+			 rect->height);
 }
 
 static void qxl_fb_copyarea(struct fb_info *info,
-			    const struct fb_copyarea *region)
+			    const struct fb_copyarea *area)
 {
 	struct qxl_fbdev *qfbdev = info->par;
-	struct qxl_device *qdev = qfbdev->qdev;
 
-	if (!drm_can_sleep()) {
-		qxl_fb_delayed_copyarea(qfbdev, region);
-		schedule_work(&qdev->fb_work);
-		return;
-	}
-	/* make sure any previous work is done */
-	flush_work(&qdev->fb_work);
-	qxl_fb_copyarea_internal(info, region);
-}
-
-static void qxl_fb_imageblit_safe(struct qxl_fb_image *qxl_fb_image)
-{
-	qxl_draw_opaque_fb(qxl_fb_image, 0);
-}
-
-static void qxl_fb_imageblit_internal(struct fb_info *info,
-				      const struct fb_image *image)
-{
-	struct qxl_fbdev *qfbdev = info->par;
-	struct qxl_fb_image qxl_fb_image;
-
-	/* ensure proper order  rendering operations - TODO: must do this
-	 * for everything. */
-	qxl_fb_image_init(&qxl_fb_image, qfbdev->qdev, info, image);
-	qxl_fb_imageblit_safe(&qxl_fb_image);
+	sys_copyarea(info, area);
+	qxl_dirty_update(qfbdev, area->dx, area->dy, area->width,
+			 area->height);
 }
 
 static void qxl_fb_imageblit(struct fb_info *info,
 			     const struct fb_image *image)
 {
 	struct qxl_fbdev *qfbdev = info->par;
-	struct qxl_device *qdev = qfbdev->qdev;
 
-	if (!drm_can_sleep()) {
-		qxl_fb_delayed_imageblit(qfbdev, image);
-		schedule_work(&qdev->fb_work);
-		return;
-	}
-	/* make sure any previous work is done */
-	flush_work(&qdev->fb_work);
-	qxl_fb_imageblit_internal(info, image);
+	sys_imageblit(info, image);
+	qxl_dirty_update(qfbdev, image->dx, image->dy, image->width,
+			 image->height);
 }
 
 static void qxl_fb_work(struct work_struct *work)
 {
 	struct qxl_device *qdev = container_of(work, struct qxl_device, fb_work);
-	unsigned long flags;
-	struct qxl_fb_op *entry, *tmp;
 	struct qxl_fbdev *qfbdev = qdev->mode_info.qfbdev;
 
-	/* since the irq context just adds entries to the end of the
-	   list dropping the lock should be fine, as entry isn't modified
-	   in the operation code */
-	spin_lock_irqsave(&qfbdev->delayed_ops_lock, flags);
-	list_for_each_entry_safe(entry, tmp, &qfbdev->delayed_ops, head) {
-		spin_unlock_irqrestore(&qfbdev->delayed_ops_lock, flags);
-		switch (entry->op_type) {
-		case QXL_FB_OP_FILLRECT:
-			qxl_fb_fillrect_internal(qfbdev->helper.fbdev, &entry->op.fr);
-			break;
-		case QXL_FB_OP_COPYAREA:
-			qxl_fb_copyarea_internal(qfbdev->helper.fbdev, &entry->op.ca);
-			break;
-		case QXL_FB_OP_IMAGEBLIT:
-			qxl_fb_imageblit_internal(qfbdev->helper.fbdev, &entry->op.ib);
-			break;
-		}
-		spin_lock_irqsave(&qfbdev->delayed_ops_lock, flags);
-		list_del(&entry->head);
-		kfree(entry);
-	}
-	spin_unlock_irqrestore(&qfbdev->delayed_ops_lock, flags);
+	qxl_fb_dirty_flush(qfbdev->helper.fbdev);
 }
 
 int qxl_fb_init(struct qxl_device *qdev)
@@ -678,6 +516,7 @@  int qxl_fbdev_init(struct qxl_device *qdev)
 	qfbdev->qdev = qdev;
 	qdev->mode_info.qfbdev = qfbdev;
 	spin_lock_init(&qfbdev->delayed_ops_lock);
+	spin_lock_init(&qfbdev->dirty.lock);
 	INIT_LIST_HEAD(&qfbdev->delayed_ops);
 
 	drm_fb_helper_prepare(qdev->ddev, &qfbdev->helper,