diff mbox

Avoid fallbacks for gradient patterns

Message ID 1252092408-29419-1-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State Not Applicable
Headers show

Commit Message

Chris Wilson Sept. 4, 2009, 7:26 p.m. UTC
While not yet fully accelerating gradient patterns, by using pixman to
compute the gradient image and copying that to a pixmap to use as the
source, we avoid incurring the GPU stall suffered currently from
reading back the destination surface.

Speedups on i915:
firefox-talos-svg:  710378.14 -> 549262.96:  1.29x speedup

No slowdowns.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 src/i830_render.c |   14 ++-
 src/i915_render.c |   12 ++-
 src/i965_render.c |   14 ++-
 uxa/uxa-render.c  |  261 ++++++++++++++++++++++++++++++++++++++++-------------
 4 files changed, 223 insertions(+), 78 deletions(-)

Comments

Eric Anholt Sept. 4, 2009, 7:35 p.m. UTC | #1
On Fri, 2009-09-04 at 20:26 +0100, Chris Wilson wrote:
> While not yet fully accelerating gradient patterns, by using pixman to
> compute the gradient image and copying that to a pixmap to use as the
> source, we avoid incurring the GPU stall suffered currently from
> reading back the destination surface.

The destination Picture (the one passed in to our uxa hooks) is supposed
to always have a pDrawable, and if it doesn't, that should have been
caught at a higher level.

> 
> Speedups on i915:
> firefox-talos-svg:  710378.14 -> 549262.96:  1.29x speedup
> 
> No slowdowns.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  src/i830_render.c |   14 ++-
>  src/i915_render.c |   12 ++-
>  src/i965_render.c |   14 ++-
>  uxa/uxa-render.c  |  261 ++++++++++++++++++++++++++++++++++++++++-------------
>  4 files changed, 223 insertions(+), 78 deletions(-)
> 
> diff --git a/src/i830_render.c b/src/i830_render.c
> index e4c4623..8418bd7 100644
> --- a/src/i830_render.c
> +++ b/src/i830_render.c
> @@ -220,11 +220,15 @@ static Bool i830_get_blend_cntl(ScrnInfoPtr pScrn, int op, PicturePtr pMask,
>  
>  static Bool i830_check_composite_texture(PicturePtr pPict, int unit)
>  {
> -    ScrnInfoPtr pScrn = xf86Screens[pPict->pDrawable->pScreen->myNum];
> -    int w = pPict->pDrawable->width;
> -    int h = pPict->pDrawable->height;
> -    int i;
> +    ScrnInfoPtr pScrn;
> +    int w, h, i;
>  
> +    if (!pPict->pDrawable)
> +	return TRUE;
> +
> +    pScrn = xf86Screens[pPict->pDrawable->pScreen->myNum];
> +    w = pPict->pDrawable->width;
> +    h = pPict->pDrawable->height;
>      if ((w > 2048) || (h > 2048))
>          I830FALLBACK("Picture w/h too large (%dx%d)\n", w, h);
>  
> @@ -393,7 +397,7 @@ i830_prepare_composite(int op, PicturePtr pSrcPicture,
>  		       PicturePtr pMaskPicture, PicturePtr pDstPicture,
>  		       PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
>  {
> -    ScrnInfoPtr pScrn = xf86Screens[pSrcPicture->pDrawable->pScreen->myNum];
> +    ScrnInfoPtr pScrn = xf86Screens[pDstPicture->pDrawable->pScreen->myNum];
>      I830Ptr pI830 = I830PTR(pScrn);
>      Bool is_affine_src, is_affine_mask;
>      Bool is_nearest = FALSE;
> diff --git a/src/i915_render.c b/src/i915_render.c
> index c81366a..bb755c5 100644
> --- a/src/i915_render.c
> +++ b/src/i915_render.c
> @@ -169,11 +169,15 @@ static Bool i915_get_dest_format(PicturePtr pDstPicture, uint32_t *dst_format)
>  
>  static Bool i915_check_composite_texture(PicturePtr pPict, int unit)
>  {
> -    ScrnInfoPtr pScrn = xf86Screens[pPict->pDrawable->pScreen->myNum];
> -    int w = pPict->pDrawable->width;
> -    int h = pPict->pDrawable->height;
> -    int i;
> +    ScrnInfoPtr pScrn;
> +    int w, h, i;
> +
> +    if (!pPict->pDrawable)
> +	return TRUE;
>  
> +    pScrn = xf86Screens[pPict->pDrawable->pScreen->myNum];
> +    w = pPict->pDrawable->width;
> +    h = pPict->pDrawable->height;
>      if ((w > 2048) || (h > 2048))
>          I830FALLBACK("Picture w/h too large (%dx%d)\n", w, h);
>  
> diff --git a/src/i965_render.c b/src/i965_render.c
> index 1a8075b..1d88af8 100644
> --- a/src/i965_render.c
> +++ b/src/i965_render.c
> @@ -183,11 +183,15 @@ static Bool i965_get_dest_format(PicturePtr pDstPicture, uint32_t *dst_format)
>  
>  static Bool i965_check_composite_texture(PicturePtr pPict, int unit)
>  {
> -    ScrnInfoPtr pScrn = xf86Screens[pPict->pDrawable->pScreen->myNum];
> -    int w = pPict->pDrawable->width;
> -    int h = pPict->pDrawable->height;
> -    int i;
> +    ScrnInfoPtr pScrn;
> +    int w, h, i;
> +
> +    if (!pPict->pDrawable)
> +	return TRUE;
>  
> +    pScrn = xf86Screens[pPict->pDrawable->pScreen->myNum];
> +    w = pPict->pDrawable->width;
> +    h = pPict->pDrawable->height;
>      if ((w > 8192) || (h > 8192))
>          I830FALLBACK("Picture w/h too large (%dx%d)\n", w, h);
>  
> @@ -1438,7 +1442,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
>  		       PicturePtr pMaskPicture, PicturePtr pDstPicture,
>  		       PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
>  {
> -    ScrnInfoPtr pScrn = xf86Screens[pSrcPicture->pDrawable->pScreen->myNum];
> +    ScrnInfoPtr pScrn = xf86Screens[pDstPicture->pDrawable->pScreen->myNum];
>      I830Ptr pI830 = I830PTR(pScrn);
>      struct gen4_render_state *render_state= pI830->gen4_render_state;
>      gen4_composite_op *composite_op = &render_state->composite_op;
> diff --git a/uxa/uxa-render.c b/uxa/uxa-render.c
> index 13128ed..fde10b7 100644
> --- a/uxa/uxa-render.c
> +++ b/uxa/uxa-render.c
> @@ -314,6 +314,131 @@ uxa_try_driver_solid_fill(PicturePtr	pSrc,
>      return 1;
>  }
>  
> +static PicturePtr
> +uxa_picture_from_pixman_image (ScreenPtr pScreen,
> +			       pixman_image_t *image,
> +			       pixman_format_code_t format)
> +{
> +    PicturePtr pPicture;
> +    PixmapPtr pPixmap;
> +    GCPtr pGC;
> +    int width, height, depth;
> +    int error;
> +
> +    width = pixman_image_get_width (image);
> +    height = pixman_image_get_height (image);
> +    depth = pixman_image_get_depth (image);
> +
> +    pPixmap = (*pScreen->CreatePixmap) (pScreen, width, height, depth,
> +					UXA_CREATE_PIXMAP_FOR_MAP);
> +    if (!pPixmap)
> +	return 0;
> +
> +    pPicture = CreatePicture (0, &pPixmap->drawable,
> +			      PictureMatchFormat (pScreen, depth, format),
> +			      0, 0, serverClient, &error);
> +    (*pScreen->DestroyPixmap) (pPixmap);
> +
> +
> +    pPixmap = GetScratchPixmapHeader(pScreen, width, height, depth,
> +				     BitsPerPixel (depth),
> +				     pixman_image_get_stride (image),
> +				     pixman_image_get_data (image));
> +    if (!pPixmap)
> +    {
> +	FreePicture (pPicture, 0);
> +	return 0;
> +    }
> +
> +    pGC = GetScratchGC (depth, pScreen);
> +    if (!pGC)
> +    {
> +	FreeScratchPixmapHeader (pPixmap);
> +	FreePicture (pPicture, 0);
> +	return 0;
> +    }
> +    ValidateGC (pPicture->pDrawable, pGC);
> +
> +    (*pGC->ops->CopyArea) (&pPixmap->drawable, pPicture->pDrawable,
> +			   pGC, 0, 0, width, height, 0, 0);
> +
> +    FreeScratchGC (pGC);
> +    FreeScratchPixmapHeader (pPixmap);
> +
> +    return pPicture;
> +}
> +
> +static PicturePtr
> +uxa_get_pattern (ScreenPtr pScreen,
> +		 PicturePtr pPict,
> +		 pixman_format_code_t format,
> +		 INT16 x, INT16 y,
> +		 CARD16 width, CARD16 height)
> +{
> +    pixman_image_t *source, *image;
> +
> +    source = image_from_pict (pPict, 0, 0);
> +    if (!source)
> +	return 0;
> +
> +    image = pixman_image_create_bits (format, width, height, NULL, 0);
> +    if (!image) {
> +	pixman_image_unref (source);
> +	return 0;
> +    }
> +
> +    pixman_image_composite (PIXMAN_OP_SRC,
> +			    source, NULL, image,
> +			    x, y,
> +			    0, 0,
> +			    0, 0,
> +			    width, height);
> +    pixman_image_unref (source);
> +
> +    pPict = uxa_picture_from_pixman_image (pScreen, image, format);
> +    pixman_image_unref (image);
> +
> +    return pPict;
> +}
> +
> +static PicturePtr
> +uxa_get_source (ScreenPtr pScreen,
> +		PicturePtr pPict,
> +		INT16 x, INT16 y,
> +		CARD16 width, CARD16 height,
> +		INT16 *out_x, INT16 *out_y)
> +{
> +    if (pPict->pDrawable) {
> +	*out_x = x + pPict->pDrawable->x;
> +	*out_y = y + pPict->pDrawable->y;
> +	return pPict;
> +    }
> +
> +    *out_x = 0;
> +    *out_y = 0;
> +    return uxa_get_pattern (pScreen, pPict,
> +			    PICT_a8r8g8b8, x, y, width, height);
> +}
> +
> +static PicturePtr
> +uxa_get_mask (ScreenPtr pScreen,
> +	      PicturePtr pPict,
> +	      INT16 x, INT16 y,
> +	      INT16 width, INT16 height,
> +	      INT16 *out_x, INT16 *out_y)
> +{
> +    if (pPict->pDrawable) {
> +	*out_x = x + pPict->pDrawable->x;
> +	*out_y = y + pPict->pDrawable->y;
> +	return pPict;
> +    }
> +
> +    *out_x = 0;
> +    *out_y = 0;
> +    return uxa_get_pattern (pScreen, pPict,
> +			    PICT_a8, x, y, width, height);
> +}
> +
>  static int
>  uxa_try_driver_composite_rects(CARD8		    op,
>  			       PicturePtr	    pSrc,
> @@ -333,7 +458,7 @@ uxa_try_driver_composite_rects(CARD8		    op,
>      {
>  	return -1;
>      }
> -    
> +
>      pDstPix = uxa_get_offscreen_pixmap(pDst->pDrawable, &dst_off_x, &dst_off_y);
>      if (!pDstPix)
>  	return 0;
> @@ -453,48 +578,84 @@ uxa_try_driver_composite(CARD8		op,
>      int nbox;
>      int src_off_x, src_off_y, mask_off_x, mask_off_y, dst_off_x, dst_off_y;
>      PixmapPtr pSrcPix, pMaskPix = NULL, pDstPix;
> +    PicturePtr localSrc, localMask = NULL;
>  
>      xDst += pDst->pDrawable->x;
>      yDst += pDst->pDrawable->y;
>  
> +    localSrc = uxa_get_source (pDst->pDrawable->pScreen,
> +			       pSrc, xSrc, ySrc, width, height,
> +			       &xSrc, &ySrc);
> +    if (! localSrc)
> +	return 0;
> +
>      if (pMask) {
> -	xMask += pMask->pDrawable->x;
> -	yMask += pMask->pDrawable->y;
> -    }
> +	localMask = uxa_get_mask (pDst->pDrawable->pScreen,
> +				  pMask, xMask, yMask, width, height,
> +				  &xMask, &yMask);
> +	if (! localMask) {
> +	    if (localSrc != pSrc)
> +		FreePicture (localSrc, 0);
>  
> -    xSrc += pSrc->pDrawable->x;
> -    ySrc += pSrc->pDrawable->y;
> +	    return 0;
> +	}
> +    }
>  
>      if (uxa_screen->info->check_composite &&
> -	!(*uxa_screen->info->check_composite) (op, pSrc, pMask, pDst))
> +	!(*uxa_screen->info->check_composite) (op, localSrc, localMask, pDst))
>      {
> +	if (localSrc != pSrc)
> +	    FreePicture (localSrc, 0);
> +	if (localMask && localMask != pMask)
> +	    FreePicture (localMask, 0);
> +
>  	return -1;
>      }
>  
> -    if (!miComputeCompositeRegion (&region, pSrc, pMask, pDst,
> +    if (!miComputeCompositeRegion (&region, localSrc, localMask, pDst,
>  				   xSrc, ySrc, xMask, yMask, xDst, yDst,
>  				   width, height))
> +    {
> +	if (localSrc != pSrc)
> +	    FreePicture (localSrc, 0);
> +	if (localMask && localMask != pMask)
> +	    FreePicture (localMask, 0);
> +
>  	return 1;
> +    }
>  
>      pDstPix = uxa_get_offscreen_pixmap (pDst->pDrawable, &dst_off_x, &dst_off_y);
>  
> -    pSrcPix = uxa_get_offscreen_pixmap (pSrc->pDrawable, &src_off_x, &src_off_y);
> +    pSrcPix = uxa_get_offscreen_pixmap (localSrc->pDrawable,
> +					&src_off_x, &src_off_y);
>  
> -    if (pMask)
> -	pMaskPix = uxa_get_offscreen_pixmap (pMask->pDrawable, &mask_off_x,
> -					     &mask_off_y);
> +    if (localMask)
> +	pMaskPix = uxa_get_offscreen_pixmap (localMask->pDrawable,
> +					     &mask_off_x, &mask_off_y);
>  
> -    if (!pDstPix || !pSrcPix || (pMask && !pMaskPix)) {
> +    if (!pDstPix || !pSrcPix || (localMask && !pMaskPix)) {
>  	REGION_UNINIT(pDst->pDrawable->pScreen, &region);
> +
> +	if (localSrc != pSrc)
> +	    FreePicture (localSrc, 0);
> +	if (localMask && localMask != pMask)
> +	    FreePicture (localMask, 0);
> +
>  	return 0;
>      }
>  
>      REGION_TRANSLATE(pScreen, &region, dst_off_x, dst_off_y);
>  
> -    if (!(*uxa_screen->info->prepare_composite) (op, pSrc, pMask, pDst, pSrcPix,
> -						 pMaskPix, pDstPix))
> +    if (!(*uxa_screen->info->prepare_composite) (op, localSrc, localMask, pDst,
> +						 pSrcPix, pMaskPix, pDstPix))
>      {
>  	REGION_UNINIT(pDst->pDrawable->pScreen, &region);
> +
> +	if (localSrc != pSrc)
> +	    FreePicture (localSrc, 0);
> +	if (localMask && localMask != pMask)
> +	    FreePicture (localMask, 0);
> +
>  	return -1;
>      }
>  
> @@ -523,6 +684,12 @@ uxa_try_driver_composite(CARD8		op,
>      (*uxa_screen->info->done_composite) (pDstPix);
>  
>      REGION_UNINIT(pDst->pDrawable->pScreen, &region);
> +
> +    if (localSrc != pSrc)
> +	FreePicture (localSrc, 0);
> +    if (localMask && localMask != pMask)
> +	FreePicture (localMask, 0);
> +
>      return 1;
>  }
>  
> @@ -636,14 +803,11 @@ uxa_composite(CARD8	op,
>      Bool saveMaskRepeat = pMask ? pMask->repeat : 0;
>      RegionRec region;
>  
> -    /* We currently don't support acceleration of gradients, or other pictures
> -     * with a NULL pDrawable.
> -     */
> -    if (uxa_screen->swappedOut ||
> -	pSrc->pDrawable == NULL || (pMask != NULL && pMask->pDrawable == NULL))
> -    {
> +    if (uxa_screen->swappedOut)
>  	goto fallback;
> -    }
> +
> +    if (pSrc->pDrawable == NULL || (pMask && pMask->pDrawable == NULL))
> +	goto composite;
>  
>      /* Remove repeat in source if useless */
>      if (pSrc->repeat && !pSrc->transform && xSrc >= 0 &&
> @@ -744,6 +908,7 @@ uxa_composite(CARD8	op,
>  	(yMask + height) <= pMask->pDrawable->height)
>  	    pMask->repeat = 0;
>  
> +composite:
>      if (uxa_screen->info->prepare_composite &&
>  	!pSrc->alphaMap && (!pMask || !pMask->alphaMap) && !pDst->alphaMap)
>      {
> @@ -757,7 +922,8 @@ uxa_composite(CARD8	op,
>  	/* For generic masks and solid src pictures, mach64 can do Over in two
>  	 * passes, similar to the component-alpha case.
>  	 */
> -	isSrcSolid = pSrc->pDrawable->width == 1 &&
> +	isSrcSolid = pSrc->pDrawable &&
> +		     pSrc->pDrawable->width == 1 &&
>  		     pSrc->pDrawable->height == 1 &&
>  		     pSrc->repeat;
>  
> @@ -888,62 +1054,29 @@ uxa_trapezoids (CARD8 op, PicturePtr pSrc, PicturePtr pDst,
>  	PicturePtr	pPicture;
>  	INT16		xDst, yDst;
>  	INT16		xRel, yRel;
> -	int		width, height, stride;
> -	PixmapPtr	pPixmap;
> -	GCPtr		pGC;
> +	int		width, height;
>  	pixman_image_t	*image;
> +	pixman_format_code_t format;
>  
>  	xDst = traps[0].left.p1.x >> 16;
>  	yDst = traps[0].left.p1.y >> 16;
>  
>  	width = bounds.x2 - bounds.x1;
>  	height = bounds.y2 - bounds.y1;
> -	stride = (width * BitsPerPixel (maskFormat->depth) + 7) / 8;
>  
> -	pPicture = uxa_create_alpha_picture (pScreen, pDst, maskFormat,
> -					     width, height);
> -	if (!pPicture)
> +	format = maskFormat->format | (BitsPerPixel (maskFormat->depth) << 24);
> +	image = pixman_image_create_bits (format, width, height, NULL, 0);
> +	if (!image)
>  	    return;
>  
> -	image = pixman_image_create_bits (pPicture->format,
> -					  width, height,
> -					  NULL, stride);
> -	if (!image) {
> -	    FreePicture (pPicture, 0);
> -	    return;
> -	}
> -
>  	for (; ntrap; ntrap--, traps++)
>  	    pixman_rasterize_trapezoid (image, (pixman_trapezoid_t *) traps,
>  					-bounds.x1, -bounds.y1);
>  
> -	pPixmap = GetScratchPixmapHeader(pScreen, width, height,
> -					 maskFormat->depth,
> -					 BitsPerPixel (maskFormat->depth),
> -					 PixmapBytePad (width, maskFormat->depth),
> -					 pixman_image_get_data (image));
> -	if (!pPixmap) {
> -	    FreePicture (pPicture, 0);
> -	    pixman_image_unref (image);
> -	    return;
> -	}
> -
> -	pGC = GetScratchGC (pPicture->pDrawable->depth, pScreen);
> -	if (!pGC)
> -	{
> -	    FreeScratchPixmapHeader (pPixmap);
> -	    pixman_image_unref (image);
> -	    FreePicture (pPicture, 0);
> -	    return;
> -	}
> -	ValidateGC (pPicture->pDrawable, pGC);
> -
> -	(*pGC->ops->CopyArea) (&pPixmap->drawable, pPicture->pDrawable,
> -			       pGC, 0, 0, width, height, 0, 0);
> -
> -	FreeScratchGC (pGC);
> -	FreeScratchPixmapHeader (pPixmap);
> +	pPicture = uxa_picture_from_pixman_image (pScreen, image, format);
>  	pixman_image_unref (image);
> +	if (!pPicture)
> +	    return;
>  
>  	xRel = bounds.x1 + xSrc - xDst;
>  	yRel = bounds.y1 + ySrc - yDst;
Chris Wilson Sept. 4, 2009, 7:51 p.m. UTC | #2
Excerpts from Eric Anholt's message of Fri Sep 04 20:35:29 +0100 2009:
> On Fri, 2009-09-04 at 20:26 +0100, Chris Wilson wrote:
> > While not yet fully accelerating gradient patterns, by using pixman to
> > compute the gradient image and copying that to a pixmap to use as the
> > source, we avoid incurring the GPU stall suffered currently from
> > reading back the destination surface.
> 
> The destination Picture (the one passed in to our uxa hooks) is supposed
> to always have a pDrawable, and if it doesn't, that should have been
> caught at a higher level.

Hmm, didn't explain the problem well enough (or at all in this case).
The issue is that if asked to composite using a gradient source, and/or
mask, we fall back to fbComposite, which causes the readback of the
destination. All this patch does, like uxa_trapezoids(), is to allocate
a scratch pixmap for the source (or mask) using pixman to fill it and then
use CopyArea to transfer that to an offscreen pixmap.
-ickle
Eric Anholt Sept. 4, 2009, 7:57 p.m. UTC | #3
On Fri, 2009-09-04 at 20:51 +0100, Chris Wilson wrote:
> Excerpts from Eric Anholt's message of Fri Sep 04 20:35:29 +0100 2009:
> > On Fri, 2009-09-04 at 20:26 +0100, Chris Wilson wrote:
> > > While not yet fully accelerating gradient patterns, by using pixman to
> > > compute the gradient image and copying that to a pixmap to use as the
> > > source, we avoid incurring the GPU stall suffered currently from
> > > reading back the destination surface.
> > 
> > The destination Picture (the one passed in to our uxa hooks) is supposed
> > to always have a pDrawable, and if it doesn't, that should have been
> > caught at a higher level.
> 
> Hmm, didn't explain the problem well enough (or at all in this case).
> The issue is that if asked to composite using a gradient source, and/or
> mask, we fallback to fbComposite, which causes the readback of the
> destination. All this patch does, like uxa_trapezoids(), is to allocate
> a scratch pixmap for the source (or mask) using pixman to fill it and then
> uses CopyArea to transfer that to an offscreen pixmap.

Sorry, I missed that they were in the texture-setup functions.  Looks
good.
Clemens Eisserer Sept. 4, 2009, 10:40 p.m. UTC | #4
Great work, thanks a lot :)

- Clemens
diff mbox

Patch

diff --git a/src/i830_render.c b/src/i830_render.c
index e4c4623..8418bd7 100644
--- a/src/i830_render.c
+++ b/src/i830_render.c
@@ -220,11 +220,15 @@  static Bool i830_get_blend_cntl(ScrnInfoPtr pScrn, int op, PicturePtr pMask,
 
 static Bool i830_check_composite_texture(PicturePtr pPict, int unit)
 {
-    ScrnInfoPtr pScrn = xf86Screens[pPict->pDrawable->pScreen->myNum];
-    int w = pPict->pDrawable->width;
-    int h = pPict->pDrawable->height;
-    int i;
+    ScrnInfoPtr pScrn;
+    int w, h, i;
 
+    if (!pPict->pDrawable)
+	return TRUE;
+
+    pScrn = xf86Screens[pPict->pDrawable->pScreen->myNum];
+    w = pPict->pDrawable->width;
+    h = pPict->pDrawable->height;
     if ((w > 2048) || (h > 2048))
         I830FALLBACK("Picture w/h too large (%dx%d)\n", w, h);
 
@@ -393,7 +397,7 @@  i830_prepare_composite(int op, PicturePtr pSrcPicture,
 		       PicturePtr pMaskPicture, PicturePtr pDstPicture,
 		       PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
 {
-    ScrnInfoPtr pScrn = xf86Screens[pSrcPicture->pDrawable->pScreen->myNum];
+    ScrnInfoPtr pScrn = xf86Screens[pDstPicture->pDrawable->pScreen->myNum];
     I830Ptr pI830 = I830PTR(pScrn);
     Bool is_affine_src, is_affine_mask;
     Bool is_nearest = FALSE;
diff --git a/src/i915_render.c b/src/i915_render.c
index c81366a..bb755c5 100644
--- a/src/i915_render.c
+++ b/src/i915_render.c
@@ -169,11 +169,15 @@  static Bool i915_get_dest_format(PicturePtr pDstPicture, uint32_t *dst_format)
 
 static Bool i915_check_composite_texture(PicturePtr pPict, int unit)
 {
-    ScrnInfoPtr pScrn = xf86Screens[pPict->pDrawable->pScreen->myNum];
-    int w = pPict->pDrawable->width;
-    int h = pPict->pDrawable->height;
-    int i;
+    ScrnInfoPtr pScrn;
+    int w, h, i;
+
+    if (!pPict->pDrawable)
+	return TRUE;
 
+    pScrn = xf86Screens[pPict->pDrawable->pScreen->myNum];
+    w = pPict->pDrawable->width;
+    h = pPict->pDrawable->height;
     if ((w > 2048) || (h > 2048))
         I830FALLBACK("Picture w/h too large (%dx%d)\n", w, h);
 
diff --git a/src/i965_render.c b/src/i965_render.c
index 1a8075b..1d88af8 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -183,11 +183,15 @@  static Bool i965_get_dest_format(PicturePtr pDstPicture, uint32_t *dst_format)
 
 static Bool i965_check_composite_texture(PicturePtr pPict, int unit)
 {
-    ScrnInfoPtr pScrn = xf86Screens[pPict->pDrawable->pScreen->myNum];
-    int w = pPict->pDrawable->width;
-    int h = pPict->pDrawable->height;
-    int i;
+    ScrnInfoPtr pScrn;
+    int w, h, i;
+
+    if (!pPict->pDrawable)
+	return TRUE;
 
+    pScrn = xf86Screens[pPict->pDrawable->pScreen->myNum];
+    w = pPict->pDrawable->width;
+    h = pPict->pDrawable->height;
     if ((w > 8192) || (h > 8192))
         I830FALLBACK("Picture w/h too large (%dx%d)\n", w, h);
 
@@ -1438,7 +1442,7 @@  i965_prepare_composite(int op, PicturePtr pSrcPicture,
 		       PicturePtr pMaskPicture, PicturePtr pDstPicture,
 		       PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
 {
-    ScrnInfoPtr pScrn = xf86Screens[pSrcPicture->pDrawable->pScreen->myNum];
+    ScrnInfoPtr pScrn = xf86Screens[pDstPicture->pDrawable->pScreen->myNum];
     I830Ptr pI830 = I830PTR(pScrn);
     struct gen4_render_state *render_state= pI830->gen4_render_state;
     gen4_composite_op *composite_op = &render_state->composite_op;
diff --git a/uxa/uxa-render.c b/uxa/uxa-render.c
index 13128ed..fde10b7 100644
--- a/uxa/uxa-render.c
+++ b/uxa/uxa-render.c
@@ -314,6 +314,131 @@  uxa_try_driver_solid_fill(PicturePtr	pSrc,
     return 1;
 }
 
+/* Copy a pixman image into a new pixmap-backed Picture; returns 0 on failure. */
+static PicturePtr
+uxa_picture_from_pixman_image (ScreenPtr pScreen,
+			       pixman_image_t *image,
+			       pixman_format_code_t format)
+{
+    PicturePtr pPicture;
+    PixmapPtr pPixmap;
+    GCPtr pGC;
+    int width, height, depth, error;
+
+    width = pixman_image_get_width (image);
+    height = pixman_image_get_height (image);
+    depth = pixman_image_get_depth (image);
+
+    pPixmap = (*pScreen->CreatePixmap) (pScreen, width, height, depth,
+					UXA_CREATE_PIXMAP_FOR_MAP);
+    if (!pPixmap)
+	return 0;
+
+    pPicture = CreatePicture (0, &pPixmap->drawable,
+			      PictureMatchFormat (pScreen, depth, format),
+			      0, 0, serverClient, &error);
+    (*pScreen->DestroyPixmap) (pPixmap);
+    if (!pPicture) /* CreatePicture failed: pPicture must not be dereferenced below */
+	return 0;
+
+    /* Wrap the pixman image's bits in a scratch pixmap header to copy from. */
+    pPixmap = GetScratchPixmapHeader(pScreen, width, height, depth,
+				     BitsPerPixel (depth),
+				     pixman_image_get_stride (image),
+				     pixman_image_get_data (image));
+    if (!pPixmap) {
+	FreePicture (pPicture, 0);
+	return 0;
+    }
+
+    pGC = GetScratchGC (depth, pScreen);
+    if (!pGC) {
+	FreeScratchPixmapHeader (pPixmap);
+	FreePicture (pPicture, 0);
+	return 0;
+    }
+    ValidateGC (pPicture->pDrawable, pGC);
+
+    (*pGC->ops->CopyArea) (&pPixmap->drawable, pPicture->pDrawable,
+			   pGC, 0, 0, width, height, 0, 0);
+
+    FreeScratchGC (pGC);
+    FreeScratchPixmapHeader (pPixmap);
+
+    return pPicture;
+}
+
+static PicturePtr
+uxa_get_pattern (ScreenPtr pScreen,
+		 PicturePtr pPict,
+		 pixman_format_code_t format,
+		 INT16 x, INT16 y,
+		 CARD16 width, CARD16 height)
+{
+    pixman_image_t *source, *image;
+
+    source = image_from_pict (pPict, 0, 0);
+    if (!source)
+	return 0;
+
+    image = pixman_image_create_bits (format, width, height, NULL, 0); /* NULL bits: pixman allocates the storage */
+    if (!image) {
+	pixman_image_unref (source);
+	return 0;
+    }
+
+    pixman_image_composite (PIXMAN_OP_SRC, /* plain copy of source into image */
+			    source, NULL, image,
+			    x, y, /* source origin */
+			    0, 0, /* mask origin (unused, mask is NULL) */
+			    0, 0, /* destination origin */
+			    width, height);
+    pixman_image_unref (source);
+
+    pPict = uxa_picture_from_pixman_image (pScreen, image, format); /* 0 on failure */
+    pixman_image_unref (image); /* released whether or not the Picture was created */
+
+    return pPict;
+}
+
+static PicturePtr
+uxa_get_source (ScreenPtr pScreen,
+		PicturePtr pPict,
+		INT16 x, INT16 y,
+		CARD16 width, CARD16 height,
+		INT16 *out_x, INT16 *out_y)
+{
+    if (pPict->pDrawable) { /* drawable-backed: return pPict itself, offset by the drawable origin */
+	*out_x = x + pPict->pDrawable->x;
+	*out_y = y + pPict->pDrawable->y;
+	return pPict;
+    }
+
+    *out_x = 0; /* no drawable: x, y are handed to uxa_get_pattern instead */
+    *out_y = 0;
+    return uxa_get_pattern (pScreen, pPict, /* new a8r8g8b8 Picture, or 0 on failure */
+			    PICT_a8r8g8b8, x, y, width, height);
+}
+
+/* Return pPict if drawable-backed, else a new a8 Picture of the pattern (0 on failure). */
+static PicturePtr
+uxa_get_mask (ScreenPtr pScreen,
+	      PicturePtr pPict,
+	      INT16 x, INT16 y,
+	      CARD16 width, CARD16 height,
+	      INT16 *out_x, INT16 *out_y)
+{
+    if (pPict->pDrawable) {
+	*out_x = x + pPict->pDrawable->x;
+	*out_y = y + pPict->pDrawable->y;
+	return pPict;
+    }
+
+    *out_x = *out_y = 0; /* no drawable: x, y are handed to uxa_get_pattern instead */
+    return uxa_get_pattern (pScreen, pPict,
+			    PICT_a8, x, y, width, height);
+}
+
 static int
 uxa_try_driver_composite_rects(CARD8		    op,
 			       PicturePtr	    pSrc,
@@ -333,7 +458,7 @@  uxa_try_driver_composite_rects(CARD8		    op,
     {
 	return -1;
     }
-    
+
     pDstPix = uxa_get_offscreen_pixmap(pDst->pDrawable, &dst_off_x, &dst_off_y);
     if (!pDstPix)
 	return 0;
@@ -453,48 +578,84 @@  uxa_try_driver_composite(CARD8		op,
     int nbox;
     int src_off_x, src_off_y, mask_off_x, mask_off_y, dst_off_x, dst_off_y;
     PixmapPtr pSrcPix, pMaskPix = NULL, pDstPix;
+    PicturePtr localSrc, localMask = NULL;
 
     xDst += pDst->pDrawable->x;
     yDst += pDst->pDrawable->y;
 
+    localSrc = uxa_get_source (pDst->pDrawable->pScreen,
+			       pSrc, xSrc, ySrc, width, height,
+			       &xSrc, &ySrc);
+    if (! localSrc)
+	return 0;
+
     if (pMask) {
-	xMask += pMask->pDrawable->x;
-	yMask += pMask->pDrawable->y;
-    }
+	localMask = uxa_get_mask (pDst->pDrawable->pScreen,
+				  pMask, xMask, yMask, width, height,
+				  &xMask, &yMask);
+	if (! localMask) {
+	    if (localSrc != pSrc)
+		FreePicture (localSrc, 0);
 
-    xSrc += pSrc->pDrawable->x;
-    ySrc += pSrc->pDrawable->y;
+	    return 0;
+	}
+    }
 
     if (uxa_screen->info->check_composite &&
-	!(*uxa_screen->info->check_composite) (op, pSrc, pMask, pDst))
+	!(*uxa_screen->info->check_composite) (op, localSrc, localMask, pDst))
     {
+	if (localSrc != pSrc)
+	    FreePicture (localSrc, 0);
+	if (localMask && localMask != pMask)
+	    FreePicture (localMask, 0);
+
 	return -1;
     }
 
-    if (!miComputeCompositeRegion (&region, pSrc, pMask, pDst,
+    if (!miComputeCompositeRegion (&region, localSrc, localMask, pDst,
 				   xSrc, ySrc, xMask, yMask, xDst, yDst,
 				   width, height))
+    {
+	if (localSrc != pSrc)
+	    FreePicture (localSrc, 0);
+	if (localMask && localMask != pMask)
+	    FreePicture (localMask, 0);
+
 	return 1;
+    }
 
     pDstPix = uxa_get_offscreen_pixmap (pDst->pDrawable, &dst_off_x, &dst_off_y);
 
-    pSrcPix = uxa_get_offscreen_pixmap (pSrc->pDrawable, &src_off_x, &src_off_y);
+    pSrcPix = uxa_get_offscreen_pixmap (localSrc->pDrawable,
+					&src_off_x, &src_off_y);
 
-    if (pMask)
-	pMaskPix = uxa_get_offscreen_pixmap (pMask->pDrawable, &mask_off_x,
-					     &mask_off_y);
+    if (localMask)
+	pMaskPix = uxa_get_offscreen_pixmap (localMask->pDrawable,
+					     &mask_off_x, &mask_off_y);
 
-    if (!pDstPix || !pSrcPix || (pMask && !pMaskPix)) {
+    if (!pDstPix || !pSrcPix || (localMask && !pMaskPix)) {
 	REGION_UNINIT(pDst->pDrawable->pScreen, &region);
+
+	if (localSrc != pSrc)
+	    FreePicture (localSrc, 0);
+	if (localMask && localMask != pMask)
+	    FreePicture (localMask, 0);
+
 	return 0;
     }
 
     REGION_TRANSLATE(pScreen, &region, dst_off_x, dst_off_y);
 
-    if (!(*uxa_screen->info->prepare_composite) (op, pSrc, pMask, pDst, pSrcPix,
-						 pMaskPix, pDstPix))
+    if (!(*uxa_screen->info->prepare_composite) (op, localSrc, localMask, pDst,
+						 pSrcPix, pMaskPix, pDstPix))
     {
 	REGION_UNINIT(pDst->pDrawable->pScreen, &region);
+
+	if (localSrc != pSrc)
+	    FreePicture (localSrc, 0);
+	if (localMask && localMask != pMask)
+	    FreePicture (localMask, 0);
+
 	return -1;
     }
 
@@ -523,6 +684,12 @@  uxa_try_driver_composite(CARD8		op,
     (*uxa_screen->info->done_composite) (pDstPix);
 
     REGION_UNINIT(pDst->pDrawable->pScreen, &region);
+
+    if (localSrc != pSrc)
+	FreePicture (localSrc, 0);
+    if (localMask && localMask != pMask)
+	FreePicture (localMask, 0);
+
     return 1;
 }
 
@@ -636,14 +803,11 @@  uxa_composite(CARD8	op,
     Bool saveMaskRepeat = pMask ? pMask->repeat : 0;
     RegionRec region;
 
-    /* We currently don't support acceleration of gradients, or other pictures
-     * with a NULL pDrawable.
-     */
-    if (uxa_screen->swappedOut ||
-	pSrc->pDrawable == NULL || (pMask != NULL && pMask->pDrawable == NULL))
-    {
+    if (uxa_screen->swappedOut)
 	goto fallback;
-    }
+
+    if (pSrc->pDrawable == NULL || (pMask && pMask->pDrawable == NULL))
+	goto composite;
 
     /* Remove repeat in source if useless */
     if (pSrc->repeat && !pSrc->transform && xSrc >= 0 &&
@@ -744,6 +908,7 @@  uxa_composite(CARD8	op,
 	(yMask + height) <= pMask->pDrawable->height)
 	    pMask->repeat = 0;
 
+composite:
     if (uxa_screen->info->prepare_composite &&
 	!pSrc->alphaMap && (!pMask || !pMask->alphaMap) && !pDst->alphaMap)
     {
@@ -757,7 +922,8 @@  uxa_composite(CARD8	op,
 	/* For generic masks and solid src pictures, mach64 can do Over in two
 	 * passes, similar to the component-alpha case.
 	 */
-	isSrcSolid = pSrc->pDrawable->width == 1 &&
+	isSrcSolid = pSrc->pDrawable &&
+		     pSrc->pDrawable->width == 1 &&
 		     pSrc->pDrawable->height == 1 &&
 		     pSrc->repeat;
 
@@ -888,62 +1054,29 @@  uxa_trapezoids (CARD8 op, PicturePtr pSrc, PicturePtr pDst,
 	PicturePtr	pPicture;
 	INT16		xDst, yDst;
 	INT16		xRel, yRel;
-	int		width, height, stride;
-	PixmapPtr	pPixmap;
-	GCPtr		pGC;
+	int		width, height;
 	pixman_image_t	*image;
+	pixman_format_code_t format;
 
 	xDst = traps[0].left.p1.x >> 16;
 	yDst = traps[0].left.p1.y >> 16;
 
 	width = bounds.x2 - bounds.x1;
 	height = bounds.y2 - bounds.y1;
-	stride = (width * BitsPerPixel (maskFormat->depth) + 7) / 8;
 
-	pPicture = uxa_create_alpha_picture (pScreen, pDst, maskFormat,
-					     width, height);
-	if (!pPicture)
+	format = maskFormat->format | (BitsPerPixel (maskFormat->depth) << 24);
+	image = pixman_image_create_bits (format, width, height, NULL, 0);
+	if (!image)
 	    return;
 
-	image = pixman_image_create_bits (pPicture->format,
-					  width, height,
-					  NULL, stride);
-	if (!image) {
-	    FreePicture (pPicture, 0);
-	    return;
-	}
-
 	for (; ntrap; ntrap--, traps++)
 	    pixman_rasterize_trapezoid (image, (pixman_trapezoid_t *) traps,
 					-bounds.x1, -bounds.y1);
 
-	pPixmap = GetScratchPixmapHeader(pScreen, width, height,
-					 maskFormat->depth,
-					 BitsPerPixel (maskFormat->depth),
-					 PixmapBytePad (width, maskFormat->depth),
-					 pixman_image_get_data (image));
-	if (!pPixmap) {
-	    FreePicture (pPicture, 0);
-	    pixman_image_unref (image);
-	    return;
-	}
-
-	pGC = GetScratchGC (pPicture->pDrawable->depth, pScreen);
-	if (!pGC)
-	{
-	    FreeScratchPixmapHeader (pPixmap);
-	    pixman_image_unref (image);
-	    FreePicture (pPicture, 0);
-	    return;
-	}
-	ValidateGC (pPicture->pDrawable, pGC);
-
-	(*pGC->ops->CopyArea) (&pPixmap->drawable, pPicture->pDrawable,
-			       pGC, 0, 0, width, height, 0, 0);
-
-	FreeScratchGC (pGC);
-	FreeScratchPixmapHeader (pPixmap);
+	pPicture = uxa_picture_from_pixman_image (pScreen, image, format);
 	pixman_image_unref (image);
+	if (!pPicture)
+	    return;
 
 	xRel = bounds.x1 + xSrc - xDst;
 	yRel = bounds.y1 + ySrc - yDst;