From patchwork Fri Jan 18 16:31:14 2013 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 2003921 Return-Path: X-Original-To: patchwork-dri-devel@patchwork.kernel.org Delivered-To: patchwork-process-083081@patchwork2.kernel.org Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) by patchwork2.kernel.org (Postfix) with ESMTP id 5B852DF280 for ; Fri, 18 Jan 2013 16:50:59 +0000 (UTC) Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 590BCE5F06 for ; Fri, 18 Jan 2013 08:50:59 -0800 (PST) X-Original-To: dri-devel@lists.freedesktop.org Delivered-To: dri-devel@lists.freedesktop.org X-Greylist: delayed 1024 seconds by postgrey-1.32 at gabe; Fri, 18 Jan 2013 08:50:32 PST Received: from fireflyinternet.com (smtp.fireflyinternet.com [109.228.6.236]) by gabe.freedesktop.org (Postfix) with ESMTP id 2976DE5C28 for ; Fri, 18 Jan 2013 08:50:31 -0800 (PST) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.73.22; Received: from arrandale.alporthouse.com (unverified [78.156.73.22]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 128072599-1500050 for multiple; Fri, 18 Jan 2013 16:33:13 +0000 From: Chris Wilson To: dri-devel@lists.freedesktop.org Subject: [PATCH] drm/udl: Inline memcmp() for RLE compression of xfer Date: Fri, 18 Jan 2013 16:31:14 +0000 Message-Id: <1358526674-10786-1-git-send-email-chris@chris-wilson.co.uk> X-Mailer: git-send-email 1.7.10.4 X-Originating-IP: 78.156.73.22 Cc: Dave Airlie X-BeenThere: dri-devel@lists.freedesktop.org X-Mailman-Version: 2.1.13 Precedence: list List-Id: Direct Rendering Infrastructure - Development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , MIME-Version: 1.0 Sender: dri-devel-bounces+patchwork-dri-devel=patchwork.kernel.org@lists.freedesktop.org Errors-To: 
dri-devel-bounces+patchwork-dri-devel=patchwork.kernel.org@lists.freedesktop.org As we use a variable length, the compiler does not realise that it is a fixed value of either 2 or 4 bytes. Instead of performing the inline comparison itself, the compiler inserts a function call to the generic memcmp routine, which is optimised for long comparisons of variable length. That turns out to be quite expensive... Signed-off-by: Chris Wilson Cc: Dave Airlie --- drivers/gpu/drm/udl/udl_transfer.c | 46 ++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/udl/udl_transfer.c b/drivers/gpu/drm/udl/udl_transfer.c index 142fee5..f343db7 100644 --- a/drivers/gpu/drm/udl/udl_transfer.c +++ b/drivers/gpu/drm/udl/udl_transfer.c @@ -75,15 +75,19 @@ static int udl_trim_hline(const u8 *bback, const u8 **bfront, int *width_bytes) } #endif -static inline u16 pixel32_to_be16p(const uint8_t *pixel) +static inline u16 pixel32_to_be16(const uint32_t pixel) { - uint32_t pix = *(uint32_t *)pixel; - u16 retval; + return (((pixel >> 3) & 0x001f) | + ((pixel >> 5) & 0x07e0) | + ((pixel >> 8) & 0xf800)); +} - retval = (((pix >> 3) & 0x001f) | - ((pix >> 5) & 0x07e0) | - ((pix >> 8) & 0xf800)); - return retval; +static bool pixel_repeats(const void *pixel, const uint32_t repeat, int bpp) +{ + if (bpp == 2) + return *(const uint16_t *)pixel == repeat; + else + return *(const uint32_t *)pixel == repeat; } /* @@ -152,29 +156,33 @@ static void udl_compress_hline16( prefetch_range((void *) pixel, (cmd_pixel_end - pixel) * bpp); while (pixel < cmd_pixel_end) { - const u8 * const repeating_pixel = pixel; - - if (bpp == 2) - *(uint16_t *)cmd = cpu_to_be16p((uint16_t *)pixel); - else if (bpp == 4) - *(uint16_t *)cmd = cpu_to_be16(pixel32_to_be16p(pixel)); + const u8 *const start = pixel; + u32 repeating_pixel; + + if (bpp == 2) { + repeating_pixel = *(uint16_t *)pixel; + *(uint16_t *)cmd = cpu_to_be16(repeating_pixel); + } else { + repeating_pixel = 
*(uint32_t *)pixel; + *(uint16_t *)cmd = cpu_to_be16(pixel32_to_be16(repeating_pixel)); + } cmd += 2; pixel += bpp; if (unlikely((pixel < cmd_pixel_end) && - (!memcmp(pixel, repeating_pixel, bpp)))) { + (pixel_repeats(pixel, repeating_pixel, bpp)))) { /* go back and fill in raw pixel count */ - *raw_pixels_count_byte = (((repeating_pixel - + *raw_pixels_count_byte = (((start - raw_pixel_start) / bpp) + 1) & 0xFF; - while ((pixel < cmd_pixel_end) - && (!memcmp(pixel, repeating_pixel, bpp))) { + while ((pixel < cmd_pixel_end) && + (pixel_repeats(pixel, repeating_pixel, bpp))) { pixel += bpp; } /* immediately after raw data is repeat byte */ - *cmd++ = (((pixel - repeating_pixel) / bpp) - 1) & 0xFF; + *cmd++ = (((pixel - start) / bpp) - 1) & 0xFF; /* Then start another raw pixel span */ raw_pixel_start = pixel; @@ -223,6 +231,8 @@ int udl_render_hline(struct drm_device *dev, int bpp, struct urb **urb_ptr, u8 *cmd = *urb_buf_ptr; u8 *cmd_end = (u8 *) urb->transfer_buffer + urb->transfer_buffer_length; + BUG_ON(!(bpp == 2 || bpp == 4)); + line_start = (u8 *) (front + byte_offset); next_pixel = line_start; line_end = next_pixel + byte_width;