From patchwork Sun Jun 3 14:41:00 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Mikulas Patocka X-Patchwork-Id: 10445363 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork.web.codeaurora.org (Postfix) with ESMTP id 614E96022E for ; Sun, 3 Jun 2018 15:18:58 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 4EBDA289ED for ; Sun, 3 Jun 2018 15:18:58 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 4363928ACB; Sun, 3 Jun 2018 15:18:58 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00, MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI autolearn=ham version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id A0F34289ED for ; Sun, 3 Jun 2018 15:18:57 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751654AbeFCPS5 (ORCPT ); Sun, 3 Jun 2018 11:18:57 -0400 Received: from 109-183-129-149.tmcz.cz ([109.183.129.149]:58998 "EHLO leontynka.twibright.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751637AbeFCPS4 (ORCPT ); Sun, 3 Jun 2018 11:18:56 -0400 Received: from root by leontynka.twibright.com with local (Exim 4.89) (envelope-from ) id 1fPUD7-0003qv-Sq; Sun, 03 Jun 2018 16:42:21 +0200 Message-Id: <20180603144221.605787225@twibright.com> User-Agent: quilt/0.63-1 Date: Sun, 03 Jun 2018 16:41:00 +0200 From: Mikulas Patocka To: Mikulas Patocka , Bartlomiej Zolnierkiewicz , Dave Airlie , Bernie Thompson , Ladislav Michl Cc: dri-devel@lists.freedesktop.org, linux-fbdev@vger.kernel.org Subject: [PATCH 07/21] udl-kms: avoid division References: <20180603144053.875668929@twibright.com> MIME-Version: 1.0 Content-Disposition: inline; filename=udlkms-avoid-division.patch Sender: linux-fbdev-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-fbdev@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP Division is slow, so it shouldn't be done by the pixel generating code. The driver supports only 2 or 4 bytes per pixel, so we can replace division with a shift. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org --- drivers/gpu/drm/udl/udl_drv.h | 2 - drivers/gpu/drm/udl/udl_fb.c | 15 ++++++++------ drivers/gpu/drm/udl/udl_transfer.c | 39 ++++++++++++++++++------------------- 3 files changed, 30 insertions(+), 26 deletions(-) -- To unsubscribe from this list: send the line "unsubscribe linux-fbdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Index: linux-4.17-rc7/drivers/gpu/drm/udl/udl_drv.h =================================================================== --- linux-4.17-rc7.orig/drivers/gpu/drm/udl/udl_drv.h 2018-06-03 13:15:01.000000000 +0200 +++ linux-4.17-rc7/drivers/gpu/drm/udl/udl_drv.h 2018-06-03 13:15:01.000000000 +0200 @@ -110,7 +110,7 @@ udl_fb_user_fb_create(struct drm_device struct drm_file *file, const struct drm_mode_fb_cmd2 *mode_cmd); -int udl_render_hline(struct drm_device *dev, int bpp, struct urb **urb_ptr, +int udl_render_hline(struct drm_device *dev, int log_bpp, struct urb **urb_ptr, const char *front, char **urb_buf_ptr, u32 byte_offset, u32 device_byte_offset, u32 byte_width, int *ident_ptr, int *sent_ptr); Index: linux-4.17-rc7/drivers/gpu/drm/udl/udl_fb.c =================================================================== --- linux-4.17-rc7.orig/drivers/gpu/drm/udl/udl_fb.c 2018-06-03 13:15:01.000000000 +0200 +++ linux-4.17-rc7/drivers/gpu/drm/udl/udl_fb.c 2018-06-03 13:15:01.000000000 +0200 @@ -91,7 +91,10 @@ int udl_handle_damage(struct udl_framebu int bytes_identical = 0; struct urb *urb; int aligned_x; - int bpp = fb->base.format->cpp[0]; + int log_bpp; + + BUG_ON(!is_power_of_2(fb->base.format->cpp[0])); + log_bpp = __ffs(fb->base.format->cpp[0]); if (!fb->active_16) return 0; @@ -126,12 +129,12 @@ int udl_handle_damage(struct udl_framebu for (i = y; i < y + height ; i++) { const int line_offset = fb->base.pitches[0] * i; - const int byte_offset = line_offset + (x * bpp); - const int dev_byte_offset = (fb->base.width * bpp * i) + (x * bpp); - if (udl_render_hline(dev, bpp, &urb, + const int byte_offset = line_offset + (x << log_bpp); + const int dev_byte_offset = (fb->base.width * i + x) << log_bpp; + if (udl_render_hline(dev, log_bpp, &urb, (char *) fb->obj->vmapping, &cmd, byte_offset, dev_byte_offset, - width * bpp, + width << log_bpp, &bytes_identical, &bytes_sent)) goto error; } @@ -150,7 +153,7 @@ int udl_handle_damage(struct udl_framebu error: atomic_add(bytes_sent, &udl->bytes_sent); atomic_add(bytes_identical, &udl->bytes_identical); - atomic_add(width*height*bpp, &udl->bytes_rendered); + atomic_add((width * height) << log_bpp, &udl->bytes_rendered); end_cycles = get_cycles(); atomic_add(((unsigned int) ((end_cycles - start_cycles) >> 10)), /* Kcycles */ Index: linux-4.17-rc7/drivers/gpu/drm/udl/udl_transfer.c =================================================================== --- linux-4.17-rc7.orig/drivers/gpu/drm/udl/udl_transfer.c 2018-06-03 13:15:01.000000000 +0200 +++ linux-4.17-rc7/drivers/gpu/drm/udl/udl_transfer.c 2018-06-03 13:15:01.000000000 +0200 @@ -83,12 +83,12 @@ static inline u16 pixel32_to_be16(const ((pixel >> 8) & 0xf800)); } -static inline u16 get_pixel_val16(const uint8_t *pixel, int bpp) +static inline u16 get_pixel_val16(const uint8_t *pixel, int log_bpp) { - u16 pixel_val16 = 0; - if (bpp == 2) + u16 pixel_val16; + if (log_bpp == 1) pixel_val16 = *(const uint16_t *)pixel; - else if (bpp == 4) + else pixel_val16 = pixel32_to_be16(*(const uint32_t *)pixel); return pixel_val16; } @@ -125,8 +125,9 @@ static void udl_compress_hline16( const u8 *const pixel_end, uint32_t *device_address_ptr, uint8_t **command_buffer_ptr, - const uint8_t *const cmd_buffer_end, int bpp) + const uint8_t *const cmd_buffer_end, int log_bpp) { + const int bpp = 1 << log_bpp; const u8 *pixel = *pixel_start_ptr; uint32_t dev_addr = *device_address_ptr; uint8_t *cmd = *command_buffer_ptr; @@ -153,12 +154,12 @@ static void udl_compress_hline16( raw_pixels_count_byte = cmd++; /* we'll know this later */ raw_pixel_start = pixel; - cmd_pixel_end = pixel + min3(MAX_CMD_PIXELS + 1UL, - (unsigned long)(pixel_end - pixel) / bpp, - (unsigned long)(cmd_buffer_end - 1 - cmd) / 2) * bpp; + cmd_pixel_end = pixel + (min3(MAX_CMD_PIXELS + 1UL, + (unsigned long)(pixel_end - pixel) >> log_bpp, + (unsigned long)(cmd_buffer_end - 1 - cmd) / 2) << log_bpp); prefetch_range((void *) pixel, cmd_pixel_end - pixel); - pixel_val16 = get_pixel_val16(pixel, bpp); + pixel_val16 = get_pixel_val16(pixel, log_bpp); while (pixel < cmd_pixel_end) { const u8 *const start = pixel; @@ -170,7 +171,7 @@ static void udl_compress_hline16( pixel += bpp; while (pixel < cmd_pixel_end) { - pixel_val16 = get_pixel_val16(pixel, bpp); + pixel_val16 = get_pixel_val16(pixel, log_bpp); if (pixel_val16 != repeating_pixel_val16) break; pixel += bpp; @@ -179,10 +180,10 @@ static void udl_compress_hline16( if (unlikely(pixel > start + bpp)) { /* go back and fill in raw pixel count */ *raw_pixels_count_byte = (((start - - raw_pixel_start) / bpp) + 1) & 0xFF; + raw_pixel_start) >> log_bpp) + 1) & 0xFF; /* immediately after raw data is repeat byte */ - *cmd++ = (((pixel - start) / bpp) - 1) & 0xFF; + *cmd++ = (((pixel - start) >> log_bpp) - 1) & 0xFF; /* Then start another raw pixel span */ raw_pixel_start = pixel; @@ -192,14 +193,14 @@ static void udl_compress_hline16( if (pixel > raw_pixel_start) { /* finalize last RAW span */ - *raw_pixels_count_byte = ((pixel-raw_pixel_start) / bpp) & 0xFF; + *raw_pixels_count_byte = ((pixel - raw_pixel_start) >> log_bpp) & 0xFF; } else { /* undo unused byte */ cmd--; } - *cmd_pixels_count_byte = ((pixel - cmd_pixel_start) / bpp) & 0xFF; - dev_addr += ((pixel - cmd_pixel_start) / bpp) * 2; + *cmd_pixels_count_byte = ((pixel - cmd_pixel_start) >> log_bpp) & 0xFF; + dev_addr += ((pixel - cmd_pixel_start) >> log_bpp) * 2; } if (cmd_buffer_end <= MIN_RLX_CMD_BYTES + cmd) { @@ -222,19 +223,19 @@ static void udl_compress_hline16( * (that we can only write to, slowly, and can never read), and (optionally) * our shadow copy that tracks what's been sent to that hardware buffer. */ -int udl_render_hline(struct drm_device *dev, int bpp, struct urb **urb_ptr, +int udl_render_hline(struct drm_device *dev, int log_bpp, struct urb **urb_ptr, const char *front, char **urb_buf_ptr, u32 byte_offset, u32 device_byte_offset, u32 byte_width, int *ident_ptr, int *sent_ptr) { const u8 *line_start, *line_end, *next_pixel; - u32 base16 = 0 + (device_byte_offset / bpp) * 2; + u32 base16 = 0 + (device_byte_offset >> log_bpp) * 2; struct urb *urb = *urb_ptr; u8 *cmd = *urb_buf_ptr; u8 *cmd_end = (u8 *) urb->transfer_buffer + urb->transfer_buffer_length; - BUG_ON(!(bpp == 2 || bpp == 4)); + BUG_ON(!(log_bpp == 1 || log_bpp == 2)); line_start = (u8 *) (front + byte_offset); next_pixel = line_start; @@ -244,7 +245,7 @@ int udl_render_hline(struct drm_device * udl_compress_hline16(&next_pixel, line_end, &base16, - (u8 **) &cmd, (u8 *) cmd_end, bpp); + (u8 **) &cmd, (u8 *) cmd_end, log_bpp); if (cmd >= cmd_end) { int len = cmd - (u8 *) urb->transfer_buffer;