diff mbox series

[5/8] drm/i915: Align start for memcpy_from_wc

Message ID 20191207170110.2200142-5-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [1/8] drm/i915: Fix cmdparser drm.debug | expand

Commit Message

Chris Wilson Dec. 7, 2019, 5:01 p.m. UTC
The movntdqa instruction requires 16-byte alignment for the source
pointer. Avoid falling back to clflush if the source pointer is misaligned
by doing a small uncached memcpy to fix up the alignment.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 30 +++++++++++++++++---------
 1 file changed, 20 insertions(+), 10 deletions(-)

Comments

Joonas Lahtinen Dec. 11, 2019, 10:03 a.m. UTC | #1
Quoting Chris Wilson (2019-12-07 19:01:07)
> The movntdqa instruction requires 16-byte alignment for the source
> pointer. Avoid falling back to clflush if the source pointer is misaligned
> by doing a small uncached memcpy to fix up the alignment.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

<SNIP>

> @@ -1150,19 +1150,30 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
>                 return ERR_PTR(ret);
>         }
>  
> +       ptr = dst;

I'd add a newline here as this will propagate to the later branch if WC
map fails.

>         src = ERR_PTR(-ENODEV);
> -       if (src_needs_clflush &&
> -           i915_can_memcpy_from_wc(NULL, offset, 0)) {
> +       if (src_needs_clflush && i915_has_memcpy_from_wc()) {
>                 src = i915_gem_object_pin_map(src_obj, I915_MAP_WC);
>                 if (!IS_ERR(src)) {
> -                       i915_memcpy_from_wc(dst,
> -                                           src + offset,
> -                                           ALIGN(length, 16));
> +                       src += offset;
> +
> +                       if (!IS_ALIGNED(offset, 16)) {
> +                               len = min(ALIGN(offset, 16) - offset, length);
> +
> +                               memcpy(ptr, src, len);
> +
> +                               offset += len;
> +                               length -= len;
> +                               ptr += len;
> +                               src += len;
> +                       }
> +                       GEM_BUG_ON(!IS_ALIGNED((unsigned long)src, 16));
> +
> +                       i915_memcpy_from_wc(ptr, src, ALIGN(length, 16));

Could be a helper function.

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 6cf4e336461b..2977316d64ae 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -1132,8 +1132,8 @@  static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 {
 	unsigned int src_needs_clflush;
 	unsigned int dst_needs_clflush;
-	void *dst, *src;
-	int ret;
+	void *dst, *src, *ptr;
+	int ret, len;
 
 	ret = i915_gem_object_prepare_write(dst_obj, &dst_needs_clflush);
 	if (ret)
@@ -1150,19 +1150,30 @@  static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 		return ERR_PTR(ret);
 	}
 
+	ptr = dst;
 	src = ERR_PTR(-ENODEV);
-	if (src_needs_clflush &&
-	    i915_can_memcpy_from_wc(NULL, offset, 0)) {
+	if (src_needs_clflush && i915_has_memcpy_from_wc()) {
 		src = i915_gem_object_pin_map(src_obj, I915_MAP_WC);
 		if (!IS_ERR(src)) {
-			i915_memcpy_from_wc(dst,
-					    src + offset,
-					    ALIGN(length, 16));
+			src += offset;
+
+			if (!IS_ALIGNED(offset, 16)) {
+				len = min(ALIGN(offset, 16) - offset, length);
+
+				memcpy(ptr, src, len);
+
+				offset += len;
+				length -= len;
+				ptr += len;
+				src += len;
+			}
+			GEM_BUG_ON(!IS_ALIGNED((unsigned long)src, 16));
+
+			i915_memcpy_from_wc(ptr, src, ALIGN(length, 16));
 			i915_gem_object_unpin_map(src_obj);
 		}
 	}
 	if (IS_ERR(src)) {
-		void *ptr;
 		int x, n;
 
 		/*
@@ -1177,10 +1188,9 @@  static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 			length = round_up(length,
 					  boot_cpu_data.x86_clflush_size);
 
-		ptr = dst;
 		x = offset_in_page(offset);
 		for (n = offset >> PAGE_SHIFT; length; n++) {
-			int len = min_t(int, length, PAGE_SIZE - x);
+			len = min_t(int, length, PAGE_SIZE - x);
 
 			src = kmap_atomic(i915_gem_object_get_page(src_obj, n));
 			if (src_needs_clflush)