@@ -951,7 +951,8 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
{
unsigned int src_needs_clflush;
unsigned int dst_needs_clflush;
- void *src, *dst;
+ void *dst, *ptr;
+ int offset, n;
int ret;
ret = i915_gem_obj_prepare_shmem_read(src_obj, &src_needs_clflush);
@@ -964,30 +965,33 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
goto unpin_src;
}
- src = i915_gem_object_pin_map(src_obj, I915_MAP_WB);
- if (IS_ERR(src)) {
- dst = src;
- goto unpin_dst;
- }
-
dst = i915_gem_object_pin_map(dst_obj, I915_MAP_WB);
if (IS_ERR(dst))
- goto unmap_src;
-
- src += batch_start_offset;
- if (src_needs_clflush)
- drm_clflush_virt_range(src, batch_len);
+ goto unpin_dst;
+ ptr = dst;
+ offset = offset_in_page(batch_start_offset);
if (dst_needs_clflush & CLFLUSH_BEFORE)
batch_len = roundup(batch_len, boot_cpu_data.x86_clflush_size);
- memcpy(dst, src, batch_len);
+ for (n = batch_start_offset >> PAGE_SHIFT; batch_len; n++) {
+ int len = min_t(int, batch_len, PAGE_SIZE - offset);
+ void *vaddr;
+
+ vaddr = kmap_atomic(i915_gem_object_get_page(src_obj, n));
+ if (src_needs_clflush)
+ drm_clflush_virt_range(vaddr + offset, len);
+ memcpy(ptr, vaddr + offset, len);
+ kunmap_atomic(vaddr);
+
+ ptr += len;
+ batch_len -= len;
+ offset = 0;
+ }
/* dst_obj is returned with vmap pinned */
*needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER;
-unmap_src:
- i915_gem_object_unpin_map(src_obj);
unpin_dst:
i915_gem_object_unpin_pages(dst_obj);
unpin_src:
For simplicity, we want to continue using a contiguous mapping of the
command buffer, but we can reduce the number of vmappings we hold by
switching over to a page-by-page copy from the user batch buffer to the
shadow. The cost of saving one linear mapping is about 5% in trivial
workloads, which is more or less the overhead of calling kmap_atomic().

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)