@@ -1064,7 +1064,7 @@ tile:
if (kgem->gen >= 0100) {
cmd |= 8;
do {
- int nbox_this_time, rem;
+ int nbox_this_time, rem, pitch_aligned;
nbox_this_time = nbox;
rem = kgem_batch_space(kgem);
@@ -1077,12 +1077,16 @@ tile:
/* Count the total number of bytes to be read and allocate a
* single buffer large enough. Or if it is very small, combine
- * with other allocations. */
+ * with other allocations. Each sub-buffer starting point has
+ * to be aligned to 64 bytes to conform to the latest hardware requirements.
+ * Align the pitch of each sub-buffer to 64 bytes for simplicity.
+ */
offset = 0;
for (n = 0; n < nbox_this_time; n++) {
int height = box[n].y2 - box[n].y1;
int width = box[n].x2 - box[n].x1;
- offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height;
+ pitch_aligned = ALIGN(PITCH(width, dst->drawable.bitsPerPixel >> 3), 64);
+ offset += pitch_aligned * height;
}
src_bo = kgem_create_buffer(kgem, offset,
@@ -1113,9 +1117,10 @@ tile:
assert(box->x1 + dst_dx >= 0);
assert(box->y1 + dst_dy >= 0);
+ pitch_aligned = ALIGN(pitch, 64);
memcpy_blt(src, (char *)ptr + offset,
dst->drawable.bitsPerPixel,
- stride, pitch,
+ stride, pitch_aligned,
box->x1 + src_dx, box->y1 + src_dy,
0, 0,
width, height);
@@ -1133,7 +1138,7 @@ tile:
KGEM_RELOC_FENCED,
0);
b[6] = 0;
- b[7] = pitch;
+ b[7] = pitch_aligned;
*(uint64_t *)(b+8) =
kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
I915_GEM_DOMAIN_RENDER << 16 |
@@ -1142,7 +1147,7 @@ tile:
kgem->nbatch += 10;
box++;
- offset += pitch * height;
+ offset += pitch_aligned * height;
} while (--nbox_this_time);
assert(offset == __kgem_buffer_size(src_bo));
sigtrap_put();
On SKL+ the linear source buffer has to start on a cache-line boundary to meet the 2D engine source copy requirements. Signed-off-by: Guang Bai <guang.bai@intel.com> --- src/sna/sna_io.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-)