@@ -1064,7 +1064,7 @@ tile:
if (kgem->gen >= 0100) {
cmd |= 8;
do {
- int nbox_this_time, rem;
+ int nbox_this_time, rem, pitch_aligned;
nbox_this_time = nbox;
rem = kgem_batch_space(kgem);
@@ -1077,12 +1077,19 @@ tile:
/* Count the total number of bytes to be read and allocate a
* single buffer large enough. Or if it is very small, combine
- * with other allocations. */
+ * with other allocations. On SKL+ the starting point of each
+ * sub-buffer has to be aligned to 64 bytes to meet the hardware
+ * requirements. For simplicity, align each sub-buffer's pitch to 64 bytes.
+ */
offset = 0;
for (n = 0; n < nbox_this_time; n++) {
int height = box[n].y2 - box[n].y1;
int width = box[n].x2 - box[n].x1;
- offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height;
+ if (kgem->gen >= 0110) {
+ pitch_aligned = ALIGN(PITCH(width, dst->drawable.bitsPerPixel >> 3), 64);
+ offset += pitch_aligned * height;
+ } else
+ offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height;
}
src_bo = kgem_create_buffer(kgem, offset,
@@ -1113,14 +1120,24 @@ tile:
assert(box->x1 + dst_dx >= 0);
assert(box->y1 + dst_dy >= 0);
- memcpy_blt(src, (char *)ptr + offset,
- dst->drawable.bitsPerPixel,
- stride, pitch,
- box->x1 + src_dx, box->y1 + src_dy,
- 0, 0,
- width, height);
+ if (kgem->gen >= 0110) {
+ pitch_aligned = ALIGN(pitch, 64);
+ memcpy_blt(src, (char *)ptr + offset,
+ dst->drawable.bitsPerPixel,
+ stride, pitch_aligned,
+ box->x1 + src_dx, box->y1 + src_dy,
+ 0, 0,
+ width, height);
+ } else
+ memcpy_blt(src, (char *)ptr + offset,
+ dst->drawable.bitsPerPixel,
+ stride, pitch,
+ box->x1 + src_dx, box->y1 + src_dy,
+ 0, 0,
+ width, height);
assert(kgem->mode == KGEM_BLT);
+
b = kgem->batch + kgem->nbatch;
b[0] = cmd;
b[1] = br13;
@@ -1133,16 +1150,22 @@ tile:
KGEM_RELOC_FENCED,
0);
b[6] = 0;
- b[7] = pitch;
+ if (kgem->gen >= 0110)
+ b[7] = pitch_aligned;
+ else
+ b[7] = pitch;
+
*(uint64_t *)(b+8) =
kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
I915_GEM_DOMAIN_RENDER << 16 |
KGEM_RELOC_FENCED,
offset);
kgem->nbatch += 10;
-
box++;
- offset += pitch * height;
+ if (kgem->gen >= 0110)
+ offset += pitch_aligned * height;
+ else
+ offset += pitch * height;
} while (--nbox_this_time);
assert(offset == __kgem_buffer_size(src_bo));
sigtrap_put();
On SKL+ the linear source buffer has to start on a cache line boundary to meet the 2D engine's source copy requirements. Apply this cache line alignment policy to SKL+ only. v2: Apply these changes only to SKL+ so as not to break older platforms, based on Chris Wilson's review. Cc: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Guang Bai <guang.bai@intel.com> --- src/sna/sna_io.c | 47 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 12 deletions(-)