diff mbox series

[xf86-video-intel] sna/io: Align the linear source buffer to cache line for 2d blt of SKL+

Message ID 1536107092-15013-1-git-send-email-guang.bai@intel.com (mailing list archive)
State New, archived
Headers show
Series [xf86-video-intel] sna/io: Align the linear source buffer to cache line for 2d blt of SKL+ | expand

Commit Message

Guang Bai Sept. 5, 2018, 12:24 a.m. UTC
On SKL+ the linear source buffer has to start on a cache-line (64-byte)
boundary to meet the 2D engine's source copy requirements. Apply this
cache-line alignment policy to SKL+ only.

v2: Apply these changes only to SKL+ to avoid breaking older platforms,
    as suggested in Chris Wilson's review.

Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Guang Bai <guang.bai@intel.com>
---
 src/sna/sna_io.c | 47 +++++++++++++++++++++++++++++++++++------------
 1 file changed, 35 insertions(+), 12 deletions(-)
diff mbox series

Patch

diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c
index d32bd58..ae82d1f 100644
--- a/src/sna/sna_io.c
+++ b/src/sna/sna_io.c
@@ -1064,7 +1064,7 @@  tile:
 	if (kgem->gen >= 0100) {
 		cmd |= 8;
 		do {
-			int nbox_this_time, rem;
+			int nbox_this_time, rem, pitch_aligned;
 
 			nbox_this_time = nbox;
 			rem = kgem_batch_space(kgem);
@@ -1077,12 +1077,19 @@  tile:
 
 			/* Count the total number of bytes to be read and allocate a
 			 * single buffer large enough. Or if it is very small, combine
-			 * with other allocations. */
+			 * with other allocations. The starting point of each sub-buffer
+			 * has to be aligned to 64 bytes to conform to SKL+ hardware
+			 * requirements. For simplicity, align each sub-buffer's pitch to 64 bytes.
+			 */
 			offset = 0;
 			for (n = 0; n < nbox_this_time; n++) {
 				int height = box[n].y2 - box[n].y1;
 				int width = box[n].x2 - box[n].x1;
-				offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height;
+				if (kgem->gen >= 0110) {
+					pitch_aligned = ALIGN(PITCH(width, dst->drawable.bitsPerPixel >> 3), 64);
+					offset += pitch_aligned * height;
+				} else
+					offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height;
 			}
 
 			src_bo = kgem_create_buffer(kgem, offset,
@@ -1113,14 +1120,24 @@  tile:
 					assert(box->x1 + dst_dx >= 0);
 					assert(box->y1 + dst_dy >= 0);
 
-					memcpy_blt(src, (char *)ptr + offset,
-						   dst->drawable.bitsPerPixel,
-						   stride, pitch,
-						   box->x1 + src_dx, box->y1 + src_dy,
-						   0, 0,
-						   width, height);
+					if (kgem->gen >= 0110) {
+						pitch_aligned = ALIGN(pitch, 64);
+						memcpy_blt(src, (char *)ptr + offset,
+							   dst->drawable.bitsPerPixel,
+							   stride, pitch_aligned,
+							   box->x1 + src_dx, box->y1 + src_dy,
+							   0, 0,
+							   width, height);
+					} else
+						memcpy_blt(src, (char *)ptr + offset,
+							   dst->drawable.bitsPerPixel,
+							   stride, pitch,
+							   box->x1 + src_dx, box->y1 + src_dy,
+							   0, 0,
+							   width, height);
 
 					assert(kgem->mode == KGEM_BLT);
+
 					b = kgem->batch + kgem->nbatch;
 					b[0] = cmd;
 					b[1] = br13;
@@ -1133,16 +1150,22 @@  tile:
 								 KGEM_RELOC_FENCED,
 								 0);
 					b[6] = 0;
-					b[7] = pitch;
+					if (kgem->gen >= 0110)
+						b[7] = pitch_aligned;
+					else
+						b[7] = pitch;
+
 					*(uint64_t *)(b+8) =
 						kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
 								 I915_GEM_DOMAIN_RENDER << 16 |
 								 KGEM_RELOC_FENCED,
 								 offset);
 					kgem->nbatch += 10;
-
 					box++;
-					offset += pitch * height;
+					if (kgem->gen >= 0110)
+						offset += pitch_aligned * height;
+					else
+						offset += pitch * height;
 				} while (--nbox_this_time);
 				assert(offset == __kgem_buffer_size(src_bo));
 				sigtrap_put();