@@ -633,6 +633,65 @@ static int emit_copy(struct i915_request *rq,
return 0;
}
+/*
+ * scatter_list_length - total DMA length of a scatterlist
+ * @sg: first entry to count from; may be NULL
+ *
+ * Adds up sg_dma_len() of consecutive entries, stopping as soon as
+ * sg_next() yields NULL or an entry reports a DMA length of zero.
+ *
+ * Returns the accumulated length in bytes, 0 for a NULL or empty list.
+ *
+ * NOTE(review): the sum is kept in an int, so this presumably relies on
+ * the lists handled here staying below 2 GiB -- confirm against callers.
+ */
+static int scatter_list_length(struct scatterlist *sg)
+{
+	int len = 0;
+
+	while (sg && sg_dma_len(sg)) {
+		len += sg_dma_len(sg);
+		sg = sg_next(sg);
+	}
+
+	return len;
+}
+
+/*
+ * calculate_chunk_sz - pick the per-pass sizes for the main and ccs copies
+ * @i915: device, used to convert a main-memory size into its ccs size
+ * @src_is_lmem: true when the copy source is local memory
+ * @src_sz: in/out, number of source bytes to map for this pass
+ * @ccs_sz: out, number of ccs bytes to handle in this pass; only written
+ *          when @ccs_bytes_to_cpy is non-zero
+ * @bytes_to_cpy: main-memory bytes still to be copied
+ * @ccs_bytes_to_cpy: ccs bytes still to be copied, 0 if no ccs handling
+ *
+ * Note that *@src_sz is left untouched when ccs data is pending and the
+ * source is lmem.
+ */
+static void
+calculate_chunk_sz(struct drm_i915_private *i915, bool src_is_lmem,
+		   int *src_sz, int *ccs_sz, u32 bytes_to_cpy,
+		   u32 ccs_bytes_to_cpy)
+{
+	/* Without ccs data to move, a full chunk of the source is mapped. */
+	if (!ccs_bytes_to_cpy) {
+		*src_sz = CHUNK_SZ;
+		return;
+	}
+
+	/*
+	 * A single pass covers at most CHUNK_SZ of lmem, so the ccs data
+	 * handled alongside it is capped at GET_CCS_BYTES(i915, CHUNK_SZ).
+	 */
+	*ccs_sz = min_t(int, ccs_bytes_to_cpy, GET_CCS_BYTES(i915, CHUNK_SZ));
+
+	/*
+	 * Asking for CHUNK_SZ would take every page up to CHUNK_SZ for the
+	 * blt. On flat-ccs platforms an smem object holds more pages than
+	 * main memory needs, so an smem source is limited to the bytes of
+	 * main memory that are still outstanding.
+	 */
+	if (!src_is_lmem)
+		*src_sz = min_t(int, bytes_to_cpy, CHUNK_SZ);
+}
+
+/*
+ * get_ccs_sg_sgt - step an sg iterator forward by a number of bytes
+ * @it: iterator to advance in place; its sg, dma and max fields are updated
+ * @bytes_to_cpy: number of bytes to skip over
+ *
+ * Walks whole entries while the remaining skip is at least as large as
+ * what is left in the current entry, then offsets into the entry that
+ * contains the target. When the skip ends exactly on an entry boundary
+ * the iterator is left at the start of the following entry.
+ *
+ * NOTE(review): the entry returned by __sg_next() is dereferenced without
+ * a check; presumably callers guarantee the list extends past
+ * @bytes_to_cpy -- confirm.
+ */
+static void get_ccs_sg_sgt(struct sgt_dma *it, u32 bytes_to_cpy)
+{
+	for (;;) {
+		u32 avail;
+
+		GEM_BUG_ON(!it->sg || !sg_dma_len(it->sg));
+
+		/* Bytes still unconsumed in the current entry. */
+		avail = it->max - it->dma;
+		if (avail > bytes_to_cpy) {
+			/* The target lies inside this entry. */
+			it->dma += bytes_to_cpy;
+			return;
+		}
+
+		/* Use up the rest of this entry and load the next one. */
+		bytes_to_cpy -= avail;
+		it->sg = __sg_next(it->sg);
+		it->dma = sg_dma_address(it->sg);
+		it->max = it->dma + sg_dma_len(it->sg);
+
+		if (!bytes_to_cpy)
+			return;
+	}
+}
+
int
intel_context_migrate_copy(struct intel_context *ce,
const struct i915_deps *deps,
@@ -644,9 +703,15 @@ intel_context_migrate_copy(struct intel_context *ce,
bool dst_is_lmem,
struct i915_request **out)
{
- struct sgt_dma it_src = sg_sgt(src), it_dst = sg_sgt(dst);
+ struct sgt_dma it_src = sg_sgt(src), it_dst = sg_sgt(dst), it_ccs;
+ struct drm_i915_private *i915 = ce->engine->i915;
+ u32 ccs_bytes_to_cpy = 0, bytes_to_cpy;
+ enum i915_cache_level ccs_cache_level;
+ int src_sz, dst_sz, ccs_sz;
u32 src_offset, dst_offset;
+ u8 src_access, dst_access;
struct i915_request *rq;
+ bool ccs_is_src;
int err;
GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm);
@@ -655,6 +720,38 @@ intel_context_migrate_copy(struct intel_context *ce,
GEM_BUG_ON(ce->ring->size < SZ_64K);
+ src_sz = scatter_list_length(src);
+ bytes_to_cpy = src_sz;
+
+ if (HAS_FLAT_CCS(i915) && src_is_lmem ^ dst_is_lmem) {
+ src_access = !src_is_lmem && dst_is_lmem;
+ dst_access = !src_access;
+
+ dst_sz = scatter_list_length(dst);
+ if (src_is_lmem) {
+ it_ccs = it_dst;
+ ccs_cache_level = dst_cache_level;
+ ccs_is_src = false;
+ } else if (dst_is_lmem) {
+ bytes_to_cpy = dst_sz;
+ it_ccs = it_src;
+ ccs_cache_level = src_cache_level;
+ ccs_is_src = true;
+ }
+
+ /*
+ * When an eviction of ccs data is needed, smem will have the
+ * extra pages for the ccs data
+ *
+ * TO-DO: Want to move the size mismatch check to a WARN_ON,
+ * but still we have some requests of smem->lmem with same size.
+ * Need to fix it.
+ */
+ ccs_bytes_to_cpy = src_sz != dst_sz ? GET_CCS_BYTES(i915, bytes_to_cpy) : 0;
+ if (ccs_bytes_to_cpy)
+ get_ccs_sg_sgt(&it_ccs, bytes_to_cpy);
+ }
+
src_offset = 0;
dst_offset = CHUNK_SZ;
if (HAS_64K_PAGES(ce->engine->i915)) {
@@ -694,8 +791,11 @@ intel_context_migrate_copy(struct intel_context *ce,
if (err)
goto out_rq;
+ calculate_chunk_sz(i915, src_is_lmem, &src_sz, &ccs_sz,
+ bytes_to_cpy, ccs_bytes_to_cpy);
+
len = emit_pte(rq, &it_src, src_cache_level, src_is_lmem,
- src_offset, CHUNK_SZ);
+ src_offset, src_sz);
if (!len) {
err = -EINVAL;
goto out_rq;
@@ -718,7 +818,46 @@ intel_context_migrate_copy(struct intel_context *ce,
if (err)
goto out_rq;
- err = emit_copy(rq, dst_offset, src_offset, len);
+ err = emit_copy(rq, dst_offset, src_offset, len);
+ if (err)
+ goto out_rq;
+
+ bytes_to_cpy -= len;
+
+ if (ccs_bytes_to_cpy) {
+ err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
+ if (err)
+ goto out_rq;
+
+ err = emit_pte(rq, &it_ccs, ccs_cache_level, false,
+ ccs_is_src ? src_offset : dst_offset,
+ ccs_sz);
+
+ err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
+ if (err)
+ goto out_rq;
+
+ /*
+ * Using max of src_sz and dst_sz, as we need to
+ * pass the lmem size corresponding to the ccs
+ * blocks we need to handle.
+ */
+ ccs_sz = max_t(int, ccs_is_src ? ccs_sz : src_sz,
+ ccs_is_src ? dst_sz : ccs_sz);
+
+ err = emit_copy_ccs(rq, dst_offset, dst_access,
+ src_offset, src_access, ccs_sz);
+ if (err)
+ goto out_rq;
+
+ err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
+ if (err)
+ goto out_rq;
+
+ /* Converting back to ccs bytes */
+ ccs_sz = GET_CCS_BYTES(rq->engine->i915, ccs_sz);
+ ccs_bytes_to_cpy -= ccs_sz;
+ }
/* Arbitration is re-enabled between requests. */
out_rq:
@@ -726,9 +865,26 @@ intel_context_migrate_copy(struct intel_context *ce,
i915_request_put(*out);
*out = i915_request_get(rq);
i915_request_add(rq);
- if (err || !it_src.sg || !sg_dma_len(it_src.sg))
+
+ if (err)
break;
+ if (!bytes_to_cpy && !ccs_bytes_to_cpy) {
+ if (src_is_lmem)
+ WARN_ON(it_src.sg && sg_dma_len(it_src.sg));
+ else
+ WARN_ON(it_dst.sg && sg_dma_len(it_dst.sg));
+ break;
+ }
+
+ if (WARN_ON(!it_src.sg || !sg_dma_len(it_src.sg) ||
+ !it_dst.sg || !sg_dma_len(it_dst.sg) ||
+ (ccs_bytes_to_cpy && (!it_ccs.sg ||
+ !sg_dma_len(it_ccs.sg))))) {
+ err = -EINVAL;
+ break;
+ }
+
cond_resched();
} while (1);