[RFC,51/60] drm/i915/lmem: support optional CPU clearing for special internal use
diff mbox series

Message ID 20200710115757.290984-52-matthew.auld@intel.com
State New
Headers show
Series
  • DG1 LMEM enabling
Related show

Commit Message

Matthew Auld July 10, 2020, 11:57 a.m. UTC
For some internal device local-memory objects it would be useful to have
an option to CPU clear the pages upon gathering the backing store. Note
that this may happen before the blitter is usable, as is the case for
some internal GuC objects.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  8 +-
 drivers/gpu/drm/i915/gem/i915_gem_region.c    | 20 +++++
 .../drm/i915/selftests/intel_memory_region.c  | 83 +++++++++++++++++++
 3 files changed, 108 insertions(+), 3 deletions(-)

Patch
diff mbox series

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 848ac102b788..0ae33c7a9239 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -148,9 +148,11 @@  struct drm_i915_gem_object {
 	unsigned long flags;
 #define I915_BO_ALLOC_CONTIGUOUS BIT(0)
 #define I915_BO_ALLOC_VOLATILE   BIT(1)
-#define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | I915_BO_ALLOC_VOLATILE)
-#define I915_BO_READONLY         BIT(2)
-
+#define I915_BO_ALLOC_CPU_CLEAR  BIT(2)
+#define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \
+			     I915_BO_ALLOC_VOLATILE | \
+			     I915_BO_ALLOC_CPU_CLEAR)
+#define I915_BO_READONLY         BIT(3)
 	/*
 	 * Is the object to be mapped as read-only to the GPU
 	 * Only honoured if hardware has relevant pte bit
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c
index cc8116e63808..88860169d340 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_region.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c
@@ -95,6 +95,26 @@  i915_gem_object_get_pages_buddy(struct drm_i915_gem_object *obj)
 	sg_mark_end(sg);
 	i915_sg_trim(st);
 
+	/* Intended for kernel internal use only */
+	if (obj->flags & I915_BO_ALLOC_CPU_CLEAR) {
+		struct scatterlist *sg;
+		unsigned long i;
+
+		for_each_sg(st->sgl, sg, st->nents, i) {
+			unsigned int length;
+			void __iomem *vaddr;
+			dma_addr_t daddr;
+
+			daddr = sg_dma_address(sg);
+			daddr -= mem->region.start;
+			length = sg_dma_len(sg);
+
+			vaddr = io_mapping_map_wc(&mem->iomap, daddr, length);
+			memset64(vaddr, 0, length / sizeof(u64));
+			io_mapping_unmap(vaddr);
+		}
+	}
+
 	__i915_gem_object_set_pages(obj, st, sg_page_sizes);
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
index 6e80d99048e4..d10f5b52b677 100644
--- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c
+++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
@@ -396,6 +396,88 @@  static int igt_lmem_create(void *arg)
 	return err;
 }
 
+static int igt_lmem_create_cleared_cpu(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	I915_RND_STATE(prng);
+	IGT_TIMEOUT(end_time);
+	u32 size, val, i;
+	int err;
+
+	i915_gem_drain_freed_objects(i915);
+
+	size = max_t(u32, PAGE_SIZE, i915_prandom_u32_max_state(SZ_32M, &prng));
+	size = round_up(size, PAGE_SIZE);
+	i = 0;
+
+	do {
+		struct drm_i915_gem_object *obj;
+		void __iomem *vaddr;
+		unsigned int flags;
+		unsigned long n;
+		u32 dword;
+
+		/*
+		 * Even passes ask for CPU_CLEAR and must read back zero. Odd
+		 * passes must still see the previous pass's value, which
+		 * relies on allocations being deterministic. Every pass ends
+		 * by dirtying all pages with a fresh random value.
+		 */
+
+		flags = I915_BO_ALLOC_CPU_CLEAR;
+		if (i & 1)
+			flags = 0;
+		else
+			val = 0;
+
+		obj = i915_gem_object_create_lmem(i915, size, flags);
+		if (IS_ERR(obj))
+			return PTR_ERR(obj);
+
+		err = i915_gem_object_pin_pages(obj);
+		if (err)
+			goto out_put;
+
+		dword = i915_prandom_u32_max_state(PAGE_SIZE / sizeof(u32),
+						   &prng);
+
+		err = igt_cpu_check(obj, dword, val);
+		if (err) {
+			pr_err("%s failed with size=%u, flags=%u\n",
+			       __func__, size, flags);
+			goto out_unpin;
+		}
+
+		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
+		if (IS_ERR(vaddr)) {
+			err = PTR_ERR(vaddr);
+			goto out_unpin;
+		}
+
+		val = prandom_u32_state(&prng);
+
+		for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) {
+			memset32(vaddr + n * PAGE_SIZE, val,
+				 PAGE_SIZE / sizeof(u32));
+		}
+
+		i915_gem_object_unpin_map(obj);
+out_unpin:
+		i915_gem_object_unpin_pages(obj);
+		__i915_gem_object_put_pages(obj);
+out_put:
+		i915_gem_object_put(obj);
+
+		if (err)
+			break;
+		++i;
+	} while (!__igt_timeout(end_time, NULL));
+
+	pr_info("%s completed (%u) iterations\n", __func__, i);
+
+	return err;
+}
+
 static int igt_lmem_write_gpu(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
@@ -799,6 +881,7 @@  int intel_memory_region_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(igt_lmem_create),
+		SUBTEST(igt_lmem_create_cleared_cpu),
 		SUBTEST(igt_lmem_write_cpu),
 		SUBTEST(igt_lmem_write_gpu),
 	};