diff mbox

intel: New libdrm interface to create uncached CPU mapping

Message ID 1414060625-29603-1-git-send-email-akash.goel@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

akash.goel@intel.com Oct. 23, 2014, 10:37 a.m. UTC
From: Akash Goel <akash.goel@intel.com>

A new libdrm interface 'drm_intel_gem_bo_map_uc' is provided
by this patch. Through this interface Gfx clients can create
uncached virtual mappings of the Gem object. It will provide the
same funtionality of 'mmap_gtt' interface without the constraints
of limited aperture space, but provided clients handles the linear
to tile conversion on their own.
This patch is intended for improving the CPU write operation performance,
as with such mapping, writes are almost 50% faster than with mmap_gtt.
Also it avoids the Cache flush after update from CPU side, when object is
passed on to GPU, which will be the case if regular mmap interface is used.
This type of mapping is specially useful in case of sub-region
update, i.e. when only a portion of the object is to be updated.
Also there is a support for the unsynchronized version of this interface
named 'drm_intel_gem_bo_map_uc_unsynchronized', through which uncached
virtual mappings, but unsynchronized one, can be created of the Gem object.
To ensure the cache coherency, before using this mapping, the GTT domain has
been reused here. This provides the required Cache flush if the object is in
CPU domain or synchronization against the concurrent rendering
A new field 'flags' has been added to 'drm_i915_gem_mmap' structure,
used in gem_mmap ioctl, which allows to convey the required mapping
type as uncached type. User can query the driver for the support of this
mapping through the get_params. For that a new define
I915_PARAM_HAS_UC_MMAP has been added.

Signed-off-by: Akash Goel <akash.goel@intel.com>
---
 include/drm/i915_drm.h   |   4 ++
 intel/intel_bufmgr.h     |   3 ++
 intel/intel_bufmgr_gem.c | 124 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 131 insertions(+)
diff mbox

Patch

diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index 15dd01d..cd97ff9 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -340,6 +340,7 @@  typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_EXEC_HANDLE_LUT   26
 #define I915_PARAM_HAS_WT     	 	 27
 #define I915_PARAM_CMD_PARSER_VERSION	 28
+#define I915_PARAM_HAS_UC_MMAP		 29
 
 typedef struct drm_i915_getparam {
 	int param;
@@ -487,6 +488,9 @@  struct drm_i915_gem_mmap {
 	 * This is a fixed-size type for 32/64 compatibility.
 	 */
 	__u64 addr_ptr;
+
+#define I915_GEM_USE_UNCACHED_MMAP (1<<0)
+	__u64 flags;
 };
 
 struct drm_i915_gem_mmap_gtt {
diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
index be83a56..0a7c43a 100644
--- a/intel/intel_bufmgr.h
+++ b/intel/intel_bufmgr.h
@@ -177,6 +177,9 @@  void drm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr,
 int drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo);
 int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo);
 int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo);
+int drm_intel_gem_bo_map_uc_unsynchronized(drm_intel_bo *bo);
+int drm_intel_gem_bo_map_uc(drm_intel_bo *bo);
+int drm_intel_gem_bo_unmap_uc(drm_intel_bo *bo);
 
 int drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo);
 void drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start);
diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index f2f4fea..f094ce8 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -184,6 +184,8 @@  struct _drm_intel_bo_gem {
 	int reloc_count;
 	/** Mapped address for the buffer, saved across map/unmap cycles */
 	void *mem_virtual;
+	/** Uncached Mapped address for the buffer, saved across map/unmap cycles */
+	void *mem_uc_virtual;
 	/** GTT virtual address for the buffer, saved across map/unmap cycles */
 	void *gtt_virtual;
 	/**
@@ -1267,6 +1269,121 @@  static void drm_intel_gem_bo_unreference(drm_intel_bo *bo)
 	}
 }
 
+static int
+map_uncached(drm_intel_bo *bo)
+{
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+	int ret;
+
+	if (bo_gem->is_userptr)
+		return -EINVAL;
+
+	if (bo_gem->map_count++ == 0)
+		drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
+
+	/* Get a mapping of the buffer if we haven't before. */
+	if (bo_gem->mem_uc_virtual == NULL) {
+		struct drm_i915_gem_mmap mmap_arg;
+
+		DBG("bo_map_uc: mmap %d (%s), map_count=%d\n",
+		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
+
+		VG_CLEAR(mmap_arg);
+		mmap_arg.handle = bo_gem->gem_handle;
+		/* To indicate the uncached virtual mapping to KMD */
+		mmap_arg.flags = I915_GEM_USE_UNCACHED_MMAP;
+		mmap_arg.offset = 0;
+		mmap_arg.size = bo->size;
+		ret = drmIoctl(bufmgr_gem->fd,
+			       DRM_IOCTL_I915_GEM_MMAP,
+			       &mmap_arg);
+		if (ret != 0) {
+			ret = -errno;
+			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
+			    __FILE__, __LINE__, bo_gem->gem_handle,
+			    bo_gem->name, strerror(errno));
+			if (--bo_gem->map_count == 0)
+				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
+			pthread_mutex_unlock(&bufmgr_gem->lock);
+			return ret;
+		}
+		VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
+		bo_gem->mem_uc_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
+	}
+
+	bo->virtual = bo_gem->mem_uc_virtual;
+
+	DBG("bo_map_uc: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
+	    bo_gem->mem_uc_virtual);
+
+	return 0;
+}
+
+/* To be used in a similar way to mmap_gtt */
+drm_public int
+drm_intel_gem_bo_map_uc(drm_intel_bo *bo) {
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+	struct drm_i915_gem_set_domain set_domain;
+	int ret;
+
+	pthread_mutex_lock(&bufmgr_gem->lock);
+
+	ret = map_uncached(bo);
+	if (ret) {
+		pthread_mutex_unlock(&bufmgr_gem->lock);
+		return ret;
+	}
+
+	/* Now move it to the GTT domain so that the GPU and CPU
+	 * caches are flushed and the GPU isn't actively using the
+	 * buffer.
+	 *
+	 * The domain change is done even for the objects which
+	 * are not bounded. For them first the pages are acquired,
+	 * before the domain change.
+	 */
+	VG_CLEAR(set_domain);
+	set_domain.handle = bo_gem->gem_handle;
+	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
+	set_domain.write_domain = I915_GEM_DOMAIN_GTT;
+	ret = drmIoctl(bufmgr_gem->fd,
+		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
+		       &set_domain);
+	if (ret != 0) {
+		DBG("%s:%d: Error setting domain %d: %s\n",
+		    __FILE__, __LINE__, bo_gem->gem_handle,
+		    strerror(errno));
+	}
+	drm_intel_gem_bo_mark_mmaps_incoherent(bo);
+	VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size));
+	pthread_mutex_unlock(&bufmgr_gem->lock);
+
+	return 0;
+}
+
+drm_public int
+drm_intel_gem_bo_map_uc_unsynchronized(drm_intel_bo *bo) {
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+#ifdef HAVE_VALGRIND
+	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+#endif
+	int ret;
+
+	pthread_mutex_lock(&bufmgr_gem->lock);
+
+	ret = map_uncached(bo);
+	if (ret == 0) {
+		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
+		VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
+	}
+
+	pthread_mutex_unlock(&bufmgr_gem->lock);
+
+	return ret;
+}
+
 static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
 {
 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
@@ -1293,6 +1410,7 @@  static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
 
 		VG_CLEAR(mmap_arg);
 		mmap_arg.handle = bo_gem->gem_handle;
+		mmap_arg.flags = 0;
 		mmap_arg.offset = 0;
 		mmap_arg.size = bo->size;
 		ret = drmIoctl(bufmgr_gem->fd,
@@ -1553,6 +1671,12 @@  static int drm_intel_gem_bo_unmap(drm_intel_bo *bo)
 }
 
 drm_public int
+drm_intel_gem_bo_unmap_uc(drm_intel_bo *bo)
+{
+	return drm_intel_gem_bo_unmap(bo);
+}
+
+drm_public int
 drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo)
 {
 	return drm_intel_gem_bo_unmap(bo);