diff mbox

[2/2] drm/i915: Support creation of unbound wc user mappings for objects

Message ID 1420196370-13433-3-git-send-email-akash.goel@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

akash.goel@intel.com Jan. 2, 2015, 10:59 a.m. UTC
From: Akash Goel <akash.goel@intel.com>

This patch provides support to create write-combining virtual mappings of
GEM object. It intends to provide the same funtionality of 'mmap_gtt'
interface without the constraints and contention of a limited aperture
space, but requires clients handles the linear to tile conversion on their
own. This is for improving the CPU write operation performance, as with such
mapping, writes and reads are almost 50% faster than with mmap_gtt. Similar
to the GTT mmapping, unlike the regular CPU mmapping, it avoids the cache
flush after update from CPU side, when object is passed onto GPU.  This
type of mapping is specially useful in case of sub-region update,
i.e. when only a portion of the object is to be updated. Using a CPU mmap
in such cases would normally incur a clflush of the whole object, and
using a GTT mmapping would likely require eviction of an active object or
fence and thus stall. The write-combining CPU mmap avoids both.

To ensure the cache coherency, before using this mapping, the GTT domain
has been reused here. This provides the required cache flush if the object
is in CPU domain or synchronization against the concurrent rendering.
Although the access through an uncached mmap should automatically
invalidate the cache lines, this may not be true for non-temporal write
instructions and also not all pages of the object may be updated at any
given point of time through this mapping.  Having a call to get_pages in
set_to_gtt_domain function, as added in the earlier patch 'drm/i915:
Broaden application of set-domain(GTT)', would guarantee the clflush and
so there will be no cachelines holding the data for the object before it
is accessed through this map.

The drm_i915_gem_mmap structure (for the DRM_I915_GEM_MMAP_IOCTL) has been
extended with a new flags field (defaulting to 0 for existent users). In
order for userspace to detect the extended ioctl, a new parameter
I915_PARAM_MMAP_VERSION has been added for versioning the ioctl interface.

v2: Fix error handling, invalid flag detection, renaming (ickle)

v3: Rebase to latest drm-intel-nightly codebase

The new mmapping is exercised by igt/gem_mmap_wc,
igt/gem_concurrent_blit and igt/gem_gtt_speed.

Change-Id: Ie883942f9e689525f72fe9a8d3780c3a9faa769a
Signed-off-by: Akash Goel <akash.goel@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_dma.c |  3 +++
 drivers/gpu/drm/i915/i915_gem.c | 19 +++++++++++++++++++
 include/uapi/drm/i915_drm.h     |  9 +++++++++
 3 files changed, 31 insertions(+)

Comments

Shuang He Jan. 5, 2015, 12:52 a.m. UTC | #1
Tested-By: PRC QA PRTS (Patch Regression Test System Contact: shuang.he@intel.com)
-------------------------------------Summary-------------------------------------
Platform          Delta          drm-intel-nightly          Series Applied
PNV                 -1              363/364              362/364
ILK                 -18              364/366              346/366
SNB              +3-1              443/450              445/450
IVB                                  496/498              496/498
BYT                                  288/289              288/289
HSW              +5-4              542/564              543/564
BDW                                  415/417              415/417
-------------------------------------Detailed-------------------------------------
Platform  Test                                drm-intel-nightly          Series Applied
*PNV  igt_gem_exec_big      PASS(2, M25M23)      FAIL(1, M23)
 ILK  igt_kms_flip_nonexisting-fb      DMESG_WARN(1, M26)PASS(1, M37)      DMESG_WARN(1, M26)
 ILK  igt_kms_flip_rcs-flip-vs-panning-interruptible      DMESG_WARN(1, M26)PASS(1, M37)      DMESG_WARN(1, M26)
 ILK  igt_kms_flip_rcs-wf_vblank-vs-dpms-interruptible      DMESG_WARN(1, M26)PASS(1, M37)      DMESG_WARN(1, M26)
 ILK  igt_kms_render_direct-render      DMESG_WARN(1, M26)PASS(1, M37)      DMESG_WARN(1, M26)
 ILK  igt_kms_flip_bcs-flip-vs-modeset-interruptible      DMESG_WARN(1, M26)PASS(1, M37)      DMESG_WARN(1, M26)
 ILK  igt_kms_flip_blocking-absolute-wf_vblank-interruptible      DMESG_WARN(1, M26)PASS(1, M37)      DMESG_WARN(1, M26)
 ILK  igt_kms_flip_busy-flip-interruptible      DMESG_WARN(1, M26)PASS(1, M37)      DMESG_WARN(1, M26)
 ILK  igt_kms_flip_flip-vs-dpms-interruptible      DMESG_WARN(1, M26)PASS(1, M37)      DMESG_WARN(1, M26)
 ILK  igt_kms_flip_flip-vs-panning      DMESG_WARN(1, M26)PASS(1, M37)      DMESG_WARN(1, M26)
 ILK  igt_kms_flip_flip-vs-rmfb-interruptible      DMESG_WARN(1, M26)PASS(1, M37)      DMESG_WARN(1, M26)
 ILK  igt_kms_flip_plain-flip-fb-recreate-interruptible      DMESG_WARN(1, M26)PASS(1, M37)      DMESG_WARN(1, M26)
 ILK  igt_kms_flip_plain-flip-ts-check-interruptible      DMESG_WARN(1, M26)PASS(1, M37)      DMESG_WARN(1, M26)
 ILK  igt_kms_flip_rcs-flip-vs-dpms      DMESG_WARN(1, M26)PASS(1, M37)      DMESG_WARN(1, M26)
 ILK  igt_kms_flip_rcs-flip-vs-modeset      DMESG_WARN(1, M26)PASS(1, M37)      DMESG_WARN(1, M26)
 ILK  igt_kms_flip_rcs-flip-vs-panning      DMESG_WARN(1, M26)PASS(1, M37)      DMESG_WARN(1, M26)
 ILK  igt_kms_flip_vblank-vs-hang      DMESG_WARN(1, M26)PASS(1, M37)      DMESG_WARN(1, M26)
 ILK  igt_kms_flip_wf_vblank-ts-check      DMESG_WARN(1, M26)PASS(1, M37)      DMESG_WARN(1, M26)
 ILK  igt_kms_flip_wf_vblank-vs-modeset-interruptible      DMESG_WARN(1, M26)PASS(1, M37)      DMESG_WARN(1, M26)
*SNB  igt_kms_flip_dpms-vs-vblank-race      DMESG_WARN(2, M35M22)      PASS(1, M22)
*SNB  igt_kms_flip_dpms-vs-vblank-race-interruptible      DMESG_WARN(2, M35M22)      PASS(1, M22)
 SNB  igt_kms_flip_modeset-vs-vblank-race-interruptible      DMESG_WARN(2, M35M22)PASS(1, M35)      DMESG_WARN(1, M22)
 SNB  igt_kms_plane_plane-position-hole-pipe-B-plane-1      DMESG_WARN(1, M35)PASS(2, M35M22)      PASS(1, M22)
*HSW  igt_kms_flip_flip-vs-dpms-off-vs-modeset-interruptible      DMESG_WARN(2, M40M19)      PASS(1, M19)
*HSW  igt_kms_flip_single-buffer-flip-vs-dpms-off-vs-modeset      PASS(3, M40M19)      DMESG_WARN(1, M19)
 HSW  igt_kms_flip_single-buffer-flip-vs-dpms-off-vs-modeset-interruptible      DMESG_WARN(1, M40)PASS(1, M19)      PASS(1, M19)
 HSW  igt_kms_plane_plane-panning-bottom-right-pipe-C-plane-1      TIMEOUT(2, M40)PASS(1, M19)      PASS(1, M19)
*HSW  igt_kms_plane_plane-panning-top-left-pipe-B-plane-2      PASS(2, M40M19)      DMESG_FAIL(1, M19)
*HSW  igt_kms_plane_plane-position-hole-pipe-B-plane-1      PASS(2, M40M19)      DMESG_WARN(1, M19)
*HSW  igt_kms_plane_plane-position-hole-pipe-B-plane-2      PASS(2, M40M19)      TIMEOUT(1, M19)
*HSW  igt_pm_rpm_modeset-non-lpsp-stress-no-wait      NSPT(1, M19)DMESG_WARN(1, M40)PASS(1, M40)      PASS(1, M19)
 HSW  igt_kms_flip_flip-vs-rmfb      DMESG_WARN(1, M40)PASS(1, M19)      PASS(1, M19)
Note: You need to pay more attention to line start with '*'
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 52730ed..7fad6b8 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -143,6 +143,9 @@  static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_COHERENT_PHYS_GTT:
 		value = 1;
 		break;
+	case I915_PARAM_MMAP_VERSION:
+		value = 1;
+		break;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", param->param);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ef4e424..aeeb617 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1549,6 +1549,12 @@  i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
 	struct drm_gem_object *obj;
 	unsigned long addr;
 
+	if (args->flags & ~(I915_MMAP_WC))
+		return -EINVAL;
+
+	if (args->flags & I915_MMAP_WC && !cpu_has_pat)
+		return -ENODEV;
+
 	obj = drm_gem_object_lookup(dev, file, args->handle);
 	if (obj == NULL)
 		return -ENOENT;
@@ -1564,6 +1570,19 @@  i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
 	addr = vm_mmap(obj->filp, 0, args->size,
 		       PROT_READ | PROT_WRITE, MAP_SHARED,
 		       args->offset);
+	if (args->flags & I915_MMAP_WC) {
+		struct mm_struct *mm = current->mm;
+		struct vm_area_struct *vma;
+
+		down_write(&mm->mmap_sem);
+		vma = find_vma(mm, addr);
+		if (vma)
+			vma->vm_page_prot =
+				pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+		else
+			addr = -ENOMEM;
+		up_write(&mm->mmap_sem);
+	}
 	drm_gem_object_unreference_unlocked(obj);
 	if (IS_ERR((void *)addr))
 		return addr;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 2502622..c155a03 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -341,6 +341,7 @@  typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_WT     	 	 27
 #define I915_PARAM_CMD_PARSER_VERSION	 28
 #define I915_PARAM_HAS_COHERENT_PHYS_GTT 29
+#define I915_PARAM_MMAP_VERSION          30
 
 typedef struct drm_i915_getparam {
 	int param;
@@ -488,6 +489,14 @@  struct drm_i915_gem_mmap {
 	 * This is a fixed-size type for 32/64 compatibility.
 	 */
 	__u64 addr_ptr;
+
+	/**
+	 * Flags for extended behaviour.
+	 *
+	 * Added in version 2.
+	 */
+	__u64 flags;
+#define I915_MMAP_WC 0x1
 };
 
 struct drm_i915_gem_mmap_gtt {