drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl

Message ID 1360678642-10619-1-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State New, archived

Commit Message

Chris Wilson Feb. 12, 2013, 2:17 p.m. UTC
By exporting the ability to map user addresses and insert PTEs
representing their backing pages into the GTT, we can exploit UMA in order
to utilize normal application data as a texture source or even as a
render target (depending upon the capabilities of the chipset). This has
a number of uses, with zero-copy downloads to the GPU and efficient
readback making the intermixed streaming of CPU and GPU operations
fairly efficient. This ability has many widespread implications, from
faster rendering of client-side software rasterisers (chromium) and
mitigation of stalls due to readback (firefox) to faster pipelining
of texture data (such as pixel buffer objects in GL or data blobs in CL).
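
A rough sketch of how userspace might call the new ioctl, using the
struct drm_i915_gem_userptr uapi added below; the helper name and error
handling are illustrative only, not part of the patch:

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Hypothetical helper: wrap an existing user allocation in a GEM handle
 * via the proposed DRM_IOCTL_I915_GEM_USERPTR.
 */
static uint32_t wrap_userptr(int drm_fd, void *ptr, uint32_t size)
{
	struct drm_i915_gem_userptr arg;

	memset(&arg, 0, sizeof(arg));
	arg.user_ptr = (uintptr_t)ptr;	/* need not be page-aligned */
	arg.user_size = size;
	arg.flags = 0;	/* 0 = keep pages synchronised via mmu-notifiers */

	if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_USERPTR, &arg))
		return 0;	/* object handles are nonzero */

	return arg.handle;	/* usable like any other GEM handle */
}

Note that on a kernel built without CONFIG_MMU_NOTIFIER the ioctl as
written returns -ENODEV for flags == 0, so such callers would have to
opt into I915_USERPTR_UNSYNCHRONIZED, which is CAP_SYS_ADMIN-only.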

v2: Compile with CONFIG_MMU_NOTIFIER
v3: We can sleep while performing invalidate-range, which we can utilise
to drop our page references prior to the kernel manipulating the vma
(for either discard or cloning) and so protect normal users.
v4: Only run the invalidate notifier if the range intercepts the bo.
v5: Prevent userspace from attempting to GTT mmap non-page aligned buffers

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/Makefile              |    1 +
 drivers/gpu/drm/i915/i915_dma.c            |    1 +
 drivers/gpu/drm/i915/i915_drv.h            |   22 ++
 drivers/gpu/drm/i915/i915_gem.c            |   31 ++-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |    7 +-
 drivers/gpu/drm/i915/i915_gem_userptr.c    |  329 ++++++++++++++++++++++++++++
 include/uapi/drm/i915_drm.h                |   16 ++
 7 files changed, 393 insertions(+), 14 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_userptr.c

Comments

Daniel Vetter April 8, 2013, 5:18 p.m. UTC | #1
On Tue, Feb 12, 2013 at 02:17:22PM +0000, Chris Wilson wrote:
> [commit message snipped]

Quick bikeshed:
- Still not really in favour of the in-page gtt offset handling ... I
  still think that this is just a fancy mmap api, and it had better
  outright reject attempts to map anything not aligned to a full page.

- I915_USERPTR_UNSYNCHRONIZED ... eeek. That means that despite everyone
  having mmu notifiers enabled in their distro config, you make sure sna
  doesn't hit it. Imo not enough testing coverage ;-) Or is there
  another reason behind this other than "mmu notifiers are too slow"?

  Generally I'm a bit sloppy with going root-only for legacy X stuff (like
  scanline waits), but this here looks very much generally useful. So not
  exemption-material imo.

- On a quick read I've seen some gtt mmap support remnants. This scares
  me, a roundabout njet! would appease. Though I think that should already
  happen with the checks we have to reject snoopable buffers?

- I think we should reject set_caching requests on userptr objects.

- union drm_i915_gem_objects briefly freaked me out, until I noticed
  that it's only used for our private slab. Maybe an explicit max in
  there? Also, this somewhat defeats that you've moved the userptr stuff
  out of the base class - this way we won't save any memory ...

- Basic igt to check the above api corner-cases return -EINVAL would be
  nice.

- I need to check for deadlocks around the mmu notifier handling. Iirc
  that thing takes all mm locks, and our own bo unref code can be called
  from all kinds of interesting places. Since each vma object also holds a
  ref onto a gem bo I suspect that we do have some fun ahead here ...

Cheers, Daniel
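
A sketch of the kind of corner-case check meant here, assuming the uapi
from this patch; this is illustrative, not an actual test from the igt
tree. Undefined flag bits must fail with EINVAL per the validation at
the top of i915_gem_userptr_ioctl() below:

#include <assert.h>
#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

static void test_userptr_invalid_flags(int drm_fd)
{
	void *mem = NULL;
	struct drm_i915_gem_userptr arg;

	/* A perfectly valid, page-aligned allocation ... */
	assert(posix_memalign(&mem, 4096, 4096) == 0);

	memset(&arg, 0, sizeof(arg));
	arg.user_ptr = (uintptr_t)mem;
	arg.user_size = 4096;
	/* ... passed with flag bits the kernel does not define. */
	arg.flags = ~(I915_USERPTR_READ_ONLY | I915_USERPTR_UNSYNCHRONIZED);

	assert(ioctl(drm_fd, DRM_IOCTL_I915_GEM_USERPTR, &arg) == -1);
	assert(errno == EINVAL);

	free(mem);
}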

> ---
>  drivers/gpu/drm/i915/Makefile              |    1 +
>  drivers/gpu/drm/i915/i915_dma.c            |    1 +
>  drivers/gpu/drm/i915/i915_drv.h            |   22 ++
>  drivers/gpu/drm/i915/i915_gem.c            |   31 ++-
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |    7 +-
>  drivers/gpu/drm/i915/i915_gem_userptr.c    |  329 ++++++++++++++++++++++++++++
>  include/uapi/drm/i915_drm.h                |   16 ++
>  7 files changed, 393 insertions(+), 14 deletions(-)
>  create mode 100644 drivers/gpu/drm/i915/i915_gem_userptr.c
> 
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 91f3ac6..42858f6 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -14,6 +14,7 @@ i915-y := i915_drv.o i915_dma.o i915_irq.o \
>  	  i915_gem_gtt.o \
>  	  i915_gem_stolen.o \
>  	  i915_gem_tiling.o \
> +	  i915_gem_userptr.o \
>  	  i915_sysfs.o \
>  	  i915_trace_points.o \
>  	  i915_ums.o \
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 4fa6beb..9b1984c 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1883,6 +1883,7 @@ struct drm_ioctl_desc i915_ioctls[] = {
>  	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE, i915_gem_context_create_ioctl, DRM_UNLOCKED),
>  	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_UNLOCKED),
>  	DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED),
> +	DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, DRM_UNLOCKED),
>  };
>  
>  int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 923dc0a..90070f4 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -42,6 +42,7 @@
>  #include <linux/backlight.h>
>  #include <linux/intel-iommu.h>
>  #include <linux/kref.h>
> +#include <linux/mmu_notifier.h>
>  #include <linux/pm_qos.h>
>  
>  /* General customization:
> @@ -1076,6 +1077,7 @@ struct drm_i915_gem_object_ops {
>  	 */
>  	int (*get_pages)(struct drm_i915_gem_object *);
>  	void (*put_pages)(struct drm_i915_gem_object *);
> +	void (*release)(struct drm_i915_gem_object *);
>  };
>  
>  struct drm_i915_gem_object {
> @@ -1222,6 +1224,23 @@ struct drm_i915_gem_object {
>  };
>  #define to_gem_object(obj) (&((struct drm_i915_gem_object *)(obj))->base)
>  
> +struct i915_gem_userptr_object {
> +	struct drm_i915_gem_object gem;
> +	uintptr_t user_ptr;
> +	size_t user_size;
> +	int read_only;
> +
> +	struct mm_struct *mm;
> +#if defined(CONFIG_MMU_NOTIFIER)
> +	struct mmu_notifier mn;
> +#endif
> +};
> +
> +union drm_i915_gem_objects {
> +	struct drm_i915_gem_object base;
> +	struct i915_gem_userptr_object userptr;
> +};
> +
>  #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
>  
>  /**
> @@ -1501,6 +1520,8 @@ int i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
>  			   struct drm_file *file_priv);
>  int i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
>  			   struct drm_file *file_priv);
> +int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
> +			   struct drm_file *file);
>  int i915_gem_set_tiling(struct drm_device *dev, void *data,
>  			struct drm_file *file_priv);
>  int i915_gem_get_tiling(struct drm_device *dev, void *data,
> @@ -1554,6 +1575,7 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
>  	BUG_ON(obj->pages_pin_count == 0);
>  	obj->pages_pin_count--;
>  }
> +int i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
>  
>  int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
>  int i915_gem_object_sync(struct drm_i915_gem_object *obj,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 73b1e9e..65a36bf 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1336,20 +1336,18 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>  	struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
>  	struct drm_device *dev = obj->base.dev;
>  	drm_i915_private_t *dev_priv = dev->dev_private;
> -	pgoff_t page_offset;
> +	pgoff_t offset;
>  	unsigned long pfn;
>  	int ret = 0;
>  	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
>  
> -	/* We don't use vmf->pgoff since that has the fake offset */
> -	page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
> -		PAGE_SHIFT;
> -
>  	ret = i915_mutex_lock_interruptible(dev);
>  	if (ret)
>  		goto out;
>  
> -	trace_i915_gem_object_fault(obj, page_offset, true, write);
> +	trace_i915_gem_object_fault(obj,
> +				    (unsigned long)(vmf->virtual_address - vma->vm_start) >> PAGE_SHIFT,
> +				    true, write);
>  
>  	/* Access to snoopable pages through the GTT is incoherent. */
>  	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
> @@ -1372,8 +1370,10 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>  
>  	obj->fault_mappable = true;
>  
> -	pfn = ((dev_priv->gtt.mappable_base + obj->gtt_offset) >> PAGE_SHIFT) +
> -		page_offset;
> +	/* We don't use vmf->pgoff since that has the fake offset */
> +	offset = (unsigned long)vmf->virtual_address - vma->vm_start;
> +	offset += obj->gtt_offset;
> +	pfn = (dev_priv->gtt.mappable_base + offset) >> PAGE_SHIFT;
>  
>  	/* Finally, remap it using the new GTT offset */
>  	ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
> @@ -1565,6 +1565,12 @@ i915_gem_mmap_gtt(struct drm_file *file,
>  		goto out;
>  	}
>  
> +	if (offset_in_page(obj->gtt_offset)) {
> +		DRM_ERROR("Attempting to mmap an unaligned buffer\n");
> +		ret = -EINVAL;
> +		goto out;
> +	}
> +
>  	ret = i915_gem_object_create_mmap_offset(obj);
>  	if (ret)
>  		goto out;
> @@ -2495,9 +2501,9 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj)
>  	/* Avoid an unnecessary call to unbind on rebind. */
>  	obj->map_and_fenceable = true;
>  
> +	obj->gtt_offset -= obj->gtt_space->start;
>  	drm_mm_put_block(obj->gtt_space);
>  	obj->gtt_space = NULL;
> -	obj->gtt_offset = 0;
>  
>  	return 0;
>  }
> @@ -2987,7 +2993,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
>  	list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
>  
>  	obj->gtt_space = node;
> -	obj->gtt_offset = node->start;
> +	obj->gtt_offset += node->start;
>  
>  	fenceable =
>  		node->size == fence_size &&
> @@ -3800,6 +3806,9 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>  	if (obj->base.import_attach)
>  		drm_prime_gem_destroy(&obj->base, NULL);
>  
> +	if (obj->ops->release)
> +		obj->ops->release(obj);
> +
>  	drm_gem_object_release(&obj->base);
>  	i915_gem_info_remove_obj(dev_priv, obj->base.size);
>  
> @@ -4101,7 +4110,7 @@ i915_gem_load(struct drm_device *dev)
>  
>  	dev_priv->slab =
>  		kmem_cache_create("i915_gem_object",
> -				  sizeof(struct drm_i915_gem_object), 0,
> +				  sizeof(union drm_i915_gem_objects), 0,
>  				  SLAB_HWCACHE_ALIGN,
>  				  NULL);
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 2726910..a3e68af 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -254,14 +254,16 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
>  		return -EFAULT;
>  
>  	reloc->delta += target_offset;
> +	reloc->offset += obj->gtt_offset;
>  	if (use_cpu_reloc(obj)) {
> -		uint32_t page_offset = reloc->offset & ~PAGE_MASK;
> +		uint32_t page_offset = offset_in_page(reloc->offset);
>  		char *vaddr;
>  
>  		ret = i915_gem_object_set_to_cpu_domain(obj, 1);
>  		if (ret)
>  			return ret;
>  
> +		reloc->offset -= obj->gtt_space->start;
>  		vaddr = kmap_atomic(i915_gem_object_get_page(obj,
>  							     reloc->offset >> PAGE_SHIFT));
>  		*(uint32_t *)(vaddr + page_offset) = reloc->delta;
> @@ -280,11 +282,10 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
>  			return ret;
>  
>  		/* Map the page containing the relocation we're going to perform.  */
> -		reloc->offset += obj->gtt_offset;
>  		reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
>  						      reloc->offset & PAGE_MASK);
>  		reloc_entry = (uint32_t __iomem *)
> -			(reloc_page + (reloc->offset & ~PAGE_MASK));
> +			(reloc_page + offset_in_page(reloc->offset));
>  		iowrite32(reloc->delta, reloc_entry);
>  		io_mapping_unmap_atomic(reloc_page);
>  	}
> diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
> new file mode 100644
> index 0000000..f93fa1b
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
> @@ -0,0 +1,329 @@
> +/*
> + * Copyright © 2012 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + */
> +
> +#include "drmP.h"
> +#include "i915_drm.h"
> +#include "i915_drv.h"
> +#include "i915_trace.h"
> +#include "intel_drv.h"
> +#include <linux/mmu_notifier.h>
> +#include <linux/swap.h>
> +
> +static struct i915_gem_userptr_object *to_userptr_object(struct drm_i915_gem_object *obj)
> +{
> +	return container_of(obj, struct i915_gem_userptr_object, gem);
> +}
> +
> +#if defined(CONFIG_MMU_NOTIFIER)
> +static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *mn,
> +						       struct mm_struct *mm,
> +						       unsigned long start,
> +						       unsigned long end)
> +{
> +	struct i915_gem_userptr_object *vmap;
> +	struct drm_device *dev;
> +
> +	/* XXX race between obj unref and mmu notifier? */
> +	vmap = container_of(mn, struct i915_gem_userptr_object, mn);
> +	BUG_ON(vmap->mm != mm);
> +
> +	if (vmap->user_ptr >= end || vmap->user_ptr + vmap->user_size <= start)
> +		return;
> +
> +	if (vmap->gem.pages == NULL) /* opportunistic check */
> +		return;
> +
> +	dev = vmap->gem.base.dev;
> +	mutex_lock(&dev->struct_mutex);
> +	if (vmap->gem.gtt_space) {
> +		struct drm_i915_private *dev_priv = dev->dev_private;
> +		bool was_interruptible;
> +		int ret;
> +
> +		was_interruptible = dev_priv->mm.interruptible;
> +		dev_priv->mm.interruptible = false;
> +
> +		ret = i915_gem_object_unbind(&vmap->gem);
> +		BUG_ON(ret && ret != -EIO);
> +
> +		dev_priv->mm.interruptible = was_interruptible;
> +	}
> +
> +	BUG_ON(i915_gem_object_put_pages(&vmap->gem));
> +	mutex_unlock(&dev->struct_mutex);
> +}
> +
> +static void i915_gem_userptr_mn_release(struct mmu_notifier *mn,
> +					struct mm_struct *mm)
> +{
> +	struct i915_gem_userptr_object *vmap;
> +
> +	vmap = container_of(mn, struct i915_gem_userptr_object, mn);
> +	BUG_ON(vmap->mm != mm);
> +	vmap->mm = NULL;
> +
> +	/* XXX Schedule an eventual unbind? E.g. hook into require request?
> +	 * However, locking will be complicated.
> +	 */
> +}
> +
> +static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
> +	.invalidate_range_start = i915_gem_userptr_mn_invalidate_range_start,
> +	.release = i915_gem_userptr_mn_release,
> +};
> +
> +static void
> +i915_gem_userptr_release__mmu_notifier(struct i915_gem_userptr_object *vmap)
> +{
> +	if (vmap->mn.ops && vmap->mm) {
> +		mmu_notifier_unregister(&vmap->mn, vmap->mm);
> +		BUG_ON(vmap->mm);
> +	}
> +}
> +
> +static int
> +i915_gem_userptr_init__mmu_notifier(struct i915_gem_userptr_object *vmap,
> +				    unsigned flags)
> +{
> +	if (flags & I915_USERPTR_UNSYNCHRONIZED)
> +		return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;
> +
> +	vmap->mn.ops = &i915_gem_userptr_notifier;
> +	return mmu_notifier_register(&vmap->mn, vmap->mm);
> +}
> +
> +#else
> +
> +static void
> +i915_gem_userptr_release__mmu_notifier(struct i915_gem_userptr_object *vmap)
> +{
> +}
> +
> +static int
> +i915_gem_userptr_init__mmu_notifier(struct i915_gem_userptr_object *vmap,
> +				    unsigned flags)
> +{
> +	if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0)
> +		return -ENODEV;
> +
> +	if (!capable(CAP_SYS_ADMIN))
> +		return -EPERM;
> +
> +	return 0;
> +}
> +#endif
> +
> +static int
> +i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
> +{
> +	struct i915_gem_userptr_object *vmap = to_userptr_object(obj);
> +	int num_pages = obj->base.size >> PAGE_SHIFT;
> +	struct sg_table *st;
> +	struct scatterlist *sg;
> +	struct page **pvec;
> +	int n, pinned, ret;
> +
> +	if (vmap->mm == NULL)
> +		return -EFAULT;
> +
> +	if (!access_ok(vmap->read_only ? VERIFY_READ : VERIFY_WRITE,
> +		       (char __user *)vmap->user_ptr, vmap->user_size))
> +		return -EFAULT;
> +
> +	/* If userspace should engineer that these pages are replaced in
> +	 * the vma between us binding this page into the GTT and completion
> +	 * of rendering... Their loss. If they change the mapping of their
> +	 * pages they need to create a new bo to point to the new vma.
> +	 *
> +	 * However, that still leaves open the possibility of the vma
> +	 * being copied upon fork. Which falls under the same userspace
> +	 * synchronisation issue as a regular bo, except that this time
> +	 * the process may not be expecting that a particular piece of
> +	 * memory is tied to the GPU.
> +	 *
> +	 * Fortunately, we can hook into the mmu_notifier in order to
> +	 * discard the page references prior to anything nasty happening
> +	 * to the vma (discard or cloning) which should prevent the more
> +	 * egregious cases from causing harm.
> +	 */
> +
> +	pvec = kmalloc(num_pages*sizeof(struct page *),
> +		       GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
> +	if (pvec == NULL) {
> +		pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
> +		if (pvec == NULL)
> +			return -ENOMEM;
> +	}
> +
> +	pinned = 0;
> +	if (vmap->mm == current->mm)
> +		pinned = __get_user_pages_fast(vmap->user_ptr, num_pages,
> +					       !vmap->read_only, pvec);
> +	if (pinned < num_pages) {
> +		struct mm_struct *mm = vmap->mm;
> +		ret = 0;
> +		mutex_unlock(&obj->base.dev->struct_mutex);
> +		down_read(&mm->mmap_sem);
> +		if (vmap->mm != NULL)
> +			ret = get_user_pages(current, mm,
> +					     vmap->user_ptr + (pinned << PAGE_SHIFT),
> +					     num_pages - pinned,
> +					     !vmap->read_only, 0,
> +					     pvec + pinned,
> +					     NULL);
> +		up_read(&mm->mmap_sem);
> +		mutex_lock(&obj->base.dev->struct_mutex);
> +		if (ret > 0)
> +			pinned += ret;
> +
> +		if (obj->pages || pinned < num_pages) {
> +			ret = obj->pages ? 0 : -EFAULT;
> +			goto cleanup_pinned;
> +		}
> +	}
> +
> +	st = kmalloc(sizeof(*st), GFP_KERNEL);
> +	if (st == NULL) {
> +		ret = -ENOMEM;
> +		goto cleanup_pinned;
> +	}
> +
> +	if (sg_alloc_table(st, num_pages, GFP_KERNEL)) {
> +		ret = -ENOMEM;
> +		goto cleanup_st;
> +	}
> +
> +	for_each_sg(st->sgl, sg, num_pages, n)
> +		sg_set_page(sg, pvec[n], PAGE_SIZE, 0);
> +	drm_free_large(pvec);
> +
> +	obj->pages = st;
> +	return 0;
> +
> +cleanup_st:
> +	kfree(st);
> +cleanup_pinned:
> +	release_pages(pvec, pinned, 0);
> +	drm_free_large(pvec);
> +	return ret;
> +}
> +
> +static void
> +i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
> +{
> +	struct scatterlist *sg;
> +	int i;
> +
> +	if (obj->madv != I915_MADV_WILLNEED)
> +		obj->dirty = 0;
> +
> +	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
> +		struct page *page = sg_page(sg);
> +
> +		if (obj->dirty)
> +			set_page_dirty(page);
> +
> +		mark_page_accessed(page);
> +		page_cache_release(page);
> +	}
> +	obj->dirty = 0;
> +
> +	sg_free_table(obj->pages);
> +	kfree(obj->pages);
> +}
> +
> +static void
> +i915_gem_userptr_release(struct drm_i915_gem_object *obj)
> +{
> +	struct i915_gem_userptr_object *vmap = to_userptr_object(obj);
> +
> +	i915_gem_userptr_release__mmu_notifier(vmap);
> +}
> +
> +static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
> +	.get_pages = i915_gem_userptr_get_pages,
> +	.put_pages = i915_gem_userptr_put_pages,
> +	.release = i915_gem_userptr_release,
> +};
> +
> +/**
> + * Creates a new mm object that wraps some user memory.
> + */
> +int
> +i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct drm_i915_gem_userptr *args = data;
> +	struct i915_gem_userptr_object *obj;
> +	loff_t first_data_page, last_data_page;
> +	int num_pages;
> +	int ret;
> +	u32 handle;
> +
> +	if (args->flags & ~(I915_USERPTR_READ_ONLY | I915_USERPTR_UNSYNCHRONIZED))
> +		return -EINVAL;
> +
> +	first_data_page = args->user_ptr / PAGE_SIZE;
> +	last_data_page = (args->user_ptr + args->user_size - 1) / PAGE_SIZE;
> +	num_pages = last_data_page - first_data_page + 1;
> +	if (num_pages * PAGE_SIZE > dev_priv->gtt.total)
> +		return -E2BIG;
> +
> +	/* Allocate the new object */
> +	obj = i915_gem_object_alloc(dev);
> +	if (obj == NULL)
> +		return -ENOMEM;
> +
> +	if (drm_gem_private_object_init(dev, &obj->gem.base,
> +					num_pages * PAGE_SIZE)) {
> +		i915_gem_object_free(&obj->gem);
> +		return -ENOMEM;
> +	}
> +
> +	i915_gem_object_init(&obj->gem, &i915_gem_userptr_ops);
> +	obj->gem.cache_level = I915_CACHE_LLC_MLC;
> +
> +	obj->gem.gtt_offset = offset_in_page(args->user_ptr);
> +	obj->user_ptr = args->user_ptr;
> +	obj->user_size = args->user_size;
> +	obj->read_only = args->flags & I915_USERPTR_READ_ONLY;
> +
> +	/* And keep a pointer to the current->mm for resolving the user pages
> +	 * at binding. This means that we need to hook into the mmu_notifier
> +	 * in order to detect if the mmu is destroyed.
> +	 */
> +	obj->mm = current->mm;
> +	ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags);
> +	if (ret)
> +		return ret;
> +
> +	ret = drm_gem_handle_create(file, &obj->gem.base, &handle);
> +	/* drop reference from allocate - handle holds it now */
> +	drm_gem_object_unreference(&obj->gem.base);
> +	if (ret)
> +		return ret;
> +
> +	args->handle = handle;
> +	return 0;
> +}
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 07d5941..20e39be 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -198,6 +198,7 @@ typedef struct _drm_i915_sarea {
>  #define DRM_I915_GEM_SET_CACHING	0x2f
>  #define DRM_I915_GEM_GET_CACHING	0x30
>  #define DRM_I915_REG_READ		0x31
> +#define DRM_I915_GEM_USERPTR		0x32
>  
>  #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
>  #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
> @@ -247,6 +248,7 @@ typedef struct _drm_i915_sarea {
>  #define DRM_IOCTL_I915_GEM_CONTEXT_CREATE	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create)
>  #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
>  #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
> +#define DRM_IOCTL_I915_GEM_USERPTR			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr)
>  
>  /* Allow drivers to submit batchbuffers directly to hardware, relying
>   * on the security mechanisms provided by hardware.
> @@ -980,4 +982,18 @@ struct drm_i915_reg_read {
>  	__u64 offset;
>  	__u64 val; /* Return value */
>  };
> +
> +struct drm_i915_gem_userptr {
> +	__u64 user_ptr;
> +	__u32 user_size;
> +	__u32 flags;
> +#define I915_USERPTR_READ_ONLY 0x1
> +#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
> +	/**
> +	 * Returned handle for the object.
> +	 *
> +	 * Object handles are nonzero.
> +	 */
> +	__u32 handle;
> +};
>  #endif /* _UAPI_I915_DRM_H_ */
> -- 
> 1.7.10.4
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Chris Wilson April 8, 2013, 5:40 p.m. UTC | #2
On Mon, Apr 08, 2013 at 07:18:11PM +0200, Daniel Vetter wrote:
> On Tue, Feb 12, 2013 at 02:17:22PM +0000, Chris Wilson wrote:
> > [commit message snipped]
> 
> Quick bikeshed:
> - Still not really in favour of the in-page gtt offset handling ... I
>   still think that this is just a fancy mmap api, and it had better
>   outright reject attempts to map anything not aligned to a full page.

Strongly disagree.
 
> - I915_USERPTR_UNSYNCHRONIZED ... eeek. That means that despite everyone
>   having mmu notifiers enabled in their distro config, you make sure sna
>   doesn't hit it. Imo not enough testing coverage ;-) Or is there
>   another reason behind this other than "mmu notifiers are too slow"?
> 
>   Generally I'm a bit sloppy with going root-only for legacy X stuff (like
>   scanline waits), but this here looks very much generally useful. So not
>   exemption-material imo.

Strongly disagree. Most of my machines do not have mmu-notifiers and
would still like to benefit from userptr and I see no reason why we need
to force mmu-notifiers.

> - On a quick read I've seen some gtt mmap support remnants. This scares
>   me, a roundabout njet! would appease. Though I think that should already
>   happen with the checks we have to reject snoopable buffers?

That's because there are platforms where it is theoretically possible
and whilst I have no use case for it, I wanted to make it work
nevertheless. I still think it is possible, but I could not see a way to
do so without completely replacing the drm vm code.
 
> - I think we should reject set_caching requests on userptr objects.

I don't think that is strictly required, just like we should not limit
the user from using set_tiling. (Though the user is never actually going
to tell the kernel about such tiling...)
 
> - union drm_i915_gem_objects briefly freaked me out, until I noticed
>   that it's only used for our private slab. Maybe an explicit max in
>   there? Also, this somewhat defeats that you've moved the userptr stuff
>   out of the base class - this way we won't save any memory ...

The alternative is to use a union inside the object. Long ago, I had a
few more objects in there.
 
> - Basic igt to check the above api corner-cases return -EINVAL would be
>   nice.

Been sitting around for ages, just waiting for the interface to be
agreed upon.
 
> - I need to check for deadlocks around the mmu notifier handling. Iirc
>   that thing takes all mm locks, and our own bo unref code can be called
>   from all kinds of interesting places. Since each vma object also holds a
>   ref onto a gem bo I suspect that we do have some fun ahead here ...

The notifier itself takes no locks, so the locking is whatever state the
caller sets up. The current locking order is (mm, struct mutex) i.e. the
same ordering as used by the notifier callbacks.
-Chris
Daniel Vetter April 8, 2013, 7:24 p.m. UTC | #3
On Mon, Apr 8, 2013 at 7:40 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> On Mon, Apr 08, 2013 at 07:18:11PM +0200, Daniel Vetter wrote:
>> On Tue, Feb 12, 2013 at 02:17:22PM +0000, Chris Wilson wrote:
>> > [commit message snipped]
>>
>> Quick bikeshed:
>> - Still not really in favour of the in-page gtt offset handling ... I
>> >>   still think that this is just a fancy mmap api, and it had better
>> >>   outright reject attempts to map anything not aligned to a full page.
>
> Strongly disagree.

Ok, let's dig out the beaten arguments here ;-)
- Imo the gtt_offset frobbery is a bit fragile, and getting this right
in the face of ppgtt won't make it better. And yes the only reason we
still have that field is that you've shot down any patch to remove it
citing userptr here. So "it's here already" doesn't count ;-)
- Userptr for i915 is an mmap interface, and that works on pages;
lying to userspace isn't great.
- I don't see why userspace can't do this themselves. I've seen that
it makes things easier in SNA/X, but for a general purpose interface
that argument doesn't cut too much.
- I'm also a bit afraid that our code implicitly assumes that
size/offset are always page-aligned and I kinda want to avoid that we
have to audit for such issues from here on. We've blown up in the past
assuming that size > 0 already, I think we're set to blow up on this
one here.

In any case, if you really want to stick to this I want this to be
explicitly tracked in an obj->reloc_gtt_offset_adjustment or something
which is very loudly yelling at people to make sure no one trips over
it. Tracking the adjustment in a separate field, which would only ever
be used in the reloc code would address all my concerns (save for the
api ugliness one).
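
A minimal sketch of the separate-field idea (kernel-side, with
illustrative names; nothing here is taken from the patch):

/* Keep gtt_offset page-aligned and track the in-page delta in its own,
 * loudly named field that only the relocation path ever adds back in.
 */
struct drm_i915_gem_object_sketch {
	u32 gtt_offset;				/* always page-aligned */
	u32 reloc_gtt_offset_adjustment;	/* 0..PAGE_SIZE-1; nonzero
						 * only for unaligned
						 * userptr objects */
};

static u32 reloc_target_address(const struct drm_i915_gem_object_sketch *obj,
				u32 delta)
{
	/* The single place the in-page adjustment is applied. */
	return obj->gtt_offset + obj->reloc_gtt_offset_adjustment + delta;
}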

>> - I915_USERPTR_UNSYNCHRONIZED ... eeek. That means that despite everyone
>>   having mmu notifiers enabled in their distro config, you make sure sna
>>   doesn't hit it. Imo not enough testing coverage ;-) Or is there
>>   another reason behind this other than "mmu notifiers are too slow"?
>>
>>   Generally I'm a bit sloppy with going root-only for legacy X stuff (like
>>   scanline waits), but this here looks very much generally useful. So not
>>   exemption-material imo.
>
> Strongly disagree. Most of my machines do not have mmu-notifiers and
> would still like to benefit from userptr and I see no reason why we need
> to force mmu-notifiers.

Note that I didn't shout against the mmu_notifier-less support
(although I'm honestly not too happy about it), what I don't like is
the override bit disabling the mmu_notifiers even if we have them.
Since that will mean that the code won't be tested through SNA, and so
has a good chance of being buggy. Once mesa comes around and uses it,
it'll nicely blow up. And one of the reasons Jesse is breathing down my
neck to merge this is "other guys are interested in this at intel".

>> - On a quick read I've seen some gtt mmap support remnants. This scares
>>   me, a roundabout njet! would appease. Though I think that should already
>>   happen with the checks we have to reject snoopable buffers?
>
> That's because there are platforms where it is theoretically possible
> and whilst I have no use case for it, I wanted to make it work
> nevertheless. I still think it is possible, but I could not see a way to
> do so without completely replacing the drm vm code.

If I understand things correctly we should be able to block this
simply by refusing to create an mmap offset for a userptr backed
object.
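
Roughly, and purely as a sketch (the ops comparison is one plausible
way to detect a userptr object; this is not code from the patch):

/* Refuse to create the fake mmap offset for userptr-backed objects,
 * so a GTT mmap can never reach them.
 */
static int create_mmap_offset_checked(struct drm_i915_gem_object *obj)
{
	if (obj->ops == &i915_gem_userptr_ops)
		return -EINVAL;	/* no GTT mmap for userptr objects */

	return i915_gem_object_create_mmap_offset(obj);
}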

>> - I think we should reject set_caching requests on userptr objects.
>
> I don't think that is strictly required, just like we should not limit
> the user from using set_tiling. (Though the user is never actually going
> to tell the kernel about such tiling...).

Yeah, I guess we could allow people to shoot their foot off. Otoh it
adds another dimension to the userptr interface, which we need to make
sure it works. Similar besides set_tiling is probably also
prime_export.

The reason I'd prefer to lock things down is that we have fairly nice
coverage for normal gem objects wrt exercising corner-cases with igt.
If we don't disallow the same corner-cases with userptr, I'd prefer
the tests to also cover those ... Disallowing is cheaper ;-)

>> - union drm_i915_gem_objects briefly freaked me out, until I noticed
>>   that it's only used for our private slab. Maybe an explicit max in
>>   there? Also, this somewhat defeats that you've moved the userptr stuff
>>   out of the base class - this way we won't save any memory ...
>
> The alternative is to use a union inside the object. Long ago, I had a
> few more objects in there.

Yeah, the entire sg rework nicely unified things \o/

Still, the union seems to be killable with a few lines of patch-diff
on top of this ...

>> - Basic igt to check the above api corner-cases return -EINVAL would be
>>   nice.
>
> Been sitting around for ages, just waiting for the interface to be
> agreed upon.
>
>> - I need to check for deadlocks around the mmu notifier handling. Iirc
>>   that thing takes all mm locks, and our own bo unref code can be called
>>   from all kinds of interesting places. Since each vma object also holds a
>>   ref onto a gem bo I suspect that we do have some fun ahead here ...
>
> The notifier itself takes no locks, so the locking is whatever state the
> caller sets up. The current locking order is (mm, struct mutex) i.e. the
> same ordering as used by the notifier callbacks.

Oops, too scared by the fbdev_notifier - the mmu notifier indeed
uses srcu, so we should be fine here. I need to read the callchains
in-depth though and maybe ask lockdep what it thinks about this first
before I can put my concerns at ease ...
-Daniel
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
Chris Wilson April 8, 2013, 9:48 p.m. UTC | #4
On Mon, Apr 08, 2013 at 09:24:58PM +0200, Daniel Vetter wrote:
> On Mon, Apr 8, 2013 at 7:40 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > On Mon, Apr 08, 2013 at 07:18:11PM +0200, Daniel Vetter wrote:
> >> On Tue, Feb 12, 2013 at 02:17:22PM +0000, Chris Wilson wrote:
> >> > [commit message snipped]
> >>
> >> Quick bikeshed:
> >> - Still not really in favour of the in-page gtt offset handling ... I
> >>   still think that this is just a fancy mmap api, and it had better
> >>   outright reject attempts to map anything not aligned to a full page.
> >
> > Strongly disagree.
> 
> Ok, let's dig out the beaten arguments here ;-)
> - Imo the gtt_offset frobbery is a bit fragile, and getting this right
> in the face of ppgtt won't make it better. And yes the only reason we
> still have that field is that you've shot down any patch to remove it
> citing userptr here. So "it's here already" doesn't count ;-)
> - Userptr for i915 is an mmap interface, and that works on pages;
> lying to userspace isn't great.

No. Due to the nature of existing decades old userspace, I need to map
byte ranges, so I do not view this as a simple equivalence to mmapping
a bo.

> - I don't see why userspace can't do this themselves. I've seen that
> it makes things easier in SNA/X, but for a general purpose interface
> that argument doesn't cut too much.

I have the completely opposite viewpoint: a general purpose interface is not
a page interface, and this interface trades a small amount of
kernel complexity (tracking the offset_in_page) so that userspace has a
flexible interface that matches its requirements.

> - I'm also a bit afraid that our code implicitly assumes that
> size/offset are always page-aligned and I kinda want to avoid that we
> have to audit for such issues from here on. We've blown up in the past
> assuming that size > 0 already, I think we're set to blow up on this
> one here.

Now that we can distinguish between size and num_pages, there is no
longer a need for size to be page aligned (and it is currently redundant).
 
> In any case, if you really want to stick to this I want this to be
>> explicitly tracked in an obj->reloc_gtt_offset_adjustment or something

Sure, let's call it obj->gtt_offset:12;

> which is very loudly yelling at people to make sure no one trips over
> it. Tracking the adjustment in a separate field, which would only ever
> be used in the reloc code would address all my concerns (save for the
> api ugliness one).

And everywhere that deals in GTT addresses.
 
> >> - I915_USERPTR_UNSYNCHRONIZED ... eeek. That means that despite everyone
> >>   having mmu notifiers enabled in their distro config, you make sure sna
> >>   doesn't hit it. Imo not enough testing coverage ;-) Or is there
> >>   another reason behind this other than "mmu notifiers are too slow"?
> >>
> >>   Generally I'm a bit sloppy with going root-only for legacy X stuff (like
> >>   scanline waits), but this here looks very much generally useful. So not
> >>   exemption-material imo.
> >
> > Strongly disagree. Most of my machines do not have mmu-notifiers and
> > would still like to benefit from userptr and I see no reason why we need
> > to force mmu-notifiers.
> 
> Note that I didn't shout against the mmu_notifier-less support
> (although I'm honestly not too happy about it), what I don't like is
> the override bit disabling the mmu_notifiers even if we have them.
> Since that will mean that the code won't be tested through SNA, and so
> has a good chance of being buggy. Once mesa comes around and uses it,
> it'll nicely blow up. And one of the reasons Jesse is breathing down my
> neck to merge this is "other guys are interested in this at intel".

I don't see how you can reconcile that viewpoint. In order for userspace
to be able to use userptr without mmu-notifiers it has to be very
careful about managing synchronisation of such pointers across vma
events. So userptr should only be allowed to opt-in to such a precarious
arrangement. And because it was so easy for you to shoot yourself and
the entire system in the head by getting such synchronisation wrong, the
suggestion was to only allow root access to the shotgun. Once it is opting
in, behaviour of userptr should then be consistent across all
configurations i.e. it should always avoid the mmu-notifier if userspace
asks.

As for testing using SNA, you can just ask SNA to only use mmu-notifier
backed userptr and so only run when mmu-notifiers are available.
 
> >> - On a quick read I've seen some gtt mmap support remnants. This scares
> >>   me, a roundabout njet! would appease. Though I think that should already
> >>   happen with the checks we have to reject snoopable buffers?
> >
> > That's because there are platforms where it is theoretically possible
> > and whilst I have no use case for it, I wanted to make it work
> > nevertheless. I still think it is possible, but I could not see a way to
> > do so without completely replacing the drm vm code.
> 
> If I understand things correctly we should be able to block this
> simply by refusing to create an mmap offset for a userptr backed
> object.

But I want to be able to support GTT mappings wherever possible. The
basic principle is making sure every bo is first class, so that there
are no surprises - such as with stolen, dmabuf...
 
> >> - I think we should reject set_caching requests on userptr objects.
> >
> > I don't think that is strictly required, just like we should not limit
> > the user from using set_tiling. (Though the user is never actually going
> > to tell the kernel about such tiling...).
> 
> Yeah, I guess we could allow people to shoot their foot off. Otoh it
> adds another dimension to the userptr interface, which we need to make
> sure it works. Similar besides set_tiling is probably also
> prime_export.
> 
> The reason I'd prefer to lock things down is that we have fairly nice
> coverage for normal gem objects wrt exercising corner-cases with igt.
> If we don't disallow the same corner-cases with userptr, I'd prefer
> the tests to also cover those ... Disallowing is cheaper ;-)

See above: every bo should be first class...
-Chris
Eric Anholt April 8, 2013, 10:06 p.m. UTC | #5
Daniel Vetter <daniel@ffwll.ch> writes:

> On Mon, Apr 8, 2013 at 7:40 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>> On Mon, Apr 08, 2013 at 07:18:11PM +0200, Daniel Vetter wrote:
>>> On Tue, Feb 12, 2013 at 02:17:22PM +0000, Chris Wilson wrote:
>>> > [commit message snipped]
>>>
>>> Quick bikeshed:
>>> - Still not really in favour of the in-page gtt offset handling ... I
>>>   still think that this is just a fancy mmap api, and it had better
>>>   outright reject attempts to map anything not aligned to a full page.
>>
>> Strongly disagree.
>
> Ok, let's dig out the beaten arguments here ;-)
> - Imo the gtt_offset frobbery is a bit fragile, and getting this right
> in the face of ppgtt won't make it better. And yes the only reason we
> still have that field is that you've shot down any patch to remove it
> citing userptr here. So "it's here already" doesn't count ;-)

Agreed -- given that I need to look at byte offsets for alignment issues
on basically all my usages of memory, having my data have part of its
intra-page offset hidden in the kernel at creation time would be bad for
Mesa.

Access to data is controlled at a page level, so I think this kernel
interface should act at a page level.

>>> - I915_USERPTR_UNSYNCHRONIZED ... eeek. That means that despite everyone
>>>   having mmu notifiers enabled in their distro config, you make sure sna
>>>   doesn't hit it. Imo not enough testing coverage ;-) Or is there
>>>   another reason behind this other than "mmu notifiers are too slow"?
>>>
>>>   Generally I'm a bit sloppy with going root-only for legacy X stuff (like
>>>   scanline waits), but this here looks very much generally useful. So not
>>>   exemption-material imo.
>>
>> Strongly disagree. Most of my machines do not have mmu-notifiers and
>> would still like to benefit from userptr and I see no reason why we need
>> to force mmu-notifiers.
>
> Note that I didn't shout against the mmu_notifier-less support
> (although I'm honestly not too happy about it), what I don't like is
> the override bit disabling the mmu_notifiers even if we have them.
> Since that will mean that the code won't be tested through SNA, and so
> has a good chance of being buggy. Once mesa comes around and uses it,
> it'll nicely blow up. And one of the reasons Jesse is breathing down my
> neck to merge this is "other guys are interested in this at intel".

I hate root-only interfaces.  They help lock in root-only X, and mean
that X gets special treatment compared to the 3D driver.
Daniel Vetter April 15, 2013, 6:37 p.m. UTC | #6
On Mon, Apr 8, 2013 at 11:48 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> On Mon, Apr 08, 2013 at 09:24:58PM +0200, Daniel Vetter wrote:
>> On Mon, Apr 8, 2013 at 7:40 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>> > On Mon, Apr 08, 2013 at 07:18:11PM +0200, Daniel Vetter wrote:
>> >> On Tue, Feb 12, 2013 at 02:17:22PM +0000, Chris Wilson wrote:
>> >> > [commit message snipped]
>> >>
>> >> Quick bikeshed:
>> >> - Still not really in favour of the in-page gtt offset handling ... I
>> >>   still think that this is just a fancy mmap api, and it had better
>> >>   outright reject attempts to map anything not aligned to a full page.
>> >
>> > Strongly disagree.
>>
>> Ok, let's dig out the beaten arguments here ;-)
>> - Imo the gtt_offset frobbery is a bit fragile, and getting this right
>> in the face of ppgtt won't make it better. And yes the only reason we
>> still have that field is that you've shot down any patch to remove it
>> citing userptr here. So "it's here already" doesn't count ;-)
>> - Userptr for i915 is an mmap interface, and that works on pages;
>> lying to userspace isn't great.
>
> No. Due to the nature of existing decades old userspace, I need to map
> byte ranges, so I do not view this as a simple equivalence to mmapping
> a bo.

See below, I need to roll this up from behind ...

>> - I don't see why userspace can't do this themselves. I've seen that
>> it makes things easier in SNA/X, but for a general purpose interface
>> that argument doesn't cut too much.
>
> I have the completely opposite viewpoint: a general purpose interface is not
> a page interface, and this interface trades a small amount of
> kernel complexity (tracking the offset_in_page) so that userspace has a
> flexible interface that matches its requirements.

mmap is the general-purpose map something into cpu address space
thingy, and it works on pages, too. So I still don't buy this, and Eric
seems to agree.

But anyway, if you're convinced I'm grumpily ok with those semantics.

>> - I'm also a bit afraid that our code implicitly assumes that
>> size/offset are always page-aligned and I kinda want to avoid that we
>> have to audit for such issues from here on. We've blown up in the past
>> assuming that size > 0 already, I think we're set to blow up on this
>> one here.
>
> Now that we can distinguish between size and num_pages, there is no
> longer a need for size to be page aligned (and it is currently redundant).
>
>> In any case, if you really want to stick to this I want this to be
>> explicitly tracked in an obj->reloc_gtt_offset_adjustment or something
>
> Sure, let's call it obj->gtt_offset:12;

As long as it's something with the high 20 bits zero I'm ok. With
ppgtt we'll soon have tons of different ->gtt_offsets, so with your
current approach we'd need to keep this offset in different places
(and I'd really, really dislike doing that - see all the stuff I've
been fighting in modeset land). So I want this separate and explicit.

>> which is very loudly yelling at people to make sure no one trips over
>> it. Tracking the adjustment in a separate field, which would only ever
>> be used in the reloc code would address all my concerns (save for the
>> api ugliness one).
>
> And everywhere that deals in GTT addresses.

I've ignored the other cases since I don't see a use-case, but that's
a point to be addressed more below. So looking at other places we use
gtt addresses, I see two major pieces:
- pwrite/pread: Since we also need to deal in struct pages for cpu
access it's easiest to just add the static offset at the beginning.
Again, much clearer when this offset is explicit and stored someplace
separate.
- gtt mmaps: I have no idea how you plan to coax the mmap syscall into
cooperation here.

So addressing your point above that SNA has to deal with 25+ years of
userspace legacy: I really want this to be explicitly done in the
kernel in all the places we need it. At which point I honestly don't
understand why that special offset can't be moved into the kgem
abstraction. For normal objects it would be 0, for un-aligned userptr
objects it would be non-0. In all cases kgem would add that offset
when emitting a relocation.
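
Sketched in userspace terms (the names are hypothetical, not the
actual kgem API):

/* The bo wrapper carries the in-page delta itself and folds it into
 * every relocation it emits: 0 for ordinary bos, nonzero only for
 * userptr wrappers around unaligned allocations.
 */
struct kgem_bo_sketch {
	uint32_t handle;		/* GEM handle */
	uint32_t presumed_offset;	/* page-aligned GTT address */
	uint32_t delta;			/* in-page offset of wrapped data */
};

static uint32_t emit_reloc_value(const struct kgem_bo_sketch *bo,
				 uint32_t offset_in_bo)
{
	/* Value written into the batch for this relocation. */
	return bo->presumed_offset + bo->delta + offset_in_bo;
}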

Now if the argument is that this adds measurable overhead to
relocation emitting, I want this overhead to be measured to justify
adding this to the kernel.

>> >> - I915_USERPTR_UNSYNCHRONIZED ... eeek. That means that despite everyone
>> >>   having mmu notifiers enabled in their distro config, you make sure sna
>> >>   doesn't hit it. Imo not enough testing coverage ;-) Or is there
>> >>   another reason behind this other than "mmu notifiers are too slow"?
>> >>
>> >>   Generally I'm a bit sloppy with going root-only for legacy X stuff (like
>> >>   scanline waits), but this here looks very much generally useful. So not
>> >>   exemption-material imo.
>> >
>> > Strongly disagree. Most of my machines do not have mmu-notifiers and
>> > would still like to benefit from userptr and I see no reason why we need
>> > to force mmu-notifiers.
>>
>> Note that I didn't shout against the mmu_notifier-less support
>> (although I'm honestly not too happy about it), what I don't like is
>> the override bit disabling the mmu_notifiers even if we have them.
>> Since that will mean that the code won't be tested through SNA, and so
>> has a good chance of being buggy. Once mesa comes around and uses it,
>> it'll nicely blow up. And one of the reasons Jesse is breathing down my
>> neck to merge this is "other guys are interested in this at intel".
>
> I don't see how you can reconcile that viewpoint. In order for userspace
> to be able to use userptr without mmu-notifiers it has to be very
> careful about managing synchronisation of such pointers across vma
> events. So userptr should only be allowed to opt-in to such a precarious
> arrangement. And because it was so easy for you to shoot yourself and
> the entire system in the head by getting such synchronisation wrong, the
> suggestion was to only allow root access to the shotgun. Once it is opting
> in, behaviour of userptr should then be consistent across all
> configurations i.e. it should always avoid the mmu-notifier if userspace
> asks.

So first one thing to clear out: I think we don't need to care about
semantics around unmap, fork and friends - the kernel will hold
references to the backing storage pages in both cases. So I think both
cases are safe from userspace trying to trick the kernel.

Second thing to get out of the way: After all this discussion I think
it's ok to call fork&friends (though not unmap) as simply undefined
behaviour - the kernel might pick the old or the new address space
however it sees fit. Again, not something we need to care about. Now a
big exception would be if the behaviour around doing an early unmap
after a batchbuffer would differ between mmu-notifier based userptr
and pinning userptr. If that's indeed the case I'm seriously wondering
whether it's a good idea to merge both implementations under the same
interface. In that case I'd _much_ prefer we'd just stick with mmu
notifiers and have the same semantics everywhere.

So now the real deal: My argument for originally not merging userptr
is that pinning tons of GFP_MOVEABLE memory isn't nice. Presuming our
shrinker works (big if, I know) there shouldn't be an issue with
memory exhausting in both cases. And imo we really should keep page
migration working given how people constantly invent new uses of it.
Now I see that most of them are for servers, but:
- server-y stuff tends to trickle down
- we seem to have a server gpu product in preparation (the recent pile
of display/PCH-less patches from Ben).
So I want to keep page migration working, even when we have tons of
userptr-backed gem bos around.

Now looking at SNA userptr and this patch, the result is that SNA
won't ever use mmu-notifier-based userptr. That feels a lot like
adding a bit of code (mmu-notifier-based userptr support) to sneak in
a different feature (pinning userptr support). If mmu notifiers are
too damn slow, we need to fix that (or work around it with some
cache), not hide it by flat-out not using them.

Also, Eric's persistent ranting about root-only interfaces moved me
away from them, so now I'm even leaning a bit towards simply requiring
mmu-notifiers for userptr. It feels at least like the Right Thing ;-)
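
For reference, the opt-in pattern under discussion would look roughly
like this from userspace (a sketch against the ioctl as posted; the
wrapper name is made up):

#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Sketch: try the pinning path first (CAP_SYS_ADMIN only in this
 * patch) and fall back to the mmu-notifier-backed mode on -EPERM.
 * On a kernel without CONFIG_MMU_NOTIFIER the fallback fails with
 * -ENODEV instead. */
static int gem_userptr(int fd, void *ptr, uint32_t size, uint32_t *handle)
{
	struct drm_i915_gem_userptr arg;

	memset(&arg, 0, sizeof(arg));
	arg.user_ptr = (uintptr_t)ptr;
	arg.user_size = size;
	arg.flags = I915_USERPTR_UNSYNCHRONIZED;

	if (ioctl(fd, DRM_IOCTL_I915_GEM_USERPTR, &arg) == 0)
		goto done;
	if (errno != EPERM)
		return -errno;

	arg.flags = 0;	/* not root: take the synchronized path */
	if (ioctl(fd, DRM_IOCTL_I915_GEM_USERPTR, &arg))
		return -errno;
done:
	*handle = arg.handle;
	return 0;
}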

> As for testing using SNA, you can just ask SNA to only use mmu-notifier
> backed userptr and so only run when mmu-notifiers are available.

Given how good we are at testing ugly corner cases (much better, but
still with gaping holes apparently), I want mmu-notifier-based
userptr to be tested on as many machines as possible. Not just mine,
once at merge time, followed by a beautiful fireworks display once
mesa starts to use it and it has all regressed.

>> >> - On a quick read I've seen some gtt mmap support remnants. This scares
>> >>   me, a roundabout njet! would appease. Though I think that should already
>> >>   happen with the checks we have to reject snoopable buffers?
>> >
>> > That's because there are platforms where it is theoretically possible
>> > and whilst I have no use case for it, I wanted to make it work
>> > nevertheless. I still think it is possible, but I could not see a way to
>> > do so without completely replacing the drm vm code.
>>
>> If I understand things correctly we should be able to block this
>> simply by refusing to create an mmap offset for a userptr backed
>> object.
>
> But I want to be able to support GTT mappings wherever possible. The
> basic principle of making sure every bo is first class, so that there
> are no surprises. Such as stolen, dmabuf...

I can buy that argument, but in that case I _really_ want full
test-coverage of all the exposed interfaces. userptr is imo a big
addition with high potential to expose new "interesting" corner cases,
so I want to (ab)use the opportunity to improve our testing coverage.
If that's around, I'll happily accept unrestricted userptr support.
But see below for some comments.

>> >> - I think we should reject set_caching requests on userptr objects.
>> >
>> > I don't think that is strictly required, just like we should not prevent
>> > the user from using set_tiling. (Though the user is never actually going
>> > to tell the kernel about such tiling...).
>>
>> Yeah, I guess we could allow people to shoot their foot off. Otoh it
>> adds another dimension to the userptr interface, which we need to make
>> sure it works. Similar besides set_tiling is probably also
>> prime_export.
>>
>> The reason I'd prefer to lock things down is that we have fairly nice
>> coverage for normal gem objects wrt exercising corner-cases with igt.
>> If we don't disallow the same corner-cases with userptr, I'd prefer
>> the tests to also cover those ... Disallowing is cheaper ;-)
>
> See above: every bo should be first class...

Already mentioned above, but I do have my doubts that the in-page
offset support really does mesh with gtt (and cpu mmaps fwiw) in a
first-class manner.
-Daniel
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
Jesse Barnes June 24, 2013, 9:36 p.m. UTC | #7
On Mon, 8 Apr 2013 21:24:58 +0200
Daniel Vetter <daniel@ffwll.ch> wrote:

> On Mon, Apr 8, 2013 at 7:40 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > On Mon, Apr 08, 2013 at 07:18:11PM +0200, Daniel Vetter wrote:
> >> On Tue, Feb 12, 2013 at 02:17:22PM +0000, Chris Wilson wrote:
> >> > By exporting the ability to map user address and inserting PTEs
> >> > representing their backing pages into the GTT, we can exploit UMA in order
> >> > to utilize normal application data as a texture source or even as a
> >> > render target (depending upon the capabilities of the chipset). This has
> >> > a number of uses, with zero-copy downloads to the GPU and efficient
> >> > readback making the intermixed streaming of CPU and GPU operations
> >> > fairly efficient. This ability has many widespread implications from
> >> > faster rendering of client-side software rasterisers (chromium),
> >> > mitigation of stalls due to read back (firefox) and to faster pipelining
> >> > of texture data (such as pixel buffer objects in GL or data blobs in CL).
> >> >
> >> > v2: Compile with CONFIG_MMU_NOTIFIER
> >> > v3: We can sleep while performing invalidate-range, which we can utilise
> >> > to drop our page references prior to the kernel manipulating the vma
> >> > (for either discard or cloning) and so protect normal users.
> >> > v4: Only run the invalidate notifier if the range intercepts the bo.
> >> > v5: Prevent userspace from attempting to GTT mmap non-page aligned buffers
> >> >
> >> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>
> >> Quick bikeshed:
> >> - Still not really in favour of the in-page gtt offset handling ... I
> >>   still think that this is just a fancy mmap api, and it had better reject
> >>   attempts to map anything not aligned to a full page outright.
> >
> > Strongly disagree.
> 
> Ok, let's dig out the beaten arguments here ;-)
> - Imo the gtt_offset frobbery is a bit fragile, and getting this right
> in the face of ppgtt won't make it better. And yes the only reason we
> still have that field is that you've shot down any patch to remove it
> citing userptr here. So "it's here already" doesn't count ;-)
> - Userptr for i915 is an mmap interface, and that works on pages;
> lying to userspace isn't great.
> - I don't see why userspace can't do this themselves. I've seen that
> it makes things easier in SNA/X, but for a general purpose interface
> that argument doesn't cut too much.
> - I'm also a bit afraid that our code implicitly assumes that
> size/offset are always page-aligned and I kinda want to avoid having
> to audit for such issues from here on. We've blown up in the past
> assuming that size > 0 already, and I think we're set to blow up on
> this one here.
> 
> In any case, if you really want to stick to this I want this to be
> explicitly tracked in an obj->reloc_gtt_offset_adjustment or something
> which yells very loudly at people to make sure no one trips over
> it. Tracking the adjustment in a separate field, which would only ever
> be used in the reloc code, would address all my concerns (save for the
> api ugliness one).

Resurrecting this again.

I'm of two minds on the API here: on the one hand, it can be nicer for
the kernel to handle this stuff if it can be done easily, and save
userspace the trouble.  But OTOH, consistency with existing page-based
interfaces makes things a little less jarring...
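
To make the alternative concrete, the suggestion amounts to something
like this (a sketch, not the posted patch; reloc_gtt_offset_adjustment
is Daniel's strawman name):

#include <stdint.h>

/* Sketch of the explicit-adjustment variant: gtt_offset stays
 * page-aligned and the in-page bias lives in its own loudly-named
 * field that only the relocation path ever consults. */
struct sketch_gem_object {
	/* ...the existing drm_i915_gem_object fields elided... */
	uint32_t gtt_offset;			/* always page-aligned */
	uint32_t reloc_gtt_offset_adjustment;	/* nonzero only for unaligned userptr */
};

static uint64_t reloc_target_address(const struct sketch_gem_object *obj,
				     uint32_t delta)
{
	/* The one and only place the adjustment is applied. */
	return (uint64_t)obj->gtt_offset
		+ obj->reloc_gtt_offset_adjustment + delta;
}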

> 
> >> - I915_USERPTR_UNSYNCHRONIZED ... eeek. That means that despite everyone
> >>   having mmu notifiers enabled in their distro config, you make sure sna
> >>   doesn't hit it. Imo not enough testing coverage ;-) Or is there
> >>   another reason behind this than "mmu notifiers are too slow"?
> >>
> >>   Generally I'm a bit sloppy with going root-only for legacy X stuff (like
> >>   scanline waits), but this here looks very much generally useful. So not
> >>   exemption-material imo.
> >
> > Strongly disagree. Most of my machines do not have mmu-notifiers and
> > would still like to benefit from userptr and I see no reason why we need
> > to force mmu-notifiers.
> 
> Note that I didn't shout against the mmu_notifier-less support
> (although I'm honestly not too happy about it), what I don't like is
> the override bit disabling the mmu_notifiers even if we have them.
> Since that will mean that the code won't be tested through SNA, and so
> has a good chance of being buggy. Once mesa comes around and uses it,
> it'll nicely blow up. And one of the reasons Jesse is breathing down my
> neck to merge this is "other guys are interested in this at intel".

I think we'll need good test cases to cover things regardless.  And
yes, an mmu notifier version that doesn't require root would be more
generally useful than a root-only interface (or are those items
unrelated at this point?).  Having a flag for root-only operation for
clients that know what they're doing is fine though, IMO.

I think one of the nice use cases the Mesa guys have is to save an
extra copy in glReadPixels for certain types of objects, which means
non-root is a requirement.  And it seems like we could do a
glReadPixels extension that would end up being zero copy with this
interface (provided it had pixel accessor functions too to deal with
de-swizzling).
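
As a sketch of that flow (assuming the ioctl as posted; the helper is
hypothetical and the GPU blit itself is elided):

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Sketch of zero-copy readback: wrap a page-aligned allocation in a
 * userptr bo, let the GPU de-swizzle/blit into it, then read the
 * pixels straight out of the application's own memory. */
static void *map_readback_buffer(int fd, uint32_t size, uint32_t *handle)
{
	struct drm_i915_gem_userptr arg;
	void *ptr;

	if (posix_memalign(&ptr, 4096, size))
		return NULL;

	memset(&arg, 0, sizeof(arg));
	arg.user_ptr = (uintptr_t)ptr;
	arg.user_size = size;

	if (ioctl(fd, DRM_IOCTL_I915_GEM_USERPTR, &arg)) {
		free(ptr);
		return NULL;
	}

	*handle = arg.handle;	/* the GPU blits into this bo... */
	return ptr;		/* ...and the CPU reads from here, no copy */
}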

> 
> >> - On a quick read I've seen some gtt mmap support remnants. This scares
> >>   me, a roundabout njet! would appease. Though I think that should already
> >>   happen with the checks we have to reject snoopable buffers?
> >
> > That's because there are platforms where it is theoretically possible
> > and whilst I have no use case for it, I wanted to make it work
> > nevertheless. I still think it is possible, but I could not see a way to
> > do so without completely replacing the drm vm code.
> 
> If I understand things correctly we should be able to block this
> simply by refusing to create an mmap offset for a userptr backed
> object.

Presumably the user has it mapped already through some other means, so
this shouldn't be a big limitation.  Would it simplify things a lot
and/or drop a lot of code?
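
Roughly what that refusal would look like (a sketch against the posted
code, modulo making the userptr ops table visible outside
i915_gem_userptr.c):

	/* Early in i915_gem_mmap_gtt(), before creating the fake
	 * mmap offset: userptr objects never get one. */
	if (obj->ops == &i915_gem_userptr_ops) {
		ret = -EINVAL;
		goto out;
	}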

> >> - I think we should reject set_caching requests on userptr objects.
> >
> > I don't think that is strictly required, just like we should not prevent
> > the user from using set_tiling. (Though the user is never actually going
> > to tell the kernel about such tiling...).
> 
> Yeah, I guess we could allow people to shoot their foot off. Otoh it
> adds another dimension to the userptr interface, which we need to make
> sure it works. Similar besides set_tiling is probably also
> prime_export.
> 
> The reason I'd prefer to lock things down is that we have fairly nice
> coverage for normal gem objects wrt exercising corner-cases with igt.
> If we don't disallow the same corner-cases with userptr, I'd prefer
> the tests to also cover those ... Disallowing is cheaper ;-)

Do we have a good use case for allowing the tiling and/or caching
requests?  If not, yeah I'd just say punt on it for now.

So, any chance we can land this for 3.12?  Would be nice to have a text
file or something describing usage too...
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 91f3ac6..42858f6 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -14,6 +14,7 @@  i915-y := i915_drv.o i915_dma.o i915_irq.o \
 	  i915_gem_gtt.o \
 	  i915_gem_stolen.o \
 	  i915_gem_tiling.o \
+	  i915_gem_userptr.o \
 	  i915_sysfs.o \
 	  i915_trace_points.o \
 	  i915_ums.o \
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 4fa6beb..9b1984c 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1883,6 +1883,7 @@  struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE, i915_gem_context_create_ioctl, DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GEM_USERPTR, i915_gem_userptr_ioctl, DRM_UNLOCKED),
 };
 
 int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 923dc0a..90070f4 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -42,6 +42,7 @@ 
 #include <linux/backlight.h>
 #include <linux/intel-iommu.h>
 #include <linux/kref.h>
+#include <linux/mmu_notifier.h>
 #include <linux/pm_qos.h>
 
 /* General customization:
@@ -1076,6 +1077,7 @@  struct drm_i915_gem_object_ops {
 	 */
 	int (*get_pages)(struct drm_i915_gem_object *);
 	void (*put_pages)(struct drm_i915_gem_object *);
+	void (*release)(struct drm_i915_gem_object *);
 };
 
 struct drm_i915_gem_object {
@@ -1222,6 +1224,23 @@  struct drm_i915_gem_object {
 };
 #define to_gem_object(obj) (&((struct drm_i915_gem_object *)(obj))->base)
 
+struct i915_gem_userptr_object {
+	struct drm_i915_gem_object gem;
+	uintptr_t user_ptr;
+	size_t user_size;
+	int read_only;
+
+	struct mm_struct *mm;
+#if defined(CONFIG_MMU_NOTIFIER)
+	struct mmu_notifier mn;
+#endif
+};
+
+union drm_i915_gem_objects {
+	struct drm_i915_gem_object base;
+	struct i915_gem_userptr_object userptr;
+};
+
 #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
 
 /**
@@ -1501,6 +1520,8 @@  int i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
 			   struct drm_file *file_priv);
 int i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
 			   struct drm_file *file_priv);
+int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file);
 int i915_gem_set_tiling(struct drm_device *dev, void *data,
 			struct drm_file *file_priv);
 int i915_gem_get_tiling(struct drm_device *dev, void *data,
@@ -1554,6 +1575,7 @@  static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
 	BUG_ON(obj->pages_pin_count == 0);
 	obj->pages_pin_count--;
 }
+int i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
 
 int __must_check i915_mutex_lock_interruptible(struct drm_device *dev);
 int i915_gem_object_sync(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 73b1e9e..65a36bf 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1336,20 +1336,18 @@  int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
 	struct drm_device *dev = obj->base.dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
-	pgoff_t page_offset;
+	pgoff_t offset;
 	unsigned long pfn;
 	int ret = 0;
 	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
 
-	/* We don't use vmf->pgoff since that has the fake offset */
-	page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
-		PAGE_SHIFT;
-
 	ret = i915_mutex_lock_interruptible(dev);
 	if (ret)
 		goto out;
 
-	trace_i915_gem_object_fault(obj, page_offset, true, write);
+	trace_i915_gem_object_fault(obj,
+				    (unsigned long)(vmf->virtual_address - vma->vm_start) >> PAGE_SHIFT,
+				    true, write);
 
 	/* Access to snoopable pages through the GTT is incoherent. */
 	if (obj->cache_level != I915_CACHE_NONE && !HAS_LLC(dev)) {
@@ -1372,8 +1370,10 @@  int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 
 	obj->fault_mappable = true;
 
-	pfn = ((dev_priv->gtt.mappable_base + obj->gtt_offset) >> PAGE_SHIFT) +
-		page_offset;
+	/* We don't use vmf->pgoff since that has the fake offset */
+	offset = (unsigned long)vmf->virtual_address - vma->vm_start;
+	offset += obj->gtt_offset;
+	pfn = (dev_priv->gtt.mappable_base + offset) >> PAGE_SHIFT;
 
 	/* Finally, remap it using the new GTT offset */
 	ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
@@ -1565,6 +1565,12 @@  i915_gem_mmap_gtt(struct drm_file *file,
 		goto out;
 	}
 
+	if (offset_in_page(obj->gtt_offset)) {
+		DRM_ERROR("Attempting to mmap an unaligned buffer\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
 	ret = i915_gem_object_create_mmap_offset(obj);
 	if (ret)
 		goto out;
@@ -2495,9 +2501,9 @@  i915_gem_object_unbind(struct drm_i915_gem_object *obj)
 	/* Avoid an unnecessary call to unbind on rebind. */
 	obj->map_and_fenceable = true;
 
+	obj->gtt_offset -= obj->gtt_space->start;
 	drm_mm_put_block(obj->gtt_space);
 	obj->gtt_space = NULL;
-	obj->gtt_offset = 0;
 
 	return 0;
 }
@@ -2987,7 +2993,7 @@  i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 	list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
 
 	obj->gtt_space = node;
-	obj->gtt_offset = node->start;
+	obj->gtt_offset += node->start;
 
 	fenceable =
 		node->size == fence_size &&
@@ -3800,6 +3806,9 @@  void i915_gem_free_object(struct drm_gem_object *gem_obj)
 	if (obj->base.import_attach)
 		drm_prime_gem_destroy(&obj->base, NULL);
 
+	if (obj->ops->release)
+		obj->ops->release(obj);
+
 	drm_gem_object_release(&obj->base);
 	i915_gem_info_remove_obj(dev_priv, obj->base.size);
 
@@ -4101,7 +4110,7 @@  i915_gem_load(struct drm_device *dev)
 
 	dev_priv->slab =
 		kmem_cache_create("i915_gem_object",
-				  sizeof(struct drm_i915_gem_object), 0,
+				  sizeof(union drm_i915_gem_objects), 0,
 				  SLAB_HWCACHE_ALIGN,
 				  NULL);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 2726910..a3e68af 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -254,14 +254,16 @@  i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 		return -EFAULT;
 
 	reloc->delta += target_offset;
+	reloc->offset += obj->gtt_offset;
 	if (use_cpu_reloc(obj)) {
-		uint32_t page_offset = reloc->offset & ~PAGE_MASK;
+		uint32_t page_offset = offset_in_page(reloc->offset);
 		char *vaddr;
 
 		ret = i915_gem_object_set_to_cpu_domain(obj, 1);
 		if (ret)
 			return ret;
 
+		reloc->offset -= obj->gtt_space->start;
 		vaddr = kmap_atomic(i915_gem_object_get_page(obj,
 							     reloc->offset >> PAGE_SHIFT));
 		*(uint32_t *)(vaddr + page_offset) = reloc->delta;
@@ -280,11 +282,10 @@  i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 			return ret;
 
 		/* Map the page containing the relocation we're going to perform.  */
-		reloc->offset += obj->gtt_offset;
 		reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
 						      reloc->offset & PAGE_MASK);
 		reloc_entry = (uint32_t __iomem *)
-			(reloc_page + (reloc->offset & ~PAGE_MASK));
+			(reloc_page + offset_in_page(reloc->offset));
 		iowrite32(reloc->delta, reloc_entry);
 		io_mapping_unmap_atomic(reloc_page);
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
new file mode 100644
index 0000000..f93fa1b
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -0,0 +1,329 @@ 
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "drmP.h"
+#include "i915_drm.h"
+#include "i915_drv.h"
+#include "i915_trace.h"
+#include "intel_drv.h"
+#include <linux/mmu_notifier.h>
+#include <linux/swap.h>
+
+static struct i915_gem_userptr_object *to_userptr_object(struct drm_i915_gem_object *obj)
+{
+	return container_of(obj, struct i915_gem_userptr_object, gem);
+}
+
+#if defined(CONFIG_MMU_NOTIFIER)
+static void i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *mn,
+						       struct mm_struct *mm,
+						       unsigned long start,
+						       unsigned long end)
+{
+	struct i915_gem_userptr_object *vmap;
+	struct drm_device *dev;
+
+	/* XXX race between obj unref and mmu notifier? */
+	vmap = container_of(mn, struct i915_gem_userptr_object, mn);
+	BUG_ON(vmap->mm != mm);
+
+	if (vmap->user_ptr >= end || vmap->user_ptr + vmap->user_size <= start)
+		return;
+
+	if (vmap->gem.pages == NULL) /* opportunistic check */
+		return;
+
+	dev = vmap->gem.base.dev;
+	mutex_lock(&dev->struct_mutex);
+	if (vmap->gem.gtt_space) {
+		struct drm_i915_private *dev_priv = dev->dev_private;
+		bool was_interruptible;
+		int ret;
+
+		was_interruptible = dev_priv->mm.interruptible;
+		dev_priv->mm.interruptible = false;
+
+		ret = i915_gem_object_unbind(&vmap->gem);
+		BUG_ON(ret && ret != -EIO);
+
+		dev_priv->mm.interruptible = was_interruptible;
+	}
+
+	BUG_ON(i915_gem_object_put_pages(&vmap->gem));
+	mutex_unlock(&dev->struct_mutex);
+}
+
+static void i915_gem_userptr_mn_release(struct mmu_notifier *mn,
+					struct mm_struct *mm)
+{
+	struct i915_gem_userptr_object *vmap;
+
+	vmap = container_of(mn, struct i915_gem_userptr_object, mn);
+	BUG_ON(vmap->mm != mm);
+	vmap->mm = NULL;
+
+	/* XXX Schedule an eventual unbind? E.g. hook into require request?
+	 * However, locking will be complicated.
+	 */
+}
+
+static const struct mmu_notifier_ops i915_gem_userptr_notifier = {
+	.invalidate_range_start = i915_gem_userptr_mn_invalidate_range_start,
+	.release = i915_gem_userptr_mn_release,
+};
+
+static void
+i915_gem_userptr_release__mmu_notifier(struct i915_gem_userptr_object *vmap)
+{
+	if (vmap->mn.ops && vmap->mm) {
+		mmu_notifier_unregister(&vmap->mn, vmap->mm);
+		BUG_ON(vmap->mm);
+	}
+}
+
+static int
+i915_gem_userptr_init__mmu_notifier(struct i915_gem_userptr_object *vmap,
+				    unsigned flags)
+{
+	if (flags & I915_USERPTR_UNSYNCHRONIZED)
+		return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;
+
+	vmap->mn.ops = &i915_gem_userptr_notifier;
+	return mmu_notifier_register(&vmap->mn, vmap->mm);
+}
+
+#else
+
+static void
+i915_gem_userptr_release__mmu_notifier(struct i915_gem_userptr_object *vmap)
+{
+}
+
+static int
+i915_gem_userptr_init__mmu_notifier(struct i915_gem_userptr_object *vmap,
+				    unsigned flags)
+{
+	if ((flags & I915_USERPTR_UNSYNCHRONIZED) == 0)
+		return -ENODEV;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	return 0;
+}
+#endif
+
+static int
+i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
+{
+	struct i915_gem_userptr_object *vmap = to_userptr_object(obj);
+	int num_pages = obj->base.size >> PAGE_SHIFT;
+	struct sg_table *st;
+	struct scatterlist *sg;
+	struct page **pvec;
+	int n, pinned, ret;
+
+	if (vmap->mm == NULL)
+		return -EFAULT;
+
+	if (!access_ok(vmap->read_only ? VERIFY_READ : VERIFY_WRITE,
+		       (char __user *)vmap->user_ptr, vmap->user_size))
+		return -EFAULT;
+
+	/* If userspace should engineer that these pages are replaced in
+	 * the vma between us binding this page into the GTT and completion
+	 * of rendering... Their loss. If they change the mapping of their
+	 * pages they need to create a new bo to point to the new vma.
+	 *
+	 * However, that still leaves open the possibility of the vma
+	 * being copied upon fork. Which falls under the same userspace
+	 * synchronisation issue as a regular bo, except that this time
+	 * the process may not be expecting that a particular piece of
+	 * memory is tied to the GPU.
+	 *
+	 * Fortunately, we can hook into the mmu_notifier in order to
+	 * discard the page references prior to anything nasty happening
+	 * to the vma (discard or cloning) which should prevent the more
+	 * egregious cases from causing harm.
+	 */
+
+	pvec = kmalloc(num_pages*sizeof(struct page *),
+		       GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
+	if (pvec == NULL) {
+		pvec = drm_malloc_ab(num_pages, sizeof(struct page *));
+		if (pvec == NULL)
+			return -ENOMEM;
+	}
+
+	pinned = 0;
+	if (vmap->mm == current->mm)
+		pinned = __get_user_pages_fast(vmap->user_ptr, num_pages,
+					       !vmap->read_only, pvec);
+	if (pinned < num_pages) {
+		struct mm_struct *mm = vmap->mm;
+		ret = 0;
+		mutex_unlock(&obj->base.dev->struct_mutex);
+		down_read(&mm->mmap_sem);
+		if (vmap->mm != NULL)
+			ret = get_user_pages(current, mm,
+					     vmap->user_ptr + (pinned << PAGE_SHIFT),
+					     num_pages - pinned,
+					     !vmap->read_only, 0,
+					     pvec + pinned,
+					     NULL);
+		up_read(&mm->mmap_sem);
+		mutex_lock(&obj->base.dev->struct_mutex);
+		if (ret > 0)
+			pinned += ret;
+
+		if (obj->pages || pinned < num_pages) {
+			ret = obj->pages ? 0 : -EFAULT;
+			goto cleanup_pinned;
+		}
+	}
+
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (st == NULL) {
+		ret = -ENOMEM;
+		goto cleanup_pinned;
+	}
+
+	if (sg_alloc_table(st, num_pages, GFP_KERNEL)) {
+		ret = -ENOMEM;
+		goto cleanup_st;
+	}
+
+	for_each_sg(st->sgl, sg, num_pages, n)
+		sg_set_page(sg, pvec[n], PAGE_SIZE, 0);
+	drm_free_large(pvec);
+
+	obj->pages = st;
+	return 0;
+
+cleanup_st:
+	kfree(st);
+cleanup_pinned:
+	release_pages(pvec, pinned, 0);
+	drm_free_large(pvec);
+	return ret;
+}
+
+static void
+i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj)
+{
+	struct scatterlist *sg;
+	int i;
+
+	if (obj->madv != I915_MADV_WILLNEED)
+		obj->dirty = 0;
+
+	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
+		struct page *page = sg_page(sg);
+
+		if (obj->dirty)
+			set_page_dirty(page);
+
+		mark_page_accessed(page);
+		page_cache_release(page);
+	}
+	obj->dirty = 0;
+
+	sg_free_table(obj->pages);
+	kfree(obj->pages);
+}
+
+static void
+i915_gem_userptr_release(struct drm_i915_gem_object *obj)
+{
+	struct i915_gem_userptr_object *vmap = to_userptr_object(obj);
+
+	i915_gem_userptr_release__mmu_notifier(vmap);
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
+	.get_pages = i915_gem_userptr_get_pages,
+	.put_pages = i915_gem_userptr_put_pages,
+	.release = i915_gem_userptr_release,
+};
+
+/**
+ * Creates a new mm object that wraps some user memory.
+ */
+int
+i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_userptr *args = data;
+	struct i915_gem_userptr_object *obj;
+	loff_t first_data_page, last_data_page;
+	int num_pages;
+	int ret;
+	u32 handle;
+
+	if (args->flags & ~(I915_USERPTR_READ_ONLY | I915_USERPTR_UNSYNCHRONIZED))
+		return -EINVAL;
+
+	first_data_page = args->user_ptr / PAGE_SIZE;
+	last_data_page = (args->user_ptr + args->user_size - 1) / PAGE_SIZE;
+	num_pages = last_data_page - first_data_page + 1;
+	if (num_pages * PAGE_SIZE > dev_priv->gtt.total)
+		return -E2BIG;
+
+	/* Allocate the new object */
+	obj = i915_gem_object_alloc(dev);
+	if (obj == NULL)
+		return -ENOMEM;
+
+	if (drm_gem_private_object_init(dev, &obj->gem.base,
+					num_pages * PAGE_SIZE)) {
+		i915_gem_object_free(&obj->gem);
+		return -ENOMEM;
+	}
+
+	i915_gem_object_init(&obj->gem, &i915_gem_userptr_ops);
+	obj->gem.cache_level = I915_CACHE_LLC_MLC;
+
+	obj->gem.gtt_offset = offset_in_page(args->user_ptr);
+	obj->user_ptr = args->user_ptr;
+	obj->user_size = args->user_size;
+	obj->read_only = args->flags & I915_USERPTR_READ_ONLY;
+
+	/* And keep a pointer to the current->mm for resolving the user pages
+	 * at binding. This means that we need to hook into the mmu_notifier
+	 * in order to detect if the mmu is destroyed.
+	 */
+	obj->mm = current->mm;
+	ret = i915_gem_userptr_init__mmu_notifier(obj, args->flags);
+	if (ret)
+		return ret;
+
+	ret = drm_gem_handle_create(file, &obj->gem.base, &handle);
+	/* drop reference from allocate - handle holds it now */
+	drm_gem_object_unreference(&obj->gem.base);
+	if (ret)
+		return ret;
+
+	args->handle = handle;
+	return 0;
+}
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 07d5941..20e39be 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -198,6 +198,7 @@  typedef struct _drm_i915_sarea {
 #define DRM_I915_GEM_SET_CACHING	0x2f
 #define DRM_I915_GEM_GET_CACHING	0x30
 #define DRM_I915_REG_READ		0x31
+#define DRM_I915_GEM_USERPTR		0x32
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -247,6 +248,7 @@  typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_CONTEXT_CREATE	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create)
 #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
 #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
+#define DRM_IOCTL_I915_GEM_USERPTR			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -980,4 +982,18 @@  struct drm_i915_reg_read {
 	__u64 offset;
 	__u64 val; /* Return value */
 };
+
+struct drm_i915_gem_userptr {
+	__u64 user_ptr;
+	__u32 user_size;
+	__u32 flags;
+#define I915_USERPTR_READ_ONLY 0x1
+#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
+	/**
+	 * Returned handle for the object.
+	 *
+	 * Object handles are nonzero.
+	 */
+	__u32 handle;
+};
 #endif /* _UAPI_I915_DRM_H_ */