diff mbox

[RFC] libdrm_intel: Add support for userptr objects

Message ID 1393432901-31951-1-git-send-email-tvrtko.ursulin@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Tvrtko Ursulin Feb. 26, 2014, 4:41 p.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Allow userptr objects to be created and used via libdrm_intel.

At the moment tiling and mapping to GTT aperture is not supported
due hardware limitations across different generations and uncertainty
about its usefulness.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 include/drm/i915_drm.h    |  16 +++++
 intel/intel_bufmgr.c      |  13 ++++
 intel/intel_bufmgr.h      |   5 ++
 intel/intel_bufmgr_gem.c  | 154 +++++++++++++++++++++++++++++++++++++++++++++-
 intel/intel_bufmgr_priv.h |  12 +++-
 5 files changed, 198 insertions(+), 2 deletions(-)

Comments

Ben Widawsky May 1, 2014, 6:47 p.m. UTC | #1
On Wed, Feb 26, 2014 at 04:41:41PM +0000, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Allow userptr objects to be created and used via libdrm_intel.
> 
> At the moment tiling and mapping to GTT aperture is not supported
> due hardware limitations across different generations and uncertainty
> about its usefulness.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>  include/drm/i915_drm.h    |  16 +++++
>  intel/intel_bufmgr.c      |  13 ++++
>  intel/intel_bufmgr.h      |   5 ++
>  intel/intel_bufmgr_gem.c  | 154 +++++++++++++++++++++++++++++++++++++++++++++-
>  intel/intel_bufmgr_priv.h |  12 +++-
>  5 files changed, 198 insertions(+), 2 deletions(-)
> 
> diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
> index 2f4eb8c..d32ef99 100644
> --- a/include/drm/i915_drm.h
> +++ b/include/drm/i915_drm.h
> @@ -223,6 +223,7 @@ typedef struct _drm_i915_sarea {
>  #define DRM_I915_GEM_GET_CACHING	0x30
>  #define DRM_I915_REG_READ		0x31
>  #define DRM_I915_GET_RESET_STATS	0x32
> +#define DRM_I915_GEM_USERPTR		0x34
>  
>  #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
>  #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
> @@ -273,6 +274,7 @@ typedef struct _drm_i915_sarea {
>  #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
>  #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
>  #define DRM_IOCTL_I915_GET_RESET_STATS		DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats)
> +#define DRM_IOCTL_I915_GEM_USERPTR	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR,  struct drm_i915_gem_userptr)
>  
>  /* Allow drivers to submit batchbuffers directly to hardware, relying
>   * on the security mechanisms provided by hardware.
> @@ -498,6 +500,20 @@ struct drm_i915_gem_mmap_gtt {
>  	__u64 offset;
>  };
>  
> +struct drm_i915_gem_userptr {
> +	__u64 user_ptr;
> +	__u64 user_size;

Adding alignment might be a safe bet.

> +	__u32 flags;
> +#define I915_USERPTR_READ_ONLY 0x1

I'll eventually want something like:
#define I915_MIRRORED_GVA 0x2 /* Request the same GPU address */

> +#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
> +	/**
> +	* Returned handle for the object.
> +	*
> +	* Object handles are nonzero.
> +	*/
> +	__u32 handle;
> +};
> +
>  struct drm_i915_gem_set_domain {
>  	/** Handle for the object */
>  	__u32 handle;
> diff --git a/intel/intel_bufmgr.c b/intel/intel_bufmgr.c
> index 905556f..7f3d795 100644
> --- a/intel/intel_bufmgr.c
> +++ b/intel/intel_bufmgr.c
> @@ -60,6 +60,19 @@ drm_intel_bo *drm_intel_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
>  	return bufmgr->bo_alloc_for_render(bufmgr, name, size, alignment);
>  }
>  
> +drm_intel_bo *drm_intel_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
> +					  const char *name, void *addr,
> +					  uint32_t tiling_mode,
> +					  uint32_t stride,
> +					  unsigned long size,
> +					  unsigned long flags)
> +{
> +	if (bufmgr->bo_alloc_userptr)
> +		return bufmgr->bo_alloc_userptr(bufmgr, name, addr, tiling_mode,
> +						stride, size, flags);
> +	return NULL;
> +}
> +
>  drm_intel_bo *
>  drm_intel_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
>                          int x, int y, int cpp, uint32_t *tiling_mode,
> diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
> index 9383c72..be83a56 100644
> --- a/intel/intel_bufmgr.h
> +++ b/intel/intel_bufmgr.h
> @@ -113,6 +113,11 @@ drm_intel_bo *drm_intel_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
>  					    const char *name,
>  					    unsigned long size,
>  					    unsigned int alignment);
> +drm_intel_bo *drm_intel_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
> +					const char *name,
> +					void *addr, uint32_t tiling_mode,
> +					uint32_t stride, unsigned long size,
> +					unsigned long flags);
>  drm_intel_bo *drm_intel_bo_alloc_tiled(drm_intel_bufmgr *bufmgr,
>  				       const char *name,
>  				       int x, int y, int cpp,
> diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
> index 007a6d8..7cad945 100644
> --- a/intel/intel_bufmgr_gem.c
> +++ b/intel/intel_bufmgr_gem.c
> @@ -182,6 +182,11 @@ struct _drm_intel_bo_gem {
>  	void *mem_virtual;
>  	/** GTT virtual address for the buffer, saved across map/unmap cycles */
>  	void *gtt_virtual;
> +	/**
> +	 * Virtual address of the buffer allocated by user, used for userptr
> +	 * objects only.
> +	 */
> +	void *user_virtual;

Can we not just re-use mem_virtual? Do you intend to provide the ability
to have two distinct CPU mappings to the same object?

>  	int map_count;
>  	drmMMListHead vma_list;
>  
> @@ -221,6 +226,11 @@ struct _drm_intel_bo_gem {
>  	bool idle;
>  
>  	/**
> +	 * Boolean of whether this buffer was allocated with userptr
> +	 */
> +	bool is_userptr;
> +
> +	/**
>  	 * Size in bytes of this buffer and its relocation descendents.
>  	 *
>  	 * Used to avoid costly tree walking in
> @@ -847,6 +857,80 @@ drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
>  					       tiling, stride);
>  }
>  
> +static drm_intel_bo *
> +drm_intel_gem_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
> +				const char *name,
> +				void *addr,
> +				uint32_t tiling_mode,
> +				uint32_t stride,
> +				unsigned long size,
> +				unsigned long flags)

I'd vote for dropping tiling_mode, stride. Make size and flags
explicitly sized types. Not doing this has bitten us already.

> +{
> +	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
> +	drm_intel_bo_gem *bo_gem;
> +	int ret;
> +	struct drm_i915_gem_userptr userptr;
> +
> +	/* Tiling with userptr surfaces is not supported
> +	 * on all hardware so refuse it for time being.
> +	 */
> +	if (tiling_mode != I915_TILING_NONE)
> +		return NULL;
> +
> +	bo_gem = calloc(1, sizeof(*bo_gem));
> +	if (!bo_gem)
> +		return NULL;
> +
> +	bo_gem->bo.size = size;
> +
> +	VG_CLEAR(userptr);
> +	userptr.user_ptr = (__u64)((unsigned long)addr);

How are we getting away with non page aligned addresses? I'd vote for
moving some of the memalign checks into here if all cases require page
aligned.

> +	userptr.user_size = size;
> +	userptr.flags = flags;
> +
> +	ret = drmIoctl(bufmgr_gem->fd,
> +			DRM_IOCTL_I915_GEM_USERPTR,
> +			&userptr);
> +	if (ret != 0) {
> +		DBG("bo_create_userptr: "
> +		    "ioctl failed with user ptr %p size 0x%lx, "
> +		    "user flags 0x%lx\n", addr, size, flags);
> +		free(bo_gem);
> +		return NULL;
> +	}
> +
> +	bo_gem->gem_handle = userptr.handle;
> +	bo_gem->bo.handle = bo_gem->gem_handle;
> +	bo_gem->bo.bufmgr    = bufmgr;
> +	bo_gem->is_userptr   = true;
> +	bo_gem->bo.virtual   = addr;
> +	/* Save the address provided by user */
> +	bo_gem->user_virtual = addr;
> +	bo_gem->tiling_mode  = I915_TILING_NONE;
> +	bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
> +	bo_gem->stride       = 0;
> +
> +	DRMINITLISTHEAD(&bo_gem->name_list);
> +	DRMINITLISTHEAD(&bo_gem->vma_list);
> +
> +	bo_gem->name = name;
> +	atomic_set(&bo_gem->refcount, 1);
> +	bo_gem->validate_index = -1;
> +	bo_gem->reloc_tree_fences = 0;
> +	bo_gem->used_as_reloc_target = false;
> +	bo_gem->has_error = false;
> +	bo_gem->reusable = false;

TODO for someone: Some room to consolidate this with the other create
functions.

> +
> +	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);

I'm looking at the code and trying to figure out how this is relevant.
It's not introduced by you directly. Just looking at the math it all
seems to kind of mix-up aperture and non-aperture usages. Just talking
to myself...

> +
> +	DBG("bo_create_userptr: "
> +	    "ptr %p buf %d (%s) size %ldb, stride 0x%x, tile mode %d\n",
> +		addr, bo_gem->gem_handle, bo_gem->name,
> +		size, stride, tiling_mode);
> +
> +	return &bo_gem->bo;
> +}
> +
>  /**
>   * Returns a drm_intel_bo wrapping the given buffer object handle.
>   *
> @@ -1173,6 +1257,12 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
>  	struct drm_i915_gem_set_domain set_domain;
>  	int ret;
>  
> +	if (bo_gem->is_userptr) {
> +		/* Return the same user ptr */
> +		bo->virtual = bo_gem->user_virtual;
> +		return 0;
> +	}
> +
>  	pthread_mutex_lock(&bufmgr_gem->lock);
>  
>  	if (bo_gem->map_count++ == 0)
> @@ -1241,6 +1331,9 @@ map_gtt(drm_intel_bo *bo)
>  	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
>  	int ret;
>  
> +	if (bo_gem->is_userptr)
> +		return -EINVAL;
> +
>  	if (bo_gem->map_count++ == 0)
>  		drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
>  
> @@ -1385,13 +1478,18 @@ int drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo)
>  
>  static int drm_intel_gem_bo_unmap(drm_intel_bo *bo)
>  {
> -	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> +	drm_intel_bufmgr_gem *bufmgr_gem;

I'm missing why this hunk was changed. But, okay.

>  	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
>  	int ret = 0;
>  
>  	if (bo == NULL)
>  		return 0;
>  
> +	if (bo_gem->is_userptr)
> +		return 0;
> +
> +	bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> +
>  	pthread_mutex_lock(&bufmgr_gem->lock);
>  
>  	if (bo_gem->map_count <= 0) {
> @@ -1449,6 +1547,9 @@ drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset,
>  	struct drm_i915_gem_pwrite pwrite;
>  	int ret;
>  
> +	if (bo_gem->is_userptr)
> +		return -EINVAL;
> +
>  	VG_CLEAR(pwrite);
>  	pwrite.handle = bo_gem->gem_handle;
>  	pwrite.offset = offset;
> @@ -1501,6 +1602,9 @@ drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset,
>  	struct drm_i915_gem_pread pread;
>  	int ret;
>  
> +	if (bo_gem->is_userptr)
> +		return -EINVAL;
> +
>  	VG_CLEAR(pread);
>  	pread.handle = bo_gem->gem_handle;
>  	pread.offset = offset;
> @@ -2460,6 +2564,12 @@ drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
>  	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
>  	int ret;
>  
> +	/* Tiling with userptr surfaces is not supported
> +	 * on all hardware so refuse it for time being.
> +	 */
> +	if (bo_gem->is_userptr)
> +		return -EINVAL;
> +
>  	/* Linear buffers have no stride. By ensuring that we only ever use
>  	 * stride 0 with linear buffers, we simplify our code.
>  	 */
> @@ -3181,6 +3291,44 @@ drm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo,
>  	bo_gem->aub_annotation_count = count;
>  }
>  
> +static bool
> +has_userptr(int fd)
> +{
> +	int ret;
> +	void *ptr;
> +	long pgsz;
> +	struct drm_i915_gem_userptr userptr;
> +	struct drm_gem_close close_bo;
> +
> +	pgsz = sysconf(_SC_PAGESIZE);
> +	assert(ret > 0);
> +
> +	ret = posix_memalign(&ptr, pgsz, pgsz);
> +	assert(ret == 0);
> +
> +	memset(&userptr, 0, sizeof(userptr));
> +	userptr.user_ptr = (__u64)(unsigned long)ptr;
> +	userptr.user_size = pgsz;
> +
> +retry:
> +	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr);
> +	if (ret) {
> +		if (errno == ENODEV && userptr.flags == 0) {
> +			userptr.flags = I915_USERPTR_UNSYNCHRONIZED;
> +			goto retry;
> +		}
> +		free(ptr);
> +		return false;
> +	}
> +
> +	close_bo.handle = userptr.handle;
> +	ret = drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &close_bo);
> +	assert(ret == 0);
> +	free(ptr);
> +
> +	return true;
> +}

I think a param for USERPTR is warranted. Or did we decide not to do
this already?

> +
>  /**
>   * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
>   * and manage map buffer objections.
> @@ -3273,6 +3421,10 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
>  	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
>  	bufmgr_gem->has_relaxed_fencing = ret == 0;
>  
> +	if (has_userptr(fd))
> +		bufmgr_gem->bufmgr.bo_alloc_userptr =
> +			drm_intel_gem_bo_alloc_userptr;
> +
>  	gp.param = I915_PARAM_HAS_WAIT_TIMEOUT;
>  	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
>  	bufmgr_gem->has_wait_timeout = ret == 0;
> diff --git a/intel/intel_bufmgr_priv.h b/intel/intel_bufmgr_priv.h
> index 2592d42..3aa1abb 100644
> --- a/intel/intel_bufmgr_priv.h
> +++ b/intel/intel_bufmgr_priv.h
> @@ -60,7 +60,17 @@ struct _drm_intel_bufmgr {
>  					      const char *name,
>  					      unsigned long size,
>  					      unsigned int alignment);
> -
> +	/**
> +	 * Allocate a buffer object from an existing user accessible
> +	 * address malloc'd with the provided size.
> +	 * Alignment is used when mapping to the gtt.
> +	 * Flags may be I915_VMAP_READ_ONLY or I915_USERPTR_UNSYNCHRONIZED
> +	 */
> +	drm_intel_bo *(*bo_alloc_userptr)(drm_intel_bufmgr *bufmgr,
> +					  const char *name, void *addr,
> +					  uint32_t tiling_mode, uint32_t stride,
> +					  unsigned long size,
> +					  unsigned long flags);
>  	/**
>  	 * Allocate a tiled buffer object.
>  	 *

Probably don't need the special function pointer yet since I don't think
we can yet envision use cases which will require any kind of special
handling. A simple has_userptr in bufmgr_gem will probably suffice. But
I don't care too much either way.

I couldn't spot any real bugs...
Tvrtko Ursulin May 2, 2014, 10:27 a.m. UTC | #2
On 05/01/2014 07:47 PM, Ben Widawsky wrote:
> On Wed, Feb 26, 2014 at 04:41:41PM +0000, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Allow userptr objects to be created and used via libdrm_intel.
>>
>> At the moment tiling and mapping to GTT aperture is not supported
>> due hardware limitations across different generations and uncertainty
>> about its usefulness.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> ---
>>   include/drm/i915_drm.h    |  16 +++++
>>   intel/intel_bufmgr.c      |  13 ++++
>>   intel/intel_bufmgr.h      |   5 ++
>>   intel/intel_bufmgr_gem.c  | 154 +++++++++++++++++++++++++++++++++++++++++++++-
>>   intel/intel_bufmgr_priv.h |  12 +++-
>>   5 files changed, 198 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
>> index 2f4eb8c..d32ef99 100644
>> --- a/include/drm/i915_drm.h
>> +++ b/include/drm/i915_drm.h
>> @@ -223,6 +223,7 @@ typedef struct _drm_i915_sarea {
>>   #define DRM_I915_GEM_GET_CACHING	0x30
>>   #define DRM_I915_REG_READ		0x31
>>   #define DRM_I915_GET_RESET_STATS	0x32
>> +#define DRM_I915_GEM_USERPTR		0x34
>>
>>   #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
>>   #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
>> @@ -273,6 +274,7 @@ typedef struct _drm_i915_sarea {
>>   #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
>>   #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
>>   #define DRM_IOCTL_I915_GET_RESET_STATS		DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats)
>> +#define DRM_IOCTL_I915_GEM_USERPTR	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR,  struct drm_i915_gem_userptr)
>>
>>   /* Allow drivers to submit batchbuffers directly to hardware, relying
>>    * on the security mechanisms provided by hardware.
>> @@ -498,6 +500,20 @@ struct drm_i915_gem_mmap_gtt {
>>   	__u64 offset;
>>   };
>>
>> +struct drm_i915_gem_userptr {
>> +	__u64 user_ptr;
>> +	__u64 user_size;
>
> Adding alignment might be a safe bet.

Hmmmm, at first I thought you are raising a good point. But then I don't 
understand why I don't see any aligned types in 
linux/include/uapi/drm/i915_drm.h ?

>> +	__u32 flags;
>> +#define I915_USERPTR_READ_ONLY 0x1
>
> I'll eventually want something like:
> #define I915_MIRRORED_GVA 0x2 /* Request the same GPU address */

Plenty of free flags. :)

>> +#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
>> +	/**
>> +	* Returned handle for the object.
>> +	*
>> +	* Object handles are nonzero.
>> +	*/
>> +	__u32 handle;
>> +};
>> +
>>   struct drm_i915_gem_set_domain {
>>   	/** Handle for the object */
>>   	__u32 handle;
>> diff --git a/intel/intel_bufmgr.c b/intel/intel_bufmgr.c
>> index 905556f..7f3d795 100644
>> --- a/intel/intel_bufmgr.c
>> +++ b/intel/intel_bufmgr.c
>> @@ -60,6 +60,19 @@ drm_intel_bo *drm_intel_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
>>   	return bufmgr->bo_alloc_for_render(bufmgr, name, size, alignment);
>>   }
>>
>> +drm_intel_bo *drm_intel_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
>> +					  const char *name, void *addr,
>> +					  uint32_t tiling_mode,
>> +					  uint32_t stride,
>> +					  unsigned long size,
>> +					  unsigned long flags)
>> +{
>> +	if (bufmgr->bo_alloc_userptr)
>> +		return bufmgr->bo_alloc_userptr(bufmgr, name, addr, tiling_mode,
>> +						stride, size, flags);
>> +	return NULL;
>> +}
>> +
>>   drm_intel_bo *
>>   drm_intel_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
>>                           int x, int y, int cpp, uint32_t *tiling_mode,
>> diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
>> index 9383c72..be83a56 100644
>> --- a/intel/intel_bufmgr.h
>> +++ b/intel/intel_bufmgr.h
>> @@ -113,6 +113,11 @@ drm_intel_bo *drm_intel_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
>>   					    const char *name,
>>   					    unsigned long size,
>>   					    unsigned int alignment);
>> +drm_intel_bo *drm_intel_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
>> +					const char *name,
>> +					void *addr, uint32_t tiling_mode,
>> +					uint32_t stride, unsigned long size,
>> +					unsigned long flags);
>>   drm_intel_bo *drm_intel_bo_alloc_tiled(drm_intel_bufmgr *bufmgr,
>>   				       const char *name,
>>   				       int x, int y, int cpp,
>> diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
>> index 007a6d8..7cad945 100644
>> --- a/intel/intel_bufmgr_gem.c
>> +++ b/intel/intel_bufmgr_gem.c
>> @@ -182,6 +182,11 @@ struct _drm_intel_bo_gem {
>>   	void *mem_virtual;
>>   	/** GTT virtual address for the buffer, saved across map/unmap cycles */
>>   	void *gtt_virtual;
>> +	/**
>> +	 * Virtual address of the buffer allocated by user, used for userptr
>> +	 * objects only.
>> +	 */
>> +	void *user_virtual;
>
> Can we not just re-use mem_virtual? Do you intend to provide the ability
> to have two distinct CPU mappings to the same object?

Haven't heard anything like that. By quick look at the code seems that 
reusing mem_virtual would require special casing some parts which touch 
it. So it may be it is actually cleaner as it is.

>>   	int map_count;
>>   	drmMMListHead vma_list;
>>
>> @@ -221,6 +226,11 @@ struct _drm_intel_bo_gem {
>>   	bool idle;
>>
>>   	/**
>> +	 * Boolean of whether this buffer was allocated with userptr
>> +	 */
>> +	bool is_userptr;
>> +
>> +	/**
>>   	 * Size in bytes of this buffer and its relocation descendents.
>>   	 *
>>   	 * Used to avoid costly tree walking in
>> @@ -847,6 +857,80 @@ drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
>>   					       tiling, stride);
>>   }
>>
>> +static drm_intel_bo *
>> +drm_intel_gem_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
>> +				const char *name,
>> +				void *addr,
>> +				uint32_t tiling_mode,
>> +				uint32_t stride,
>> +				unsigned long size,
>> +				unsigned long flags)
>
> I'd vote for dropping tiling_mode, stride. Make size and flags
> explicitly sized types. Not doing this has bitten us already.

Some people expressed interest in tiling so I thought to preserve it in 
the API.

Kernel even allows it, and it is just because Chris found bspec 
references saying it will break badly on some platforms, that I (or 
maybe it was both of us) decided to disable it on this level for time 
being.

So I think it is just a matter of coming up with a blacklist and it 
would be possible to use it then.

>> +{
>> +	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
>> +	drm_intel_bo_gem *bo_gem;
>> +	int ret;
>> +	struct drm_i915_gem_userptr userptr;
>> +
>> +	/* Tiling with userptr surfaces is not supported
>> +	 * on all hardware so refuse it for time being.
>> +	 */
>> +	if (tiling_mode != I915_TILING_NONE)
>> +		return NULL;
>> +
>> +	bo_gem = calloc(1, sizeof(*bo_gem));
>> +	if (!bo_gem)
>> +		return NULL;
>> +
>> +	bo_gem->bo.size = size;
>> +
>> +	VG_CLEAR(userptr);
>> +	userptr.user_ptr = (__u64)((unsigned long)addr);
>
> How are we getting away with non page aligned addresses? I'd vote for
> moving some of the memalign checks into here if all cases require page
> aligned.

Kernel will reject any unaligned address or size in the ioctl.

>> +	userptr.user_size = size;
>> +	userptr.flags = flags;
>> +
>> +	ret = drmIoctl(bufmgr_gem->fd,
>> +			DRM_IOCTL_I915_GEM_USERPTR,
>> +			&userptr);
>> +	if (ret != 0) {
>> +		DBG("bo_create_userptr: "
>> +		    "ioctl failed with user ptr %p size 0x%lx, "
>> +		    "user flags 0x%lx\n", addr, size, flags);
>> +		free(bo_gem);
>> +		return NULL;
>> +	}
>> +
>> +	bo_gem->gem_handle = userptr.handle;
>> +	bo_gem->bo.handle = bo_gem->gem_handle;
>> +	bo_gem->bo.bufmgr    = bufmgr;
>> +	bo_gem->is_userptr   = true;
>> +	bo_gem->bo.virtual   = addr;
>> +	/* Save the address provided by user */
>> +	bo_gem->user_virtual = addr;
>> +	bo_gem->tiling_mode  = I915_TILING_NONE;
>> +	bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
>> +	bo_gem->stride       = 0;
>> +
>> +	DRMINITLISTHEAD(&bo_gem->name_list);
>> +	DRMINITLISTHEAD(&bo_gem->vma_list);
>> +
>> +	bo_gem->name = name;
>> +	atomic_set(&bo_gem->refcount, 1);
>> +	bo_gem->validate_index = -1;
>> +	bo_gem->reloc_tree_fences = 0;
>> +	bo_gem->used_as_reloc_target = false;
>> +	bo_gem->has_error = false;
>> +	bo_gem->reusable = false;
>
> TODO for someone: Some room to consolidate this with the other create
> functions.
>
>> +
>> +	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
>
> I'm looking at the code and trying to figure out how this is relevant.
> It's not introduced by you directly. Just looking at the math it all
> seems to kind of mix-up aperture and non-aperture usages. Just talking
> to myself...
>
>> +
>> +	DBG("bo_create_userptr: "
>> +	    "ptr %p buf %d (%s) size %ldb, stride 0x%x, tile mode %d\n",
>> +		addr, bo_gem->gem_handle, bo_gem->name,
>> +		size, stride, tiling_mode);
>> +
>> +	return &bo_gem->bo;
>> +}
>> +
>>   /**
>>    * Returns a drm_intel_bo wrapping the given buffer object handle.
>>    *
>> @@ -1173,6 +1257,12 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
>>   	struct drm_i915_gem_set_domain set_domain;
>>   	int ret;
>>
>> +	if (bo_gem->is_userptr) {
>> +		/* Return the same user ptr */
>> +		bo->virtual = bo_gem->user_virtual;
>> +		return 0;
>> +	}
>> +
>>   	pthread_mutex_lock(&bufmgr_gem->lock);
>>
>>   	if (bo_gem->map_count++ == 0)
>> @@ -1241,6 +1331,9 @@ map_gtt(drm_intel_bo *bo)
>>   	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
>>   	int ret;
>>
>> +	if (bo_gem->is_userptr)
>> +		return -EINVAL;
>> +
>>   	if (bo_gem->map_count++ == 0)
>>   		drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
>>
>> @@ -1385,13 +1478,18 @@ int drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo)
>>
>>   static int drm_intel_gem_bo_unmap(drm_intel_bo *bo)
>>   {
>> -	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
>> +	drm_intel_bufmgr_gem *bufmgr_gem;
>
> I'm missing why this hunk was changed. But, okay.

Just to ensure NULL bo is handled nicely I guess.

>>   	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
>>   	int ret = 0;
>>
>>   	if (bo == NULL)
>>   		return 0;
>>
>> +	if (bo_gem->is_userptr)
>> +		return 0;
>> +
>> +	bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
>> +
>>   	pthread_mutex_lock(&bufmgr_gem->lock);
>>
>>   	if (bo_gem->map_count <= 0) {
>> @@ -1449,6 +1547,9 @@ drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset,
>>   	struct drm_i915_gem_pwrite pwrite;
>>   	int ret;
>>
>> +	if (bo_gem->is_userptr)
>> +		return -EINVAL;
>> +
>>   	VG_CLEAR(pwrite);
>>   	pwrite.handle = bo_gem->gem_handle;
>>   	pwrite.offset = offset;
>> @@ -1501,6 +1602,9 @@ drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset,
>>   	struct drm_i915_gem_pread pread;
>>   	int ret;
>>
>> +	if (bo_gem->is_userptr)
>> +		return -EINVAL;
>> +
>>   	VG_CLEAR(pread);
>>   	pread.handle = bo_gem->gem_handle;
>>   	pread.offset = offset;
>> @@ -2460,6 +2564,12 @@ drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
>>   	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
>>   	int ret;
>>
>> +	/* Tiling with userptr surfaces is not supported
>> +	 * on all hardware so refuse it for time being.
>> +	 */
>> +	if (bo_gem->is_userptr)
>> +		return -EINVAL;
>> +
>>   	/* Linear buffers have no stride. By ensuring that we only ever use
>>   	 * stride 0 with linear buffers, we simplify our code.
>>   	 */
>> @@ -3181,6 +3291,44 @@ drm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo,
>>   	bo_gem->aub_annotation_count = count;
>>   }
>>
>> +static bool
>> +has_userptr(int fd)
>> +{
>> +	int ret;
>> +	void *ptr;
>> +	long pgsz;
>> +	struct drm_i915_gem_userptr userptr;
>> +	struct drm_gem_close close_bo;
>> +
>> +	pgsz = sysconf(_SC_PAGESIZE);
>> +	assert(ret > 0);
>> +
>> +	ret = posix_memalign(&ptr, pgsz, pgsz);
>> +	assert(ret == 0);
>> +
>> +	memset(&userptr, 0, sizeof(userptr));
>> +	userptr.user_ptr = (__u64)(unsigned long)ptr;
>> +	userptr.user_size = pgsz;
>> +
>> +retry:
>> +	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr);
>> +	if (ret) {
>> +		if (errno == ENODEV && userptr.flags == 0) {
>> +			userptr.flags = I915_USERPTR_UNSYNCHRONIZED;
>> +			goto retry;
>> +		}
>> +		free(ptr);
>> +		return false;
>> +	}
>> +
>> +	close_bo.handle = userptr.handle;
>> +	ret = drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &close_bo);
>> +	assert(ret == 0);
>> +	free(ptr);
>> +
>> +	return true;
>> +}
>
> I think a param for USERPTR is warranted. Or did we decide not to do
> this already?

I asked for it, but people with authority said no. It is all very weak, 
fragile and dangerous anyway - param or feature detect like the above.

We would really need a proper feature detect mechanism, like text based 
in sysfs or something.

>> +
>>   /**
>>    * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
>>    * and manage map buffer objections.
>> @@ -3273,6 +3421,10 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
>>   	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
>>   	bufmgr_gem->has_relaxed_fencing = ret == 0;
>>
>> +	if (has_userptr(fd))
>> +		bufmgr_gem->bufmgr.bo_alloc_userptr =
>> +			drm_intel_gem_bo_alloc_userptr;
>> +
>>   	gp.param = I915_PARAM_HAS_WAIT_TIMEOUT;
>>   	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
>>   	bufmgr_gem->has_wait_timeout = ret == 0;
>> diff --git a/intel/intel_bufmgr_priv.h b/intel/intel_bufmgr_priv.h
>> index 2592d42..3aa1abb 100644
>> --- a/intel/intel_bufmgr_priv.h
>> +++ b/intel/intel_bufmgr_priv.h
>> @@ -60,7 +60,17 @@ struct _drm_intel_bufmgr {
>>   					      const char *name,
>>   					      unsigned long size,
>>   					      unsigned int alignment);
>> -
>> +	/**
>> +	 * Allocate a buffer object from an existing user accessible
>> +	 * address malloc'd with the provided size.
>> +	 * Alignment is used when mapping to the gtt.
>> +	 * Flags may be I915_VMAP_READ_ONLY or I915_USERPTR_UNSYNCHRONIZED
>> +	 */
>> +	drm_intel_bo *(*bo_alloc_userptr)(drm_intel_bufmgr *bufmgr,
>> +					  const char *name, void *addr,
>> +					  uint32_t tiling_mode, uint32_t stride,
>> +					  unsigned long size,
>> +					  unsigned long flags);
>>   	/**
>>   	 * Allocate a tiled buffer object.
>>   	 *
>
> Probably don't need the special function pointer yet since I don't think
> we can yet envision use cases which will require any kind of special
> handling. A simple has_userptr in bufmgr_gem will probably suffice. But
> I don't care too much either way.

What do you mean?

> I couldn't spot any real bugs...

Cool, I am glad there is some interest for this.

Regards,

Tvrtko
Ben Widawsky May 2, 2014, 5:15 p.m. UTC | #3
On Fri, May 02, 2014 at 11:27:45AM +0100, Tvrtko Ursulin wrote:
> 
> On 05/01/2014 07:47 PM, Ben Widawsky wrote:
> >On Wed, Feb 26, 2014 at 04:41:41PM +0000, Tvrtko Ursulin wrote:
> >>From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>
> >>Allow userptr objects to be created and used via libdrm_intel.
> >>
> >>At the moment tiling and mapping to GTT aperture is not supported
> >>due hardware limitations across different generations and uncertainty
> >>about its usefulness.
> >>
> >>Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>---
> >>  include/drm/i915_drm.h    |  16 +++++
> >>  intel/intel_bufmgr.c      |  13 ++++
> >>  intel/intel_bufmgr.h      |   5 ++
> >>  intel/intel_bufmgr_gem.c  | 154 +++++++++++++++++++++++++++++++++++++++++++++-
> >>  intel/intel_bufmgr_priv.h |  12 +++-
> >>  5 files changed, 198 insertions(+), 2 deletions(-)
> >>
> >>diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
> >>index 2f4eb8c..d32ef99 100644
> >>--- a/include/drm/i915_drm.h
> >>+++ b/include/drm/i915_drm.h
> >>@@ -223,6 +223,7 @@ typedef struct _drm_i915_sarea {
> >>  #define DRM_I915_GEM_GET_CACHING	0x30
> >>  #define DRM_I915_REG_READ		0x31
> >>  #define DRM_I915_GET_RESET_STATS	0x32
> >>+#define DRM_I915_GEM_USERPTR		0x34
> >>
> >>  #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
> >>  #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
> >>@@ -273,6 +274,7 @@ typedef struct _drm_i915_sarea {
> >>  #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
> >>  #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
> >>  #define DRM_IOCTL_I915_GET_RESET_STATS		DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats)
> >>+#define DRM_IOCTL_I915_GEM_USERPTR	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR,  struct drm_i915_gem_userptr)
> >>
> >>  /* Allow drivers to submit batchbuffers directly to hardware, relying
> >>   * on the security mechanisms provided by hardware.
> >>@@ -498,6 +500,20 @@ struct drm_i915_gem_mmap_gtt {
> >>  	__u64 offset;
> >>  };
> >>
> >>+struct drm_i915_gem_userptr {
> >>+	__u64 user_ptr;
> >>+	__u64 user_size;
> >
> >Adding alignment might be a safe bet.
> 
> Hmmmm, at first I thought you are raising a good point. But then I don't
> understand why I don't see any aligned types in
> linux/include/uapi/drm/i915_drm.h ?

I meant an alignment field. I was thinking if some buffers have weird
alignment requirements for the GPU (but not the CPU) making that info
available to the kernel would be important.

As another potentially lacking field (which I happen not to care about
for the vmap/userptr style usage), we have certain buffers that need to
live within a certain part of the GPU address space.

I should mention that when I wrote the previous email, I didn't have
access to the latest kernel implementation when I sent the mail. I had
been assuming (because I've been wrapped up in my own, different world),
that the address space allocation was done at the time of the IOCTL. It
is not, and therefore, my request for the existing code does not make
sense.

With that though, I should describe my usecase briefly. I want the
ability to carve out the address space at the time of the IOCTL. As
such, I need to know some of these attributes.

> 
> >>+	__u32 flags;
> >>+#define I915_USERPTR_READ_ONLY 0x1
> >
> >I'll eventually want something like:
> >#define I915_MIRRORED_GVA 0x2 /* Request the same GPU address */
> 
> Plenty of free flags. :)
> 
> >>+#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
> >>+	/**
> >>+	* Returned handle for the object.
> >>+	*
> >>+	* Object handles are nonzero.
> >>+	*/
> >>+	__u32 handle;
> >>+};
> >>+
> >>  struct drm_i915_gem_set_domain {
> >>  	/** Handle for the object */
> >>  	__u32 handle;
> >>diff --git a/intel/intel_bufmgr.c b/intel/intel_bufmgr.c
> >>index 905556f..7f3d795 100644
> >>--- a/intel/intel_bufmgr.c
> >>+++ b/intel/intel_bufmgr.c
> >>@@ -60,6 +60,19 @@ drm_intel_bo *drm_intel_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
> >>  	return bufmgr->bo_alloc_for_render(bufmgr, name, size, alignment);
> >>  }
> >>
> >>+drm_intel_bo *drm_intel_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
> >>+					  const char *name, void *addr,
> >>+					  uint32_t tiling_mode,
> >>+					  uint32_t stride,
> >>+					  unsigned long size,
> >>+					  unsigned long flags)
> >>+{
> >>+	if (bufmgr->bo_alloc_userptr)
> >>+		return bufmgr->bo_alloc_userptr(bufmgr, name, addr, tiling_mode,
> >>+						stride, size, flags);
> >>+	return NULL;
> >>+}
> >>+
> >>  drm_intel_bo *
> >>  drm_intel_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
> >>                          int x, int y, int cpp, uint32_t *tiling_mode,
> >>diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
> >>index 9383c72..be83a56 100644
> >>--- a/intel/intel_bufmgr.h
> >>+++ b/intel/intel_bufmgr.h
> >>@@ -113,6 +113,11 @@ drm_intel_bo *drm_intel_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
> >>  					    const char *name,
> >>  					    unsigned long size,
> >>  					    unsigned int alignment);
> >>+drm_intel_bo *drm_intel_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
> >>+					const char *name,
> >>+					void *addr, uint32_t tiling_mode,
> >>+					uint32_t stride, unsigned long size,
> >>+					unsigned long flags);
> >>  drm_intel_bo *drm_intel_bo_alloc_tiled(drm_intel_bufmgr *bufmgr,
> >>  				       const char *name,
> >>  				       int x, int y, int cpp,
> >>diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
> >>index 007a6d8..7cad945 100644
> >>--- a/intel/intel_bufmgr_gem.c
> >>+++ b/intel/intel_bufmgr_gem.c
> >>@@ -182,6 +182,11 @@ struct _drm_intel_bo_gem {
> >>  	void *mem_virtual;
> >>  	/** GTT virtual address for the buffer, saved across map/unmap cycles */
> >>  	void *gtt_virtual;
> >>+	/**
> >>+	 * Virtual address of the buffer allocated by user, used for userptr
> >>+	 * objects only.
> >>+	 */
> >>+	void *user_virtual;
> >
> >Can we not just re-use mem_virtual? Do you intend to provide the ability
> >to have two distinct CPU mappings to the same object?
> 
> Haven't heard anything like that. By quick look at the code seems that
> reusing mem_virtual would require special casing some parts which touch it.
> So it may be it is actually cleaner as it is.
> 

I didn't look too hard, but I think it might *just work* if you use
mem_virtual. I will try it and let you know.

> >>  	int map_count;
> >>  	drmMMListHead vma_list;
> >>
> >>@@ -221,6 +226,11 @@ struct _drm_intel_bo_gem {
> >>  	bool idle;
> >>
> >>  	/**
> >>+	 * Boolean of whether this buffer was allocated with userptr
> >>+	 */
> >>+	bool is_userptr;
> >>+
> >>+	/**
> >>  	 * Size in bytes of this buffer and its relocation descendents.
> >>  	 *
> >>  	 * Used to avoid costly tree walking in
> >>@@ -847,6 +857,80 @@ drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
> >>  					       tiling, stride);
> >>  }
> >>
> >>+static drm_intel_bo *
> >>+drm_intel_gem_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
> >>+				const char *name,
> >>+				void *addr,
> >>+				uint32_t tiling_mode,
> >>+				uint32_t stride,
> >>+				unsigned long size,
> >>+				unsigned long flags)
> >
> >I'd vote for dropping tiling_mode, stride. Make size and flags
> >explicitly sized types. Not doing this has bitten us already.
> 
> Some people expressed interest in tiling so I thought to preserve it in the
> API.
> 
> Kernel even allows it, and it is just because Chris found bspec references
> saying it will break badly on some platforms, that I (or maybe it was both
> of us) decided to disable it on this level for time being.
> 
> So I think it is just a matter of coming up with a blacklist and it would be
> possible to use it then.
> 

Well again, maybe this is specific to my usecase, but I can never
imagine a use for such fields at this stage in the buffer's lifecycle.
Could you get "some people" to describe how they want to use it?

> >>+{
> >>+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
> >>+	drm_intel_bo_gem *bo_gem;
> >>+	int ret;
> >>+	struct drm_i915_gem_userptr userptr;
> >>+
> >>+	/* Tiling with userptr surfaces is not supported
> >>+	 * on all hardware so refuse it for time being.
> >>+	 */
> >>+	if (tiling_mode != I915_TILING_NONE)
> >>+		return NULL;
> >>+
> >>+	bo_gem = calloc(1, sizeof(*bo_gem));
> >>+	if (!bo_gem)
> >>+		return NULL;
> >>+
> >>+	bo_gem->bo.size = size;
> >>+
> >>+	VG_CLEAR(userptr);
> >>+	userptr.user_ptr = (__u64)((unsigned long)addr);
> >
> >How are we getting away with non page aligned addresses? I'd vote for
> >moving some of the memalign checks into here if all cases require page
> >aligned.
> 
> Kernel will reject any unaligned address or size in the ioctl.
> 
> >>+	userptr.user_size = size;
> >>+	userptr.flags = flags;
> >>+
> >>+	ret = drmIoctl(bufmgr_gem->fd,
> >>+			DRM_IOCTL_I915_GEM_USERPTR,
> >>+			&userptr);
> >>+	if (ret != 0) {
> >>+		DBG("bo_create_userptr: "
> >>+		    "ioctl failed with user ptr %p size 0x%lx, "
> >>+		    "user flags 0x%lx\n", addr, size, flags);
> >>+		free(bo_gem);
> >>+		return NULL;
> >>+	}
> >>+
> >>+	bo_gem->gem_handle = userptr.handle;
> >>+	bo_gem->bo.handle = bo_gem->gem_handle;
> >>+	bo_gem->bo.bufmgr    = bufmgr;
> >>+	bo_gem->is_userptr   = true;
> >>+	bo_gem->bo.virtual   = addr;
> >>+	/* Save the address provided by user */
> >>+	bo_gem->user_virtual = addr;
> >>+	bo_gem->tiling_mode  = I915_TILING_NONE;
> >>+	bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
> >>+	bo_gem->stride       = 0;
> >>+
> >>+	DRMINITLISTHEAD(&bo_gem->name_list);
> >>+	DRMINITLISTHEAD(&bo_gem->vma_list);
> >>+
> >>+	bo_gem->name = name;
> >>+	atomic_set(&bo_gem->refcount, 1);
> >>+	bo_gem->validate_index = -1;
> >>+	bo_gem->reloc_tree_fences = 0;
> >>+	bo_gem->used_as_reloc_target = false;
> >>+	bo_gem->has_error = false;
> >>+	bo_gem->reusable = false;
> >
> >TODO for someone: Some room to consolidate this with the other create
> >functions.
> >
> >>+
> >>+	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
> >
> >I'm looking at the code and trying to figure out how this is relevant.
> >It's not introduced by you directly. Just looking at the math it all
> >seems to kind of mix-up aperture and non-aperture usages. Just talking
> >to myself...
> >
> >>+
> >>+	DBG("bo_create_userptr: "
> >>+	    "ptr %p buf %d (%s) size %ldb, stride 0x%x, tile mode %d\n",
> >>+		addr, bo_gem->gem_handle, bo_gem->name,
> >>+		size, stride, tiling_mode);
> >>+
> >>+	return &bo_gem->bo;
> >>+}
> >>+
> >>  /**
> >>   * Returns a drm_intel_bo wrapping the given buffer object handle.
> >>   *
> >>@@ -1173,6 +1257,12 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
> >>  	struct drm_i915_gem_set_domain set_domain;
> >>  	int ret;
> >>
> >>+	if (bo_gem->is_userptr) {
> >>+		/* Return the same user ptr */
> >>+		bo->virtual = bo_gem->user_virtual;
> >>+		return 0;
> >>+	}
> >>+
> >>  	pthread_mutex_lock(&bufmgr_gem->lock);
> >>
> >>  	if (bo_gem->map_count++ == 0)
> >>@@ -1241,6 +1331,9 @@ map_gtt(drm_intel_bo *bo)
> >>  	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
> >>  	int ret;
> >>
> >>+	if (bo_gem->is_userptr)
> >>+		return -EINVAL;
> >>+
> >>  	if (bo_gem->map_count++ == 0)
> >>  		drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
> >>
> >>@@ -1385,13 +1478,18 @@ int drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo)
> >>
> >>  static int drm_intel_gem_bo_unmap(drm_intel_bo *bo)
> >>  {
> >>-	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> >>+	drm_intel_bufmgr_gem *bufmgr_gem;
> >
> >I'm missing why this hunk was changed. But, okay.
> 
> Just to ensure NULL bo is handled nicely I guess.
> 
> >>  	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
> >>  	int ret = 0;
> >>
> >>  	if (bo == NULL)
> >>  		return 0;
> >>
> >>+	if (bo_gem->is_userptr)
> >>+		return 0;
> >>+
> >>+	bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> >>+
> >>  	pthread_mutex_lock(&bufmgr_gem->lock);
> >>
> >>  	if (bo_gem->map_count <= 0) {
> >>@@ -1449,6 +1547,9 @@ drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset,
> >>  	struct drm_i915_gem_pwrite pwrite;
> >>  	int ret;
> >>
> >>+	if (bo_gem->is_userptr)
> >>+		return -EINVAL;
> >>+
> >>  	VG_CLEAR(pwrite);
> >>  	pwrite.handle = bo_gem->gem_handle;
> >>  	pwrite.offset = offset;
> >>@@ -1501,6 +1602,9 @@ drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset,
> >>  	struct drm_i915_gem_pread pread;
> >>  	int ret;
> >>
> >>+	if (bo_gem->is_userptr)
> >>+		return -EINVAL;
> >>+
> >>  	VG_CLEAR(pread);
> >>  	pread.handle = bo_gem->gem_handle;
> >>  	pread.offset = offset;
> >>@@ -2460,6 +2564,12 @@ drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
> >>  	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
> >>  	int ret;
> >>
> >>+	/* Tiling with userptr surfaces is not supported
> >>+	 * on all hardware so refuse it for time being.
> >>+	 */
> >>+	if (bo_gem->is_userptr)
> >>+		return -EINVAL;
> >>+
> >>  	/* Linear buffers have no stride. By ensuring that we only ever use
> >>  	 * stride 0 with linear buffers, we simplify our code.
> >>  	 */
> >>@@ -3181,6 +3291,44 @@ drm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo,
> >>  	bo_gem->aub_annotation_count = count;
> >>  }
> >>
> >>+static bool
> >>+has_userptr(int fd)
> >>+{
> >>+	int ret;
> >>+	void *ptr;
> >>+	long pgsz;
> >>+	struct drm_i915_gem_userptr userptr;
> >>+	struct drm_gem_close close_bo;
> >>+
> >>+	pgsz = sysconf(_SC_PAGESIZE);
> >>+	assert(ret > 0);
> >>+
> >>+	ret = posix_memalign(&ptr, pgsz, pgsz);
> >>+	assert(ret == 0);
> >>+
> >>+	memset(&userptr, 0, sizeof(userptr));
> >>+	userptr.user_ptr = (__u64)(unsigned long)ptr;
> >>+	userptr.user_size = pgsz;
> >>+
> >>+retry:
> >>+	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr);
> >>+	if (ret) {
> >>+		if (errno == ENODEV && userptr.flags == 0) {
> >>+			userptr.flags = I915_USERPTR_UNSYNCHRONIZED;
> >>+			goto retry;
> >>+		}
> >>+		free(ptr);
> >>+		return false;
> >>+	}
> >>+
> >>+	close_bo.handle = userptr.handle;
> >>+	ret = drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &close_bo);
> >>+	assert(ret == 0);
> >>+	free(ptr);
> >>+
> >>+	return true;
> >>+}
> >
> >I think a param for USERPTR is warranted. Or did we decide not to do
> >this already?
> 
> I asked for it, but people with authority said no. It is all very weak,
> fragile and dangerous anyway - param or feature detect like the above.
> 
> We would really need a proper feature detect mechanism, like text based in
> sysfs or something.
> 

I don't see why a param is fragile. Feature detect OTOH is almost always
implemented in a fragile manner.

> >>+
> >>  /**
> >>   * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
> >>   * and manage map buffer objections.
> >>@@ -3273,6 +3421,10 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
> >>  	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
> >>  	bufmgr_gem->has_relaxed_fencing = ret == 0;
> >>
> >>+	if (has_userptr(fd))
> >>+		bufmgr_gem->bufmgr.bo_alloc_userptr =
> >>+			drm_intel_gem_bo_alloc_userptr;
> >>+
> >>  	gp.param = I915_PARAM_HAS_WAIT_TIMEOUT;
> >>  	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
> >>  	bufmgr_gem->has_wait_timeout = ret == 0;
> >>diff --git a/intel/intel_bufmgr_priv.h b/intel/intel_bufmgr_priv.h
> >>index 2592d42..3aa1abb 100644
> >>--- a/intel/intel_bufmgr_priv.h
> >>+++ b/intel/intel_bufmgr_priv.h
> >>@@ -60,7 +60,17 @@ struct _drm_intel_bufmgr {
> >>  					      const char *name,
> >>  					      unsigned long size,
> >>  					      unsigned int alignment);
> >>-
> >>+	/**
> >>+	 * Allocate a buffer object from an existing user accessible
> >>+	 * address malloc'd with the provided size.
> >>+	 * Alignment is used when mapping to the gtt.
> >>+	 * Flags may be I915_VMAP_READ_ONLY or I915_USERPTR_UNSYNCHRONIZED
> >>+	 */
> >>+	drm_intel_bo *(*bo_alloc_userptr)(drm_intel_bufmgr *bufmgr,
> >>+					  const char *name, void *addr,
> >>+					  uint32_t tiling_mode, uint32_t stride,
> >>+					  unsigned long size,
> >>+					  unsigned long flags);
> >>  	/**
> >>  	 * Allocate a tiled buffer object.
> >>  	 *
> >
> >Probably don't need the special function pointer yet since I don't think
> >we can yet envision use cases which will require any kind of special
> >handling. A simple has_userptr in bufmgr_gem will probably suffice. But
> >I don't care too much either way.
> 
> What do you mean?

Don't add bo_alloc_userptr to the bufmgr. Just add the prototype to
intel_bufmgr.h.

> 
> >I couldn't spot any real bugs...
> 
> Cool, I am glad there is some interest for this.
> 

I'm pretty close to running this with most of the changes I had asked
you for. I need to see how much of your igt test I can reuse now.

> Regards,
> 
> Tvrtko
Daniel Vetter May 5, 2014, 9:35 a.m. UTC | #4
On Fri, May 02, 2014 at 10:15:30AM -0700, Ben Widawsky wrote:
> On Fri, May 02, 2014 at 11:27:45AM +0100, Tvrtko Ursulin wrote:
> > 
> > On 05/01/2014 07:47 PM, Ben Widawsky wrote:
> > >On Wed, Feb 26, 2014 at 04:41:41PM +0000, Tvrtko Ursulin wrote:
> > >>From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > >>
> > >>Allow userptr objects to be created and used via libdrm_intel.
> > >>
> > >>At the moment tiling and mapping to GTT aperture is not supported
> > >>due hardware limitations across different generations and uncertainty
> > >>about its usefulness.
> > >>
> > >>Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > >>---
> > >>  include/drm/i915_drm.h    |  16 +++++
> > >>  intel/intel_bufmgr.c      |  13 ++++
> > >>  intel/intel_bufmgr.h      |   5 ++
> > >>  intel/intel_bufmgr_gem.c  | 154 +++++++++++++++++++++++++++++++++++++++++++++-
> > >>  intel/intel_bufmgr_priv.h |  12 +++-
> > >>  5 files changed, 198 insertions(+), 2 deletions(-)
> > >>
> > >>diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
> > >>index 2f4eb8c..d32ef99 100644
> > >>--- a/include/drm/i915_drm.h
> > >>+++ b/include/drm/i915_drm.h
> > >>@@ -223,6 +223,7 @@ typedef struct _drm_i915_sarea {
> > >>  #define DRM_I915_GEM_GET_CACHING	0x30
> > >>  #define DRM_I915_REG_READ		0x31
> > >>  #define DRM_I915_GET_RESET_STATS	0x32
> > >>+#define DRM_I915_GEM_USERPTR		0x34
> > >>
> > >>  #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
> > >>  #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
> > >>@@ -273,6 +274,7 @@ typedef struct _drm_i915_sarea {
> > >>  #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
> > >>  #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
> > >>  #define DRM_IOCTL_I915_GET_RESET_STATS		DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats)
> > >>+#define DRM_IOCTL_I915_GEM_USERPTR	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR,  struct drm_i915_gem_userptr)
> > >>
> > >>  /* Allow drivers to submit batchbuffers directly to hardware, relying
> > >>   * on the security mechanisms provided by hardware.
> > >>@@ -498,6 +500,20 @@ struct drm_i915_gem_mmap_gtt {
> > >>  	__u64 offset;
> > >>  };
> > >>
> > >>+struct drm_i915_gem_userptr {
> > >>+	__u64 user_ptr;
> > >>+	__u64 user_size;
> > >
> > >Adding alignment might be a safe bet.
> > 
> > Hmmmm, at first I thought you are raising a good point. But then I don't
> > understand why I don't see any aligned types in
> > linux/include/uapi/drm/i915_drm.h ?
> 
> I meant an alignment field. I was thinking if some buffers have weird
> alignment requirements for the GPU (but not the CPU) making that info
> available to the kernel would be important.

We have a flags param and we can always extend the ioctl at the end, so
imo no need to preemptively add stuff we don't need yet.
-Daniel
Tvrtko Ursulin May 7, 2014, 10:33 a.m. UTC | #5
On 05/02/2014 06:15 PM, Ben Widawsky wrote:
> On Fri, May 02, 2014 at 11:27:45AM +0100, Tvrtko Ursulin wrote:
>> Some people expressed interest in tiling so I thought to preserve it in the
>> API.
>>
>> Kernel even allows it, and it is just because Chris found bspec references
>> saying it will break badly on some platforms, that I (or maybe it was both
>> of us) decided to disable it on this level for time being.
>>
>> So I think it is just a matter of coming up with a blacklist and it would be
>> possible to use it then.
>>
>
> Well again, maybe this is specific to my usecase, but I can never
> imagine a use for such fields at this stage in the buffer's lifecycle.
> Could you get "some people" to describe how they want to use it?

Actually thinking about it more, when I collated requirements from 
various groups they were not all that interested. But Chris was actually 
in favour of keeping it in the kernel rather than disabling it 
altogether. So I decided to keep it in the userspace API and only reject 
the attempts to use it for time being.

>>> I think a param for USERPTR is warranted. Or did we decide not to do
>>> this already?
>>
>> I asked for it, but people with authority said no. It is all very weak,
>> fragile and dangerous anyway - param or feature detect like the above.
>>
>> We would really need a proper feature detect mechanism, like text based in
>> sysfs or something.
>>
>
> I don't see why a param is fragile. Feature detect OTOH is almost always
> implemented in a fragile manner.

Not if we had a text file in sysfs with names rather than numbers 
(getparam) without a central allocation authority.

HAS_PARAM(FEATURE1) actually just tricks you into thinking it's fine, 
while actually you are just asking "Do I have feature 48". What is 
feature 48? Who knows... some features never make it to upstream, some 
do and then get their HAS_PARAM number reallocated so it is really weak.

Something like "grep userptr /sys/kernel/debug/dri/0/i915_features", or 
"stat /sys/kernel/debug/dri/0/i915/features/userptr" would be much better.

This way or the other, it seems to be there is not consensus with 
upstream gate keepers whether to have it or not. It was Chris actually 
who ripped it out. Personally I can see both arguments which is why I 
think we should come up with something better.

>>> Probably don't need the special function pointer yet since I don't think
>>> we can yet envision use cases which will require any kind of special
>>> handling. A simple has_userptr in bufmgr_gem will probably suffice. But
>>> I don't care too much either way.
>>
>> What do you mean?
>
> Don't add bo_alloc_userptr to the bufmgr. Just add the prototype to
> intel_bufmgr.h.

Not sure, wouldn't like to make inconsistent.

> I'm pretty close to running this with most of the changes I had asked
> you for. I need to see how much of your igt test I can reuse now.

Cool, so there is nothing for me to do. :)

Regards,

Tvrtko
Ben Widawsky May 9, 2014, 12:10 a.m. UTC | #6
On Wed, Feb 26, 2014 at 04:41:41PM +0000, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Allow userptr objects to be created and used via libdrm_intel.
> 
> At the moment tiling and mapping to GTT aperture is not supported
> due hardware limitations across different generations and uncertainty
> about its usefulness.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>  include/drm/i915_drm.h    |  16 +++++
>  intel/intel_bufmgr.c      |  13 ++++
>  intel/intel_bufmgr.h      |   5 ++
>  intel/intel_bufmgr_gem.c  | 154 +++++++++++++++++++++++++++++++++++++++++++++-
>  intel/intel_bufmgr_priv.h |  12 +++-
>  5 files changed, 198 insertions(+), 2 deletions(-)
> 
> diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
> index 2f4eb8c..d32ef99 100644
> --- a/include/drm/i915_drm.h
> +++ b/include/drm/i915_drm.h
> @@ -223,6 +223,7 @@ typedef struct _drm_i915_sarea {
>  #define DRM_I915_GEM_GET_CACHING	0x30
>  #define DRM_I915_REG_READ		0x31
>  #define DRM_I915_GET_RESET_STATS	0x32
> +#define DRM_I915_GEM_USERPTR		0x34
>  
>  #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
>  #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
> @@ -273,6 +274,7 @@ typedef struct _drm_i915_sarea {
>  #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
>  #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
>  #define DRM_IOCTL_I915_GET_RESET_STATS		DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats)
> +#define DRM_IOCTL_I915_GEM_USERPTR	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR,  struct drm_i915_gem_userptr)
>  
>  /* Allow drivers to submit batchbuffers directly to hardware, relying
>   * on the security mechanisms provided by hardware.
> @@ -498,6 +500,20 @@ struct drm_i915_gem_mmap_gtt {
>  	__u64 offset;
>  };
>  
> +struct drm_i915_gem_userptr {
> +	__u64 user_ptr;
> +	__u64 user_size;
> +	__u32 flags;
> +#define I915_USERPTR_READ_ONLY 0x1
> +#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
> +	/**
> +	* Returned handle for the object.
> +	*
> +	* Object handles are nonzero.
> +	*/
> +	__u32 handle;
> +};
> +

Oh yeah. I want a ctx_id here as well. Chris, any objection to adding
this?

[snip]
Chris Wilson May 9, 2014, 5:30 a.m. UTC | #7
On Thu, May 08, 2014 at 05:10:24PM -0700, Ben Widawsky wrote:
> On Wed, Feb 26, 2014 at 04:41:41PM +0000, Tvrtko Ursulin wrote:
> > +struct drm_i915_gem_userptr {
> > +	__u64 user_ptr;
> > +	__u64 user_size;
> > +	__u32 flags;
> > +#define I915_USERPTR_READ_ONLY 0x1
> > +#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
> > +	/**
> > +	* Returned handle for the object.
> > +	*
> > +	* Object handles are nonzero.
> > +	*/
> > +	__u32 handle;
> > +};
> > +
> 
> Oh yeah. I want a ctx_id here as well. Chris, any objection to adding
> this?

What for? bo are file-scoped not context-scoped.
-Chris
Daniel Vetter May 12, 2014, 4 p.m. UTC | #8
On Fri, May 09, 2014 at 06:30:11AM +0100, Chris Wilson wrote:
> On Thu, May 08, 2014 at 05:10:24PM -0700, Ben Widawsky wrote:
> > On Wed, Feb 26, 2014 at 04:41:41PM +0000, Tvrtko Ursulin wrote:
> > > +struct drm_i915_gem_userptr {
> > > +	__u64 user_ptr;
> > > +	__u64 user_size;
> > > +	__u32 flags;
> > > +#define I915_USERPTR_READ_ONLY 0x1
> > > +#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
> > > +	/**
> > > +	* Returned handle for the object.
> > > +	*
> > > +	* Object handles are nonzero.
> > > +	*/
> > > +	__u32 handle;
> > > +};
> > > +
> > 
> > Oh yeah. I want a ctx_id here as well. Chris, any objection to adding
> > this?
> 
> What for? bo are file-scoped not context-scoped.

I think what Ben actually wants is a ctx_id in the soft-pin ioctl. Makes
his mirrored ppgtt POC a 2 ioctl thing, but the resulting orthogonality of
interfaces is imo something much to be preferred. Since we still need real
bos for scanout targets and similar, and I'm going to rip everyone's head
off if we do that by shmem-mapping+userptr ;-)
-Daniel
Lespiau, Damien June 19, 2014, 11:13 a.m. UTC | #9
On Wed, Feb 26, 2014 at 04:41:41PM +0000, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Allow userptr objects to be created and used via libdrm_intel.
> 
> At the moment tiling and mapping to GTT aperture is not supported
> due hardware limitations across different generations and uncertainty
> about its usefulness.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>  include/drm/i915_drm.h    |  16 +++++
>  intel/intel_bufmgr.c      |  13 ++++
>  intel/intel_bufmgr.h      |   5 ++
>  intel/intel_bufmgr_gem.c  | 154 +++++++++++++++++++++++++++++++++++++++++++++-
>  intel/intel_bufmgr_priv.h |  12 +++-
>  5 files changed, 198 insertions(+), 2 deletions(-)

Apart from couple of remarks below I couldn't find anything that would
prevent merging this. Well, except maybe that it'd be very nice to have
some feedback from someone using it, we do have an API/ABI guarantee on
libdrm after all.
Lespiau, Damien June 19, 2014, 11:27 a.m. UTC | #10
On Thu, Jun 19, 2014 at 12:13:20PM +0100, Damien Lespiau wrote:
> On Wed, Feb 26, 2014 at 04:41:41PM +0000, Tvrtko Ursulin wrote:
> > From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > 
> > Allow userptr objects to be created and used via libdrm_intel.
> > 
> > At the moment tiling and mapping to GTT aperture is not supported
> > due hardware limitations across different generations and uncertainty
> > about its usefulness.
> > 
> > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >  include/drm/i915_drm.h    |  16 +++++
> >  intel/intel_bufmgr.c      |  13 ++++
> >  intel/intel_bufmgr.h      |   5 ++
> >  intel/intel_bufmgr_gem.c  | 154 +++++++++++++++++++++++++++++++++++++++++++++-
> >  intel/intel_bufmgr_priv.h |  12 +++-
> >  5 files changed, 198 insertions(+), 2 deletions(-)
> 
> Apart from couple of remarks below I couldn't find anything that would
> prevent merging this. Well, except maybe that it'd be very nice to have
> some feedback from someone using it, we do have an API/ABI guarantee on
> libdrm after all.

Actually, if you're retouching this patch, maybe spliting the i915_drm.h
update would be good, that's something we can push straight away.
Tvrtko Ursulin July 9, 2014, 1:08 p.m. UTC | #11
On 06/19/2014 12:13 PM, Damien Lespiau wrote:
> On Wed, Feb 26, 2014 at 04:41:41PM +0000, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Allow userptr objects to be created and used via libdrm_intel.
>>
>> At the moment tiling and mapping to GTT aperture is not supported
>> due hardware limitations across different generations and uncertainty
>> about its usefulness.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> ---
>>   include/drm/i915_drm.h    |  16 +++++
>>   intel/intel_bufmgr.c      |  13 ++++
>>   intel/intel_bufmgr.h      |   5 ++
>>   intel/intel_bufmgr_gem.c  | 154 +++++++++++++++++++++++++++++++++++++++++++++-
>>   intel/intel_bufmgr_priv.h |  12 +++-
>>   5 files changed, 198 insertions(+), 2 deletions(-)
>
> Apart from couple of remarks below I couldn't find anything that would
> prevent merging this. Well, except maybe that it'd be very nice to have
> some feedback from someone using it, we do have an API/ABI guarantee on
> libdrm after all.

Looks like I've forgotten to reply to this. I did address the other 
review comments and sent out a v2 back then.

But for what users are concerned, apart from internal ones who have been 
using this API for some years now, I don't know of any.

Tvrtko
Lespiau, Damien July 9, 2014, 1:16 p.m. UTC | #12
On Wed, Jul 09, 2014 at 02:08:08PM +0100, Tvrtko Ursulin wrote:
> 
> On 06/19/2014 12:13 PM, Damien Lespiau wrote:
> >On Wed, Feb 26, 2014 at 04:41:41PM +0000, Tvrtko Ursulin wrote:
> >>From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>
> >>Allow userptr objects to be created and used via libdrm_intel.
> >>
> >>At the moment tiling and mapping to GTT aperture is not supported
> >>due hardware limitations across different generations and uncertainty
> >>about its usefulness.
> >>
> >>Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>---
> >>  include/drm/i915_drm.h    |  16 +++++
> >>  intel/intel_bufmgr.c      |  13 ++++
> >>  intel/intel_bufmgr.h      |   5 ++
> >>  intel/intel_bufmgr_gem.c  | 154 +++++++++++++++++++++++++++++++++++++++++++++-
> >>  intel/intel_bufmgr_priv.h |  12 +++-
> >>  5 files changed, 198 insertions(+), 2 deletions(-)
> >
> >Apart from couple of remarks below I couldn't find anything that would
> >prevent merging this. Well, except maybe that it'd be very nice to have
> >some feedback from someone using it, we do have an API/ABI guarantee on
> >libdrm after all.
> 
> Looks like I've forgotten to reply to this. I did address the other
> review comments and sent out a v2 back then.
> 
> But for what users are concerned, apart from internal ones who have
> been using this API for some years now, I don't know of any.

Well, considering this is only a wrapper of an ioctl() already
upstreamed, I'm inclined to just push it as is. Daniel any thoughts?
Daniel Vetter July 9, 2014, 2:25 p.m. UTC | #13
On Wed, Jul 09, 2014 at 02:16:59PM +0100, Damien Lespiau wrote:
> On Wed, Jul 09, 2014 at 02:08:08PM +0100, Tvrtko Ursulin wrote:
> > 
> > On 06/19/2014 12:13 PM, Damien Lespiau wrote:
> > >On Wed, Feb 26, 2014 at 04:41:41PM +0000, Tvrtko Ursulin wrote:
> > >>From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > >>
> > >>Allow userptr objects to be created and used via libdrm_intel.
> > >>
> > >>At the moment tiling and mapping to GTT aperture is not supported
> > >>due hardware limitations across different generations and uncertainty
> > >>about its usefulness.
> > >>
> > >>Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > >>---
> > >>  include/drm/i915_drm.h    |  16 +++++
> > >>  intel/intel_bufmgr.c      |  13 ++++
> > >>  intel/intel_bufmgr.h      |   5 ++
> > >>  intel/intel_bufmgr_gem.c  | 154 +++++++++++++++++++++++++++++++++++++++++++++-
> > >>  intel/intel_bufmgr_priv.h |  12 +++-
> > >>  5 files changed, 198 insertions(+), 2 deletions(-)
> > >
> > >Apart from couple of remarks below I couldn't find anything that would
> > >prevent merging this. Well, except maybe that it'd be very nice to have
> > >some feedback from someone using it, we do have an API/ABI guarantee on
> > >libdrm after all.
> > 
> > Looks like I've forgotten to reply to this. I did address the other
> > review comments and sent out a v2 back then.
> > 
> > But for what users are concerned, apart from internal ones who have
> > been using this API for some years now, I don't know of any.
> 
> Well, considering this is only a wrapper of an ioctl() already
> upstreamed, I'm inclined to just push it as is. Daniel any thoughts?

Since both igt and sna have their own wrappers and the beinget patch for
this hasn't surfaced yet we don't really have a public open-source user
for this yet. My understanding of Dave's stance is that we should hold off
with committing until this is requirement is fulfilled.
-Daniel
diff mbox

Patch

diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index 2f4eb8c..d32ef99 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -223,6 +223,7 @@  typedef struct _drm_i915_sarea {
 #define DRM_I915_GEM_GET_CACHING	0x30
 #define DRM_I915_REG_READ		0x31
 #define DRM_I915_GET_RESET_STATS	0x32
+#define DRM_I915_GEM_USERPTR		0x34
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -273,6 +274,7 @@  typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
 #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
 #define DRM_IOCTL_I915_GET_RESET_STATS		DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats)
+#define DRM_IOCTL_I915_GEM_USERPTR	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR,  struct drm_i915_gem_userptr)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -498,6 +500,20 @@  struct drm_i915_gem_mmap_gtt {
 	__u64 offset;
 };
 
+struct drm_i915_gem_userptr {
+	__u64 user_ptr;
+	__u64 user_size;
+	__u32 flags;
+#define I915_USERPTR_READ_ONLY 0x1
+#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
+	/**
+	* Returned handle for the object.
+	*
+	* Object handles are nonzero.
+	*/
+	__u32 handle;
+};
+
 struct drm_i915_gem_set_domain {
 	/** Handle for the object */
 	__u32 handle;
diff --git a/intel/intel_bufmgr.c b/intel/intel_bufmgr.c
index 905556f..7f3d795 100644
--- a/intel/intel_bufmgr.c
+++ b/intel/intel_bufmgr.c
@@ -60,6 +60,19 @@  drm_intel_bo *drm_intel_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
 	return bufmgr->bo_alloc_for_render(bufmgr, name, size, alignment);
 }
 
+drm_intel_bo *drm_intel_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
+					  const char *name, void *addr,
+					  uint32_t tiling_mode,
+					  uint32_t stride,
+					  unsigned long size,
+					  unsigned long flags)
+{
+	if (bufmgr->bo_alloc_userptr)
+		return bufmgr->bo_alloc_userptr(bufmgr, name, addr, tiling_mode,
+						stride, size, flags);
+	return NULL;
+}
+
 drm_intel_bo *
 drm_intel_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
                         int x, int y, int cpp, uint32_t *tiling_mode,
diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
index 9383c72..be83a56 100644
--- a/intel/intel_bufmgr.h
+++ b/intel/intel_bufmgr.h
@@ -113,6 +113,11 @@  drm_intel_bo *drm_intel_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
 					    const char *name,
 					    unsigned long size,
 					    unsigned int alignment);
+drm_intel_bo *drm_intel_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
+					const char *name,
+					void *addr, uint32_t tiling_mode,
+					uint32_t stride, unsigned long size,
+					unsigned long flags);
 drm_intel_bo *drm_intel_bo_alloc_tiled(drm_intel_bufmgr *bufmgr,
 				       const char *name,
 				       int x, int y, int cpp,
diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index 007a6d8..7cad945 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -182,6 +182,11 @@  struct _drm_intel_bo_gem {
 	void *mem_virtual;
 	/** GTT virtual address for the buffer, saved across map/unmap cycles */
 	void *gtt_virtual;
+	/**
+	 * Virtual address of the buffer allocated by user, used for userptr
+	 * objects only.
+	 */
+	void *user_virtual;
 	int map_count;
 	drmMMListHead vma_list;
 
@@ -221,6 +226,11 @@  struct _drm_intel_bo_gem {
 	bool idle;
 
 	/**
+	 * Boolean of whether this buffer was allocated with userptr
+	 */
+	bool is_userptr;
+
+	/**
 	 * Size in bytes of this buffer and its relocation descendents.
 	 *
 	 * Used to avoid costly tree walking in
@@ -847,6 +857,80 @@  drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
 					       tiling, stride);
 }
 
+static drm_intel_bo *
+drm_intel_gem_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
+				const char *name,
+				void *addr,
+				uint32_t tiling_mode,
+				uint32_t stride,
+				unsigned long size,
+				unsigned long flags)
+{
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
+	drm_intel_bo_gem *bo_gem;
+	int ret;
+	struct drm_i915_gem_userptr userptr;
+
+	/* Tiling with userptr surfaces is not supported
+	 * on all hardware so refuse it for time being.
+	 */
+	if (tiling_mode != I915_TILING_NONE)
+		return NULL;
+
+	bo_gem = calloc(1, sizeof(*bo_gem));
+	if (!bo_gem)
+		return NULL;
+
+	bo_gem->bo.size = size;
+
+	VG_CLEAR(userptr);
+	userptr.user_ptr = (__u64)((unsigned long)addr);
+	userptr.user_size = size;
+	userptr.flags = flags;
+
+	ret = drmIoctl(bufmgr_gem->fd,
+			DRM_IOCTL_I915_GEM_USERPTR,
+			&userptr);
+	if (ret != 0) {
+		DBG("bo_create_userptr: "
+		    "ioctl failed with user ptr %p size 0x%lx, "
+		    "user flags 0x%lx\n", addr, size, flags);
+		free(bo_gem);
+		return NULL;
+	}
+
+	bo_gem->gem_handle = userptr.handle;
+	bo_gem->bo.handle = bo_gem->gem_handle;
+	bo_gem->bo.bufmgr    = bufmgr;
+	bo_gem->is_userptr   = true;
+	bo_gem->bo.virtual   = addr;
+	/* Save the address provided by user */
+	bo_gem->user_virtual = addr;
+	bo_gem->tiling_mode  = I915_TILING_NONE;
+	bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
+	bo_gem->stride       = 0;
+
+	DRMINITLISTHEAD(&bo_gem->name_list);
+	DRMINITLISTHEAD(&bo_gem->vma_list);
+
+	bo_gem->name = name;
+	atomic_set(&bo_gem->refcount, 1);
+	bo_gem->validate_index = -1;
+	bo_gem->reloc_tree_fences = 0;
+	bo_gem->used_as_reloc_target = false;
+	bo_gem->has_error = false;
+	bo_gem->reusable = false;
+
+	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
+
+	DBG("bo_create_userptr: "
+	    "ptr %p buf %d (%s) size %ldb, stride 0x%x, tile mode %d\n",
+		addr, bo_gem->gem_handle, bo_gem->name,
+		size, stride, tiling_mode);
+
+	return &bo_gem->bo;
+}
+
 /**
  * Returns a drm_intel_bo wrapping the given buffer object handle.
  *
@@ -1173,6 +1257,12 @@  static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
 	struct drm_i915_gem_set_domain set_domain;
 	int ret;
 
+	if (bo_gem->is_userptr) {
+		/* Return the same user ptr */
+		bo->virtual = bo_gem->user_virtual;
+		return 0;
+	}
+
 	pthread_mutex_lock(&bufmgr_gem->lock);
 
 	if (bo_gem->map_count++ == 0)
@@ -1241,6 +1331,9 @@  map_gtt(drm_intel_bo *bo)
 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
 	int ret;
 
+	if (bo_gem->is_userptr)
+		return -EINVAL;
+
 	if (bo_gem->map_count++ == 0)
 		drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
 
@@ -1385,13 +1478,18 @@  int drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo)
 
 static int drm_intel_gem_bo_unmap(drm_intel_bo *bo)
 {
-	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+	drm_intel_bufmgr_gem *bufmgr_gem;
 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
 	int ret = 0;
 
 	if (bo == NULL)
 		return 0;
 
+	if (bo_gem->is_userptr)
+		return 0;
+
+	bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+
 	pthread_mutex_lock(&bufmgr_gem->lock);
 
 	if (bo_gem->map_count <= 0) {
@@ -1449,6 +1547,9 @@  drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset,
 	struct drm_i915_gem_pwrite pwrite;
 	int ret;
 
+	if (bo_gem->is_userptr)
+		return -EINVAL;
+
 	VG_CLEAR(pwrite);
 	pwrite.handle = bo_gem->gem_handle;
 	pwrite.offset = offset;
@@ -1501,6 +1602,9 @@  drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset,
 	struct drm_i915_gem_pread pread;
 	int ret;
 
+	if (bo_gem->is_userptr)
+		return -EINVAL;
+
 	VG_CLEAR(pread);
 	pread.handle = bo_gem->gem_handle;
 	pread.offset = offset;
@@ -2460,6 +2564,12 @@  drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
 	int ret;
 
+	/* Tiling with userptr surfaces is not supported
+	 * on all hardware so refuse it for time being.
+	 */
+	if (bo_gem->is_userptr)
+		return -EINVAL;
+
 	/* Linear buffers have no stride. By ensuring that we only ever use
 	 * stride 0 with linear buffers, we simplify our code.
 	 */
@@ -3181,6 +3291,44 @@  drm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo,
 	bo_gem->aub_annotation_count = count;
 }
 
+static bool
+has_userptr(int fd)
+{
+	int ret;
+	void *ptr;
+	long pgsz;
+	struct drm_i915_gem_userptr userptr;
+	struct drm_gem_close close_bo;
+
+	pgsz = sysconf(_SC_PAGESIZE);
+	assert(ret > 0);
+
+	ret = posix_memalign(&ptr, pgsz, pgsz);
+	assert(ret == 0);
+
+	memset(&userptr, 0, sizeof(userptr));
+	userptr.user_ptr = (__u64)(unsigned long)ptr;
+	userptr.user_size = pgsz;
+
+retry:
+	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr);
+	if (ret) {
+		if (errno == ENODEV && userptr.flags == 0) {
+			userptr.flags = I915_USERPTR_UNSYNCHRONIZED;
+			goto retry;
+		}
+		free(ptr);
+		return false;
+	}
+
+	close_bo.handle = userptr.handle;
+	ret = drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &close_bo);
+	assert(ret == 0);
+	free(ptr);
+
+	return true;
+}
+
 /**
  * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
  * and manage map buffer objections.
@@ -3273,6 +3421,10 @@  drm_intel_bufmgr_gem_init(int fd, int batch_size)
 	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
 	bufmgr_gem->has_relaxed_fencing = ret == 0;
 
+	if (has_userptr(fd))
+		bufmgr_gem->bufmgr.bo_alloc_userptr =
+			drm_intel_gem_bo_alloc_userptr;
+
 	gp.param = I915_PARAM_HAS_WAIT_TIMEOUT;
 	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
 	bufmgr_gem->has_wait_timeout = ret == 0;
diff --git a/intel/intel_bufmgr_priv.h b/intel/intel_bufmgr_priv.h
index 2592d42..3aa1abb 100644
--- a/intel/intel_bufmgr_priv.h
+++ b/intel/intel_bufmgr_priv.h
@@ -60,7 +60,17 @@  struct _drm_intel_bufmgr {
 					      const char *name,
 					      unsigned long size,
 					      unsigned int alignment);
-
+	/**
+	 * Allocate a buffer object from an existing user accessible
+	 * address malloc'd with the provided size.
+	 * Alignment is used when mapping to the gtt.
+	 * Flags may be I915_VMAP_READ_ONLY or I915_USERPTR_UNSYNCHRONIZED
+	 */
+	drm_intel_bo *(*bo_alloc_userptr)(drm_intel_bufmgr *bufmgr,
+					  const char *name, void *addr,
+					  uint32_t tiling_mode, uint32_t stride,
+					  unsigned long size,
+					  unsigned long flags);
 	/**
 	 * Allocate a tiled buffer object.
 	 *