diff mbox

drm/i915: Pad GTT views of exec objects up to user specified size

Message ID 1445441098-12528-1-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson Oct. 21, 2015, 3:24 p.m. UTC
Our GPUs impose certain requirements upon buffers that depend upon how
exactly they are used. Typically this is expressed as that they require
a larger surface than would be naively computed by pitch * height.
Normally such requirements are hidden away in the userspace driver, but
when we accept pointers from strangers and later impose extra conditions
on them, the original client allocator has no idea about the
monstrosities in the GPU and we require the userspace driver to inform
the kernel how many padding pages are required beyond the client
allocation.

v2: Long time, no see

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h            |  6 ++-
 drivers/gpu/drm/i915/i915_gem.c            | 77 +++++++++++++++---------------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 ++++++-
 include/uapi/drm/i915_drm.h                |  5 +-
 4 files changed, 60 insertions(+), 44 deletions(-)

Comments

Daniel Vetter Oct. 22, 2015, 8:04 a.m. UTC | #1
On Wed, Oct 21, 2015 at 04:24:58PM +0100, Chris Wilson wrote:
> Our GPUs impose certain requirements upon buffers that depend upon how
> exactly they are used. Typically this is expressed as that they require
> a larger surface than would be naively computed by pitch * height.
> Normally such requirements are hidden away in the userspace driver, but
> when we accept pointers from strangers and later impose extra conditions
> on them, the original client allocator has no idea about the
> monstrosities in the GPU and we require the userspace driver to inform
> the kernel how many padding pages are required beyond the client
> allocation.
> 
> v2: Long time, no see
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Usual "needs igt + open-source userspace" broken record from maintainer,
in case someone really wants to use this. Noises I've heard is that it's
for opencl, but who knows.
-Daniel

> ---
>  drivers/gpu/drm/i915/i915_drv.h            |  6 ++-
>  drivers/gpu/drm/i915/i915_gem.c            | 77 +++++++++++++++---------------
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 ++++++-
>  include/uapi/drm/i915_drm.h                |  5 +-
>  4 files changed, 60 insertions(+), 44 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 2771b7edde02..a0ce011a5dc0 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2786,11 +2786,13 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
>  int __must_check
>  i915_gem_object_pin(struct drm_i915_gem_object *obj,
>  		    struct i915_address_space *vm,
> +		    uint64_t size,
>  		    uint32_t alignment,
>  		    uint64_t flags);
>  int __must_check
>  i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
>  			 const struct i915_ggtt_view *view,
> +			 uint64_t size,
>  			 uint32_t alignment,
>  			 uint64_t flags);
>  
> @@ -3048,8 +3050,8 @@ i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
>  		      uint32_t alignment,
>  		      unsigned flags)
>  {
> -	return i915_gem_object_pin(obj, i915_obj_to_ggtt(obj),
> -				   alignment, flags | PIN_GLOBAL);
> +	return i915_gem_object_pin(obj, i915_obj_to_ggtt(obj), 0, alignment,
> +				   flags | PIN_GLOBAL);
>  }
>  
>  static inline int
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 4f49c9dc24e2..d079111d15c5 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1784,7 +1784,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
>  	}
>  
>  	/* Now pin it into the GTT if needed */
> -	ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE);
> +	ret = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
>  	if (ret)
>  		goto unlock;
>  
> @@ -3362,20 +3362,20 @@ static struct i915_vma *
>  i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
>  			   struct i915_address_space *vm,
>  			   const struct i915_ggtt_view *ggtt_view,
> +			   uint64_t size,
>  			   unsigned alignment,
>  			   uint64_t flags)
>  {
>  	struct drm_device *dev = obj->base.dev;
>  	struct drm_i915_private *dev_priv = dev->dev_private;
> -	u32 fence_alignment, unfenced_alignment;
>  	u64 start, end;
> -	u64 size, fence_size;
>  	u32 search_flag, alloc_flag;
>  	struct i915_vma *vma;
>  	int ret;
>  
>  	if (i915_is_ggtt(vm)) {
> -		u32 view_size;
> +		u32 fence_size, fence_alignment, unfenced_alignment;
> +		u64 view_size;
>  
>  		if (WARN_ON(!ggtt_view))
>  			return ERR_PTR(-EINVAL);
> @@ -3393,21 +3393,22 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
>  								view_size,
>  								obj->tiling_mode,
>  								false);
> -		size = flags & PIN_MAPPABLE ? fence_size : view_size;
> +		size = max(size, view_size);
> +		if (flags & PIN_MAPPABLE)
> +			size = max_t(u64, size, fence_size);
> +
> +		if (alignment == 0)
> +			alignment = flags & PIN_MAPPABLE ? fence_alignment :
> +				unfenced_alignment;
> +		if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
> +			DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n",
> +				  ggtt_view ? ggtt_view->type : 0,
> +				  alignment);
> +			return ERR_PTR(-EINVAL);
> +		}
>  	} else {
> -		fence_size = i915_gem_get_gtt_size(dev,
> -						   obj->base.size,
> -						   obj->tiling_mode);
> -		fence_alignment = i915_gem_get_gtt_alignment(dev,
> -							     obj->base.size,
> -							     obj->tiling_mode,
> -							     true);
> -		unfenced_alignment =
> -			i915_gem_get_gtt_alignment(dev,
> -						   obj->base.size,
> -						   obj->tiling_mode,
> -						   false);
> -		size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
> +		size = max_t(u64, size, obj->base.size);
> +		alignment = 4096;
>  	}
>  
>  	start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
> @@ -3418,24 +3419,14 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
>  	if (flags & PIN_ZONE_4G)
>  		end = min_t(u64, end, 1ULL << 32);
>  
> -	if (alignment == 0)
> -		alignment = flags & PIN_MAPPABLE ? fence_alignment :
> -						unfenced_alignment;
> -	if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
> -		DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n",
> -			  ggtt_view ? ggtt_view->type : 0,
> -			  alignment);
> -		return ERR_PTR(-EINVAL);
> -	}
> -
>  	/* If binding the object/GGTT view requires more space than the entire
>  	 * aperture has, reject it early before evicting everything in a vain
>  	 * attempt to find space.
>  	 */
>  	if (size > end) {
> -		DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n",
> +		DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: request=%llu [object=%zd] > %s aperture=%llu\n",
>  			  ggtt_view ? ggtt_view->type : 0,
> -			  size,
> +			  size, obj->base.size,
>  			  flags & PIN_MAPPABLE ? "mappable" : "total",
>  			  end);
>  		return ERR_PTR(-E2BIG);
> @@ -3892,7 +3883,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
>  	 * (e.g. libkms for the bootup splash), we have to ensure that we
>  	 * always use map_and_fenceable for all scanout buffers.
>  	 */
> -	ret = i915_gem_object_ggtt_pin(obj, view, alignment,
> +	ret = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
>  				       view->type == I915_GGTT_VIEW_NORMAL ?
>  				       PIN_MAPPABLE : 0);
>  	if (ret)
> @@ -4043,12 +4034,17 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
>  }
>  
>  static bool
> -i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
> +i915_vma_misplaced(struct i915_vma *vma,
> +		   uint64_t size,
> +		   uint32_t alignment,
> +		   uint64_t flags)
>  {
>  	struct drm_i915_gem_object *obj = vma->obj;
>  
> -	if (alignment &&
> -	    vma->node.start & (alignment - 1))
> +	if (vma->node.size < size)
> +		return true;
> +
> +	if (alignment && vma->node.start & (alignment - 1))
>  		return true;
>  
>  	if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
> @@ -4088,6 +4084,7 @@ static int
>  i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
>  		       struct i915_address_space *vm,
>  		       const struct i915_ggtt_view *ggtt_view,
> +		       uint64_t size,
>  		       uint32_t alignment,
>  		       uint64_t flags)
>  {
> @@ -4118,7 +4115,7 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
>  		if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
>  			return -EBUSY;
>  
> -		if (i915_vma_misplaced(vma, alignment, flags)) {
> +		if (i915_vma_misplaced(vma, size, alignment, flags)) {
>  			WARN(vma->pin_count,
>  			     "bo is already pinned in %s with incorrect alignment:"
>  			     " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d,"
> @@ -4139,8 +4136,8 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
>  
>  	bound = vma ? vma->bound : 0;
>  	if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
> -		vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment,
> -						 flags);
> +		vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view,
> +						 size, alignment, flags);
>  		if (IS_ERR(vma))
>  			return PTR_ERR(vma);
>  	} else {
> @@ -4162,17 +4159,19 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
>  int
>  i915_gem_object_pin(struct drm_i915_gem_object *obj,
>  		    struct i915_address_space *vm,
> +		    uint64_t size,
>  		    uint32_t alignment,
>  		    uint64_t flags)
>  {
>  	return i915_gem_object_do_pin(obj, vm,
>  				      i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL,
> -				      alignment, flags);
> +				      size, alignment, flags);
>  }
>  
>  int
>  i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
>  			 const struct i915_ggtt_view *view,
> +			 uint64_t size,
>  			 uint32_t alignment,
>  			 uint64_t flags)
>  {
> @@ -4180,7 +4179,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
>  		return -EINVAL;
>  
>  	return i915_gem_object_do_pin(obj, i915_obj_to_ggtt(obj), view,
> -				      alignment, flags | PIN_GLOBAL);
> +				      size, alignment, flags | PIN_GLOBAL);
>  }
>  
>  void
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 045a7631faa0..b6bc22d49628 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -605,10 +605,14 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>  			flags |= PIN_HIGH;
>  	}
>  
> -	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
> +	ret = i915_gem_object_pin(obj, vma->vm,
> +				  entry->rsvd1, /* pad_to_size */
> +				  entry->alignment,
> +				  flags);
>  	if ((ret == -ENOSPC  || ret == -E2BIG) &&
>  	    only_mappable_for_reloc(entry->flags))
>  		ret = i915_gem_object_pin(obj, vma->vm,
> +					  entry->rsvd1, /* pad_to_size */
>  					  entry->alignment,
>  					  flags & ~PIN_MAPPABLE);
>  	if (ret)
> @@ -672,6 +676,9 @@ eb_vma_misplaced(struct i915_vma *vma)
>  	    vma->node.start & (entry->alignment - 1))
>  		return true;
>  
> +	if (vma->node.size < entry->rsvd1) /* pad_to_size */
> +		return true;
> +
>  	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
>  	    vma->node.start < BATCH_OFFSET_BIAS)
>  		return true;
> @@ -988,6 +995,13 @@ validate_exec_list(struct drm_device *dev,
>  		if (exec[i].alignment && !is_power_of_2(exec[i].alignment))
>  			return -EINVAL;
>  
> +		/* pad_to_size was once a reserved field, so sanitize it */
> +		if (exec[i].flags & EXEC_OBJECT_PAD_TO_SIZE) {
> +			if (offset_in_page(exec[i].rsvd1))
> +				return -EINVAL;
> +		} else
> +			exec[i].rsvd1 = 0;
> +
>  		/* First check for malicious input causing overflow in
>  		 * the worst case where we need to allocate the entire
>  		 * relocation tree as a single array.
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 08e047cba76a..678f7d5320ae 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -691,10 +691,11 @@ struct drm_i915_gem_exec_object2 {
>  #define EXEC_OBJECT_NEEDS_GTT	(1<<1)
>  #define EXEC_OBJECT_WRITE	(1<<2)
>  #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
> -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1)
> +#define EXEC_OBJECT_PAD_TO_SIZE	(1<<4)
> +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1)
>  	__u64 flags;
>  
> -	__u64 rsvd1;
> +	__u64 rsvd1; /* pad_to_size */
>  	__u64 rsvd2;
>  };
>  
> -- 
> 2.6.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Chris Wilson Oct. 22, 2015, 8:40 a.m. UTC | #2
On Thu, Oct 22, 2015 at 10:04:55AM +0200, Daniel Vetter wrote:
> On Wed, Oct 21, 2015 at 04:24:58PM +0100, Chris Wilson wrote:
> > Our GPUs impose certain requirements upon buffers that depend upon how
> > exactly they are used. Typically this is expressed as that they require
> > a larger surface than would be naively computed by pitch * height.
> > Normally such requirements are hidden away in the userspace driver, but
> > when we accept pointers from strangers and later impose extra conditions
> > on them, the original client allocator has no idea about the
> > monstrosities in the GPU and we require the userspace driver to inform
> > the kernel how many padding pages are required beyond the client
> > allocation.
> > 
> > v2: Long time, no see
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Usual "needs igt + open-source userspace" broken record from maintainer,
> in case someone really wants to use this. Noises I've heard is that it's
> for opencl, but who knows.

igt sent many, many months ago. My user, once again, is in policing
DRI3 and userptr.
-Chris
Tvrtko Ursulin Oct. 22, 2015, 9 a.m. UTC | #3
On 21/10/15 16:24, Chris Wilson wrote:
> Our GPUs impose certain requirements upon buffers that depend upon how
> exactly they are used. Typically this is expressed as that they require
> a larger surface than would be naively computed by pitch * height.
> Normally such requirements are hidden away in the userspace driver, but
> when we accept pointers from strangers and later impose extra conditions
> on them, the original client allocator has no idea about the
> monstrosities in the GPU and we require the userspace driver to inform
> the kernel how many padding pages are required beyond the client
> allocation.
>
> v2: Long time, no see
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

As mentioned by Chris, IGT can be found as "tests/gem_exec_pad_to_size: 
Test object padding at execbuf", might need a respin for top-down 
allocation policy change.

And libdrm support is "libdrm_intel: Add API for execbuf pad to size 
functionality".

Both patches from April this year.

Regards,

Tvrtko
Daniel Vetter Oct. 22, 2015, 9:09 a.m. UTC | #4
On Thu, Oct 22, 2015 at 10:00:54AM +0100, Tvrtko Ursulin wrote:
> 
> On 21/10/15 16:24, Chris Wilson wrote:
> >Our GPUs impose certain requirements upon buffers that depend upon how
> >exactly they are used. Typically this is expressed as that they require
> >a larger surface than would be naively computed by pitch * height.
> >Normally such requirements are hidden away in the userspace driver, but
> >when we accept pointers from strangers and later impose extra conditions
> >on them, the original client allocator has no idea about the
> >monstrosities in the GPU and we require the userspace driver to inform
> >the kernel how many padding pages are required beyond the client
> >allocation.
> >
> >v2: Long time, no see
> >
> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> As mentioned by Chris, IGT can be found as "tests/gem_exec_pad_to_size: Test
> object padding at execbuf", might need a respin for top-down allocation
> policy change.

Ah, so was just missing Testcase: line then. lgtm, Chris/Tvrtko please
push.
-Daniel

> 
> And libdrm support is "libdrm_intel: Add API for execbuf pad to size
> functionality".
> 
> Both patches from April this year.
> 
> Regards,
> 
> Tvrtko
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Tvrtko Ursulin Oct. 22, 2015, 2:15 p.m. UTC | #5
Hi,

On 21/10/15 16:24, Chris Wilson wrote:
> Our GPUs impose certain requirements upon buffers that depend upon how
> exactly they are used. Typically this is expressed as that they require
> a larger surface than would be naively computed by pitch * height.
> Normally such requirements are hidden away in the userspace driver, but
> when we accept pointers from strangers and later impose extra conditions
> on them, the original client allocator has no idea about the
> monstrosities in the GPU and we require the userspace driver to inform
> the kernel how many padding pages are required beyond the client
> allocation.
>
> v2: Long time, no see

[snip]

> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 08e047cba76a..678f7d5320ae 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -691,10 +691,11 @@ struct drm_i915_gem_exec_object2 {
>   #define EXEC_OBJECT_NEEDS_GTT	(1<<1)
>   #define EXEC_OBJECT_WRITE	(1<<2)
>   #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
> -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1)
> +#define EXEC_OBJECT_PAD_TO_SIZE	(1<<4)
> +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1)
>   	__u64 flags;
>
> -	__u64 rsvd1;
> +	__u64 rsvd1; /* pad_to_size */
>   	__u64 rsvd2;
>   };

What do you think about:

union {
	__u64 pad_to_size;
	__u64 rsvd1;
} ?

Kind of like a migration path for userspace?

Tvrtko
Chris Wilson Oct. 22, 2015, 4:07 p.m. UTC | #6
On Thu, Oct 22, 2015 at 03:15:55PM +0100, Tvrtko Ursulin wrote:
> 
> Hi,
> 
> On 21/10/15 16:24, Chris Wilson wrote:
> >Our GPUs impose certain requirements upon buffers that depend upon how
> >exactly they are used. Typically this is expressed as that they require
> >a larger surface than would be naively computed by pitch * height.
> >Normally such requirements are hidden away in the userspace driver, but
> >when we accept pointers from strangers and later impose extra conditions
> >on them, the original client allocator has no idea about the
> >monstrosities in the GPU and we require the userspace driver to inform
> >the kernel how many padding pages are required beyond the client
> >allocation.
> >
> >v2: Long time, no see
> 
> [snip]
> 
> >diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> >index 08e047cba76a..678f7d5320ae 100644
> >--- a/include/uapi/drm/i915_drm.h
> >+++ b/include/uapi/drm/i915_drm.h
> >@@ -691,10 +691,11 @@ struct drm_i915_gem_exec_object2 {
> >  #define EXEC_OBJECT_NEEDS_GTT	(1<<1)
> >  #define EXEC_OBJECT_WRITE	(1<<2)
> >  #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
> >-#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1)
> >+#define EXEC_OBJECT_PAD_TO_SIZE	(1<<4)
> >+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1)
> >  	__u64 flags;
> >
> >-	__u64 rsvd1;
> >+	__u64 rsvd1; /* pad_to_size */
> >  	__u64 rsvd2;
> >  };
> 
> What do you think about:
> 
> union {
> 	__u64 pad_to_size;
> 	__u64 rsvd1;
> } ?
> 
> Kind of like a migration path for userspace?

Hmm, I think that just might work. Clang? Does clang support anonymous
unions yet? Do we care if it doesn't?
-Chris
Tvrtko Ursulin Oct. 23, 2015, 9:50 a.m. UTC | #7
On 22/10/15 17:07, Chris Wilson wrote:
> On Thu, Oct 22, 2015 at 03:15:55PM +0100, Tvrtko Ursulin wrote:
>>
>> Hi,
>>
>> On 21/10/15 16:24, Chris Wilson wrote:
>>> Our GPUs impose certain requirements upon buffers that depend upon how
>>> exactly they are used. Typically this is expressed as that they require
>>> a larger surface than would be naively computed by pitch * height.
>>> Normally such requirements are hidden away in the userspace driver, but
>>> when we accept pointers from strangers and later impose extra conditions
>>> on them, the original client allocator has no idea about the
>>> monstrosities in the GPU and we require the userspace driver to inform
>>> the kernel how many padding pages are required beyond the client
>>> allocation.
>>>
>>> v2: Long time, no see
>>
>> [snip]
>>
>>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>>> index 08e047cba76a..678f7d5320ae 100644
>>> --- a/include/uapi/drm/i915_drm.h
>>> +++ b/include/uapi/drm/i915_drm.h
>>> @@ -691,10 +691,11 @@ struct drm_i915_gem_exec_object2 {
>>>   #define EXEC_OBJECT_NEEDS_GTT	(1<<1)
>>>   #define EXEC_OBJECT_WRITE	(1<<2)
>>>   #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
>>> -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1)
>>> +#define EXEC_OBJECT_PAD_TO_SIZE	(1<<4)
>>> +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1)
>>>   	__u64 flags;
>>>
>>> -	__u64 rsvd1;
>>> +	__u64 rsvd1; /* pad_to_size */
>>>   	__u64 rsvd2;
>>>   };
>>
>> What do you think about:
>>
>> union {
>> 	__u64 pad_to_size;
>> 	__u64 rsvd1;
>> } ?
>>
>> Kind of like a migration path for userspace?
>
> Hmm, I think that just might work. Clang? Does clang support anonymous
> unions yet? Do we care if it doesn't?

I've found some existing examples in uapi headers so think we are covered.

Regards,

Tvrtko
Tvrtko Ursulin Oct. 26, 2015, 4 p.m. UTC | #8
Hi,

On 23/10/15 10:50, Tvrtko Ursulin wrote:
> On 22/10/15 17:07, Chris Wilson wrote:
>> On Thu, Oct 22, 2015 at 03:15:55PM +0100, Tvrtko Ursulin wrote:
>>>
>>> Hi,
>>>
>>> On 21/10/15 16:24, Chris Wilson wrote:
>>>> Our GPUs impose certain requirements upon buffers that depend upon how
>>>> exactly they are used. Typically this is expressed as that they require
>>>> a larger surface than would be naively computed by pitch * height.
>>>> Normally such requirements are hidden away in the userspace driver, but
>>>> when we accept pointers from strangers and later impose extra
>>>> conditions
>>>> on them, the original client allocator has no idea about the
>>>> monstrosities in the GPU and we require the userspace driver to inform
>>>> the kernel how many padding pages are required beyond the client
>>>> allocation.
>>>>
>>>> v2: Long time, no see
>>>
>>> [snip]
>>>
>>>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>>>> index 08e047cba76a..678f7d5320ae 100644
>>>> --- a/include/uapi/drm/i915_drm.h
>>>> +++ b/include/uapi/drm/i915_drm.h
>>>> @@ -691,10 +691,11 @@ struct drm_i915_gem_exec_object2 {
>>>>   #define EXEC_OBJECT_NEEDS_GTT    (1<<1)
>>>>   #define EXEC_OBJECT_WRITE    (1<<2)
>>>>   #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
>>>> -#define __EXEC_OBJECT_UNKNOWN_FLAGS
>>>> -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1)
>>>> +#define EXEC_OBJECT_PAD_TO_SIZE    (1<<4)
>>>> +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1)
>>>>       __u64 flags;
>>>>
>>>> -    __u64 rsvd1;
>>>> +    __u64 rsvd1; /* pad_to_size */
>>>>       __u64 rsvd2;
>>>>   };
>>>
>>> What do you think about:
>>>
>>> union {
>>>     __u64 pad_to_size;
>>>     __u64 rsvd1;
>>> } ?
>>>
>>> Kind of like a migration path for userspace?
>>
>> Hmm, I think that just might work. Clang? Does clang support anonymous
>> unions yet? Do we care if it doesn't?
>
> I've found some existing examples in uapi headers so think we are covered.

Any further thoughts on this? Would you consider re-spinning the patch 
with this addition?

Regards,

Tvrtko
Chris Wilson Oct. 26, 2015, 5:05 p.m. UTC | #9
On Mon, Oct 26, 2015 at 04:00:20PM +0000, Tvrtko Ursulin wrote:
> 
> Hi,
> 
> On 23/10/15 10:50, Tvrtko Ursulin wrote:
> >On 22/10/15 17:07, Chris Wilson wrote:
> >>On Thu, Oct 22, 2015 at 03:15:55PM +0100, Tvrtko Ursulin wrote:
> >>>
> >>>Hi,
> >>>
> >>>On 21/10/15 16:24, Chris Wilson wrote:
> >>>>Our GPUs impose certain requirements upon buffers that depend upon how
> >>>>exactly they are used. Typically this is expressed as that they require
> >>>>a larger surface than would be naively computed by pitch * height.
> >>>>Normally such requirements are hidden away in the userspace driver, but
> >>>>when we accept pointers from strangers and later impose extra
> >>>>conditions
> >>>>on them, the original client allocator has no idea about the
> >>>>monstrosities in the GPU and we require the userspace driver to inform
> >>>>the kernel how many padding pages are required beyond the client
> >>>>allocation.
> >>>>
> >>>>v2: Long time, no see
> >>>
> >>>[snip]
> >>>
> >>>>diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> >>>>index 08e047cba76a..678f7d5320ae 100644
> >>>>--- a/include/uapi/drm/i915_drm.h
> >>>>+++ b/include/uapi/drm/i915_drm.h
> >>>>@@ -691,10 +691,11 @@ struct drm_i915_gem_exec_object2 {
> >>>>  #define EXEC_OBJECT_NEEDS_GTT    (1<<1)
> >>>>  #define EXEC_OBJECT_WRITE    (1<<2)
> >>>>  #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
> >>>>-#define __EXEC_OBJECT_UNKNOWN_FLAGS
> >>>>-(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1)
> >>>>+#define EXEC_OBJECT_PAD_TO_SIZE    (1<<4)
> >>>>+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1)
> >>>>      __u64 flags;
> >>>>
> >>>>-    __u64 rsvd1;
> >>>>+    __u64 rsvd1; /* pad_to_size */
> >>>>      __u64 rsvd2;
> >>>>  };
> >>>
> >>>What do you think about:
> >>>
> >>>union {
> >>>    __u64 pad_to_size;
> >>>    __u64 rsvd1;
> >>>} ?
> >>>
> >>>Kind of like a migration path for userspace?
> >>
> >>Hmm, I think that just might work. Clang? Does clang support anonymous
> >>unions yet? Do we care if it doesn't?
> >
> >I've found some existing examples in uapi headers so think we are covered.
> 
> Any further thoughts on this? Would you consider re-spinning the
> patch with this addition?

I have respun it, but haven't checked it against clang nor do I know
what others think of potential portability issues with other compilers.
-Chris
Tvrtko Ursulin Nov. 3, 2015, 10:57 a.m. UTC | #10
Hi,

On 26/10/15 17:05, Chris Wilson wrote:
> On Mon, Oct 26, 2015 at 04:00:20PM +0000, Tvrtko Ursulin wrote:
>>
>> Hi,
>>
>> On 23/10/15 10:50, Tvrtko Ursulin wrote:
>>> On 22/10/15 17:07, Chris Wilson wrote:
>>>> On Thu, Oct 22, 2015 at 03:15:55PM +0100, Tvrtko Ursulin wrote:
>>>>>
>>>>> Hi,
>>>>>
>>>>> On 21/10/15 16:24, Chris Wilson wrote:
>>>>>> Our GPUs impose certain requirements upon buffers that depend upon how
>>>>>> exactly they are used. Typically this is expressed as that they require
>>>>>> a larger surface than would be naively computed by pitch * height.
>>>>>> Normally such requirements are hidden away in the userspace driver, but
>>>>>> when we accept pointers from strangers and later impose extra
>>>>>> conditions
>>>>>> on them, the original client allocator has no idea about the
>>>>>> monstrosities in the GPU and we require the userspace driver to inform
>>>>>> the kernel how many padding pages are required beyond the client
>>>>>> allocation.
>>>>>>
>>>>>> v2: Long time, no see
>>>>>
>>>>> [snip]
>>>>>
>>>>>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>>>>>> index 08e047cba76a..678f7d5320ae 100644
>>>>>> --- a/include/uapi/drm/i915_drm.h
>>>>>> +++ b/include/uapi/drm/i915_drm.h
>>>>>> @@ -691,10 +691,11 @@ struct drm_i915_gem_exec_object2 {
>>>>>>   #define EXEC_OBJECT_NEEDS_GTT    (1<<1)
>>>>>>   #define EXEC_OBJECT_WRITE    (1<<2)
>>>>>>   #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
>>>>>> -#define __EXEC_OBJECT_UNKNOWN_FLAGS
>>>>>> -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1)
>>>>>> +#define EXEC_OBJECT_PAD_TO_SIZE    (1<<4)
>>>>>> +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1)
>>>>>>       __u64 flags;
>>>>>>
>>>>>> -    __u64 rsvd1;
>>>>>> +    __u64 rsvd1; /* pad_to_size */
>>>>>>       __u64 rsvd2;
>>>>>>   };
>>>>>
>>>>> What do you think about:
>>>>>
>>>>> union {
>>>>>     __u64 pad_to_size;
>>>>>     __u64 rsvd1;
>>>>> } ?
>>>>>
>>>>> Kind of like a migration path for userspace?
>>>>
>>>> Hmm, I think that just might work. Clang? Does clang support anonymous
>>>> unions yet? Do we care if it doesn't?
>>>
>>> I've found some existing examples in uapi headers so think we are covered.
>>
>> Any further thoughts on this? Would you consider re-spinning the
>> patch with this addition?
>
> I have respun it, but haven't checked it against clang nor do I know
> what others think of potential portability issues with other compilers.

As I said there are other anonymous unions in uapi headers so I think it 
should be fine.

I even think just renaming the field in the first place should be fine, 
otherwise what is the point of having reserved fields.

Regards,

Tvrtko
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2771b7edde02..a0ce011a5dc0 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2786,11 +2786,13 @@  void i915_gem_vma_destroy(struct i915_vma *vma);
 int __must_check
 i915_gem_object_pin(struct drm_i915_gem_object *obj,
 		    struct i915_address_space *vm,
+		    uint64_t size,
 		    uint32_t alignment,
 		    uint64_t flags);
 int __must_check
 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 			 const struct i915_ggtt_view *view,
+			 uint64_t size,
 			 uint32_t alignment,
 			 uint64_t flags);
 
@@ -3048,8 +3050,8 @@  i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
 		      uint32_t alignment,
 		      unsigned flags)
 {
-	return i915_gem_object_pin(obj, i915_obj_to_ggtt(obj),
-				   alignment, flags | PIN_GLOBAL);
+	return i915_gem_object_pin(obj, i915_obj_to_ggtt(obj), 0, alignment,
+				   flags | PIN_GLOBAL);
 }
 
 static inline int
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4f49c9dc24e2..d079111d15c5 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1784,7 +1784,7 @@  int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	}
 
 	/* Now pin it into the GTT if needed */
-	ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE);
+	ret = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
 	if (ret)
 		goto unlock;
 
@@ -3362,20 +3362,20 @@  static struct i915_vma *
 i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 			   struct i915_address_space *vm,
 			   const struct i915_ggtt_view *ggtt_view,
+			   uint64_t size,
 			   unsigned alignment,
 			   uint64_t flags)
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 fence_alignment, unfenced_alignment;
 	u64 start, end;
-	u64 size, fence_size;
 	u32 search_flag, alloc_flag;
 	struct i915_vma *vma;
 	int ret;
 
 	if (i915_is_ggtt(vm)) {
-		u32 view_size;
+		u32 fence_size, fence_alignment, unfenced_alignment;
+		u64 view_size;
 
 		if (WARN_ON(!ggtt_view))
 			return ERR_PTR(-EINVAL);
@@ -3393,21 +3393,22 @@  i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 								view_size,
 								obj->tiling_mode,
 								false);
-		size = flags & PIN_MAPPABLE ? fence_size : view_size;
+		size = max(size, view_size);
+		if (flags & PIN_MAPPABLE)
+			size = max_t(u64, size, fence_size);
+
+		if (alignment == 0)
+			alignment = flags & PIN_MAPPABLE ? fence_alignment :
+				unfenced_alignment;
+		if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
+			DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n",
+				  ggtt_view ? ggtt_view->type : 0,
+				  alignment);
+			return ERR_PTR(-EINVAL);
+		}
 	} else {
-		fence_size = i915_gem_get_gtt_size(dev,
-						   obj->base.size,
-						   obj->tiling_mode);
-		fence_alignment = i915_gem_get_gtt_alignment(dev,
-							     obj->base.size,
-							     obj->tiling_mode,
-							     true);
-		unfenced_alignment =
-			i915_gem_get_gtt_alignment(dev,
-						   obj->base.size,
-						   obj->tiling_mode,
-						   false);
-		size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
+		size = max_t(u64, size, obj->base.size);
+		alignment = max_t(u32, alignment, 4096);
 	}
 
 	start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
@@ -3418,24 +3419,14 @@  i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
 	if (flags & PIN_ZONE_4G)
 		end = min_t(u64, end, 1ULL << 32);
 
-	if (alignment == 0)
-		alignment = flags & PIN_MAPPABLE ? fence_alignment :
-						unfenced_alignment;
-	if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
-		DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n",
-			  ggtt_view ? ggtt_view->type : 0,
-			  alignment);
-		return ERR_PTR(-EINVAL);
-	}
-
 	/* If binding the object/GGTT view requires more space than the entire
 	 * aperture has, reject it early before evicting everything in a vain
 	 * attempt to find space.
 	 */
 	if (size > end) {
-		DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n",
+		DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: request=%llu [object=%zu] > %s aperture=%llu\n",
 			  ggtt_view ? ggtt_view->type : 0,
-			  size,
+			  size, obj->base.size,
 			  flags & PIN_MAPPABLE ? "mappable" : "total",
 			  end);
 		return ERR_PTR(-E2BIG);
@@ -3892,7 +3883,7 @@  i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	 * (e.g. libkms for the bootup splash), we have to ensure that we
 	 * always use map_and_fenceable for all scanout buffers.
 	 */
-	ret = i915_gem_object_ggtt_pin(obj, view, alignment,
+	ret = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
 				       view->type == I915_GGTT_VIEW_NORMAL ?
 				       PIN_MAPPABLE : 0);
 	if (ret)
@@ -4043,12 +4034,17 @@  i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
 }
 
 static bool
-i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
+i915_vma_misplaced(struct i915_vma *vma,
+		   uint64_t size,
+		   uint32_t alignment,
+		   uint64_t flags)
 {
 	struct drm_i915_gem_object *obj = vma->obj;
 
-	if (alignment &&
-	    vma->node.start & (alignment - 1))
+	if (vma->node.size < size)
+		return true;
+
+	if (alignment && vma->node.start & (alignment - 1))
 		return true;
 
 	if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
@@ -4088,6 +4084,7 @@  static int
 i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 		       struct i915_address_space *vm,
 		       const struct i915_ggtt_view *ggtt_view,
+		       uint64_t size,
 		       uint32_t alignment,
 		       uint64_t flags)
 {
@@ -4118,7 +4115,7 @@  i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 		if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
 			return -EBUSY;
 
-		if (i915_vma_misplaced(vma, alignment, flags)) {
+		if (i915_vma_misplaced(vma, size, alignment, flags)) {
 			WARN(vma->pin_count,
 			     "bo is already pinned in %s with incorrect alignment:"
 			     " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d,"
@@ -4139,8 +4136,8 @@  i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 
 	bound = vma ? vma->bound : 0;
 	if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
-		vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment,
-						 flags);
+		vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view,
+						 size, alignment, flags);
 		if (IS_ERR(vma))
 			return PTR_ERR(vma);
 	} else {
@@ -4162,17 +4159,19 @@  i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
 int
 i915_gem_object_pin(struct drm_i915_gem_object *obj,
 		    struct i915_address_space *vm,
+		    uint64_t size,
 		    uint32_t alignment,
 		    uint64_t flags)
 {
 	return i915_gem_object_do_pin(obj, vm,
 				      i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL,
-				      alignment, flags);
+				      size, alignment, flags);
 }
 
 int
 i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 			 const struct i915_ggtt_view *view,
+			 uint64_t size,
 			 uint32_t alignment,
 			 uint64_t flags)
 {
@@ -4180,7 +4179,7 @@  i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 		return -EINVAL;
 
 	return i915_gem_object_do_pin(obj, i915_obj_to_ggtt(obj), view,
-				      alignment, flags | PIN_GLOBAL);
+				      size, alignment, flags | PIN_GLOBAL);
 }
 
 void
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 045a7631faa0..b6bc22d49628 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -605,10 +605,14 @@  i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 			flags |= PIN_HIGH;
 	}
 
-	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
+	ret = i915_gem_object_pin(obj, vma->vm,
+				  entry->rsvd1, /* pad_to_size */
+				  entry->alignment,
+				  flags);
 	if ((ret == -ENOSPC  || ret == -E2BIG) &&
 	    only_mappable_for_reloc(entry->flags))
 		ret = i915_gem_object_pin(obj, vma->vm,
+					  entry->rsvd1, /* pad_to_size */
 					  entry->alignment,
 					  flags & ~PIN_MAPPABLE);
 	if (ret)
@@ -672,6 +676,9 @@  eb_vma_misplaced(struct i915_vma *vma)
 	    vma->node.start & (entry->alignment - 1))
 		return true;
 
+	if (vma->node.size < entry->rsvd1) /* pad_to_size */
+		return true;
+
 	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
 	    vma->node.start < BATCH_OFFSET_BIAS)
 		return true;
@@ -988,6 +995,13 @@  validate_exec_list(struct drm_device *dev,
 		if (exec[i].alignment && !is_power_of_2(exec[i].alignment))
 			return -EINVAL;
 
+		/* pad_to_size was once a reserved field, so sanitize it */
+		if (exec[i].flags & EXEC_OBJECT_PAD_TO_SIZE) {
+			if (offset_in_page(exec[i].rsvd1))
+				return -EINVAL;
+		} else
+			exec[i].rsvd1 = 0;
+
 		/* First check for malicious input causing overflow in
 		 * the worst case where we need to allocate the entire
 		 * relocation tree as a single array.
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 08e047cba76a..678f7d5320ae 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -691,10 +691,11 @@  struct drm_i915_gem_exec_object2 {
 #define EXEC_OBJECT_NEEDS_GTT	(1<<1)
 #define EXEC_OBJECT_WRITE	(1<<2)
 #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
-#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1)
+#define EXEC_OBJECT_PAD_TO_SIZE	(1<<4)
+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1)
 	__u64 flags;
 
-	__u64 rsvd1;
+	__u64 rsvd1; /* pad_to_size */
 	__u64 rsvd2;
 };