Message ID | 1435673593-28127-1-git-send-email-thomas.daniel@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Many apologies to Michal for incorrectly spelling his name in the CC list. Thomas. > -----Original Message----- > From: Daniel, Thomas > Sent: Tuesday, June 30, 2015 3:13 PM > To: intel-gfx@lists.freedesktop.org > Cc: Chris Wilson; Goel, Akash; Belgaumkar, Vinay; Michal Winiarsky; Zou, > Nanhai; Daniel, Thomas > Subject: [PATCH v4] drm/i915: Add soft-pinning API for execbuffer > > From: Chris Wilson <chris@chris-wilson.co.uk> > > Userspace can pass in an offset that it presumes the object is located > at. The kernel will then do its utmost to fit the object into that > location. The assumption is that userspace is handling its own object > locations (for example along with full-ppgtt) and that the kernel will > rarely have to make space for the user's requests. > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by Chris > Wilson. Fixed incorrect error paths causing crash found by Michal Winiarski. > (Not published externally) > > v3: Rebased because of trivial conflict in object_bind_to_vm. Fixed eviction > to allow eviction of soft-pinned objects when another soft-pinned object used > by a subsequent execbuffer overlaps reported by Michal Winiarski. > (Not published externally) > > v4: Moved soft-pinned objects to the front of ordered_vmas so that they are > pinned first after an address conflict happens to avoid repeated conflicts in > rare cases (Suggested by Chris Wilson). Expanded comment on > drm_i915_gem_exec_object2.offset to cover this new API. 
> > Cc: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Akash Goel <akash.goel@intel.com> > Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com> > Cc: Michal Winiarsky <michal.winiarsky@intel.com> > Cc: Zou Nanhai <nanhai.zou@intel.com> > Signed-off-by: Thomas Daniel <thomas.daniel@intel.com> > --- > drivers/gpu/drm/i915/i915_drv.h | 4 +++ > drivers/gpu/drm/i915/i915_gem.c | 51 ++++++++++++++++++++-------- > drivers/gpu/drm/i915/i915_gem_evict.c | 38 +++++++++++++++++++++ > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +++++++-- > include/uapi/drm/i915_drm.h | 9 +++-- > 5 files changed, 99 insertions(+), 19 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index d90a782..e96c101 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -2747,7 +2747,9 @@ void i915_gem_vma_destroy(struct i915_vma *vma); > #define PIN_USER (1<<4) > #define PIN_UPDATE (1<<5) > #define PIN_FULL_RANGE (1<<6) > +#define PIN_OFFSET_FIXED (1<<7) > #define PIN_OFFSET_MASK (~4095) > + > int __must_check > i915_gem_object_pin(struct drm_i915_gem_object *obj, > struct i915_address_space *vm, > @@ -3085,6 +3087,8 @@ int __must_check i915_gem_evict_something(struct > drm_device *dev, > unsigned long start, > unsigned long end, > unsigned flags); > +int __must_check > +i915_gem_evict_for_vma(struct i915_vma *target); > int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle); > int i915_gem_evict_everything(struct drm_device *dev); > > diff --git a/drivers/gpu/drm/i915/i915_gem.c > b/drivers/gpu/drm/i915/i915_gem.c > index f170da6..a7e5ff2 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -3806,22 +3806,41 @@ i915_gem_object_bind_to_vm(struct > drm_i915_gem_object *obj, > if (IS_ERR(vma)) > goto err_unpin; > > + if (flags & PIN_OFFSET_FIXED) { > + uint64_t offset = flags & PIN_OFFSET_MASK; > + if (offset & (alignment - 1)) { > + ret = -EINVAL; > + goto err_free_vma; > 
+ } > + vma->node.start = offset; > + vma->node.size = size; > + vma->node.color = obj->cache_level; > + ret = drm_mm_reserve_node(&vm->mm, &vma->node); > + if (ret) { > + ret = i915_gem_evict_for_vma(vma); > + if (ret == 0) > + ret = drm_mm_reserve_node(&vm->mm, > &vma->node); > + } > + if (ret) > + goto err_free_vma; > + } else { > search_free: > - ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, > - size, alignment, > - obj->cache_level, > - start, end, > - search_flag, > - alloc_flag); > - if (ret) { > - ret = i915_gem_evict_something(dev, vm, size, alignment, > - obj->cache_level, > - start, end, > - flags); > - if (ret == 0) > - goto search_free; > + ret = drm_mm_insert_node_in_range_generic(&vm->mm, > &vma->node, > + size, alignment, > + obj->cache_level, > + start, end, > + search_flag, > + alloc_flag); > + if (ret) { > + ret = i915_gem_evict_something(dev, vm, size, > alignment, > + obj->cache_level, > + start, end, > + flags); > + if (ret == 0) > + goto search_free; > > - goto err_free_vma; > + goto err_free_vma; > + } > } > if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) { > ret = -EINVAL; > @@ -4357,6 +4376,10 @@ i915_vma_misplaced(struct i915_vma *vma, > uint32_t alignment, uint64_t flags) > vma->node.start < (flags & PIN_OFFSET_MASK)) > return true; > > + if (flags & PIN_OFFSET_FIXED && > + vma->node.start != (flags & PIN_OFFSET_MASK)) > + return true; > + > return false; > } > > diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c > b/drivers/gpu/drm/i915/i915_gem_evict.c > index d09e35e..6098e2f 100644 > --- a/drivers/gpu/drm/i915/i915_gem_evict.c > +++ b/drivers/gpu/drm/i915/i915_gem_evict.c > @@ -199,6 +199,44 @@ found: > return ret; > } > > +int > +i915_gem_evict_for_vma(struct i915_vma *target) > +{ > + struct drm_mm_node *node, *next; > + > + list_for_each_entry_safe(node, next, > + &target->vm->mm.head_node.node_list, > + node_list) { > + struct i915_vma *vma; > + int ret; > + > + if (node->start + node->size <= 
target->node.start) > + continue; > + if (node->start >= target->node.start + target->node.size) > + break; > + > + vma = container_of(node, typeof(*vma), node); > + > + if (vma->pin_count) { > + /* We may need to evict a buffer in the same batch */ > + if (!vma->exec_entry) > + return -EBUSY; > + > + if (vma->exec_entry->flags & EXEC_OBJECT_PINNED) > + /* Overlapping fixed objects in the same batch > */ > + return -EINVAL; > + > + return -ENOSPC; > + } > + > + ret = i915_vma_unbind(vma); > + if (ret) > + return ret; > + } > + > + return 0; > +} > + > /** > * i915_gem_evict_vm - Evict all idle vmas from a vm > * @vm: Address space to cleanse > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > index 3d94744..4e6955e 100644 > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > @@ -596,6 +596,8 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma > *vma, > flags |= PIN_GLOBAL | PIN_MAPPABLE; > if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS) > flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS; > + if (entry->flags & EXEC_OBJECT_PINNED) > + flags |= entry->offset | PIN_OFFSET_FIXED; > } > > ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags); > @@ -665,6 +667,10 @@ eb_vma_misplaced(struct i915_vma *vma) > vma->node.start & (entry->alignment - 1)) > return true; > > + if (entry->flags & EXEC_OBJECT_PINNED && > + vma->node.start != entry->offset) > + return true; > + > if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS && > vma->node.start < BATCH_OFFSET_BIAS) > return true; > @@ -690,6 +696,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs > *ring, > struct i915_vma *vma; > struct i915_address_space *vm; > struct list_head ordered_vmas; > + struct list_head pinned_vmas; > bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; > int retry; > > @@ -698,6 +705,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs > *ring, > vm = list_first_entry(vmas, struct 
i915_vma, exec_list)->vm; > > INIT_LIST_HEAD(&ordered_vmas); > + INIT_LIST_HEAD(&pinned_vmas); > while (!list_empty(vmas)) { > struct drm_i915_gem_exec_object2 *entry; > bool need_fence, need_mappable; > @@ -716,7 +724,9 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs > *ring, > obj->tiling_mode != I915_TILING_NONE; > need_mappable = need_fence || need_reloc_mappable(vma); > > - if (need_mappable) { > + if (entry->flags & EXEC_OBJECT_PINNED) > + list_move_tail(&vma->exec_list, &pinned_vmas); > + else if (need_mappable) { > entry->flags |= __EXEC_OBJECT_NEEDS_MAP; > list_move(&vma->exec_list, &ordered_vmas); > } else > @@ -726,6 +736,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs > *ring, > obj->base.pending_write_domain = 0; > } > list_splice(&ordered_vmas, vmas); > + list_splice(&pinned_vmas, vmas); > > /* Attempt to pin all of the buffers into the GTT. > * This is done in 3 phases: > @@ -1404,7 +1415,8 @@ eb_get_batch(struct eb_vmas *eb) > * Note that actual hangs have only been observed on gen7, but for > * paranoia do it everywhere. > */ > - vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; > + if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0) > + vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; > > return vma->obj; > } > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index 55ba527..18f8f99 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -677,8 +677,10 @@ struct drm_i915_gem_exec_object2 { > __u64 alignment; > > /** > - * Returned value of the updated offset of the object, for future > - * presumed_offset writes. > + * When the EXEC_OBJECT_PINNED flag is specified this is populated by > + * the user with the GTT offset at which this object will be pinned. > + * Otherwise the kernel populates it with the value of the updated > + * offset of the object, for future presumed_offset writes. 
> */ > __u64 offset; > > @@ -686,7 +688,8 @@ struct drm_i915_gem_exec_object2 { > #define EXEC_OBJECT_NEEDS_GTT (1<<1) > #define EXEC_OBJECT_WRITE (1<<2) > #define EXEC_OBJECT_SUPPORTS_48BBADDRESS (1<<3) > -#define __EXEC_OBJECT_UNKNOWN_FLAGS - > (EXEC_OBJECT_SUPPORTS_48BBADDRESS<<1) > +#define EXEC_OBJECT_PINNED (1<<4) > +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1) > __u64 flags; > > __u64 rsvd1; > -- > 1.7.9.5
On Tue, Jun 30, 2015 at 7:13 AM, Thomas Daniel <thomas.daniel@intel.com> wrote: > From: Chris Wilson <chris@chris-wilson.co.uk> > > Userspace can pass in an offset that it presumes the object is located > at. The kernel will then do its utmost to fit the object into that > location. The assumption is that userspace is handling its own object > locations (for example along with full-ppgtt) and that the kernel will > rarely have to make space for the user's requests. > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by Chris > Wilson. Fixed incorrect error paths causing crash found by Michal Winiarski. > (Not published externally) > > v3: Rebased because of trivial conflict in object_bind_to_vm. Fixed eviction > to allow eviction of soft-pinned objects when another soft-pinned object used > by a subsequent execbuffer overlaps reported by Michal Winiarski. > (Not published externally) > > v4: Moved soft-pinned objects to the front of ordered_vmas so that they are > pinned first after an address conflict happens to avoid repeated conflicts in > rare cases (Suggested by Chris Wilson). Expanded comment on > drm_i915_gem_exec_object2.offset to cover this new API. Can we add an I915_PARAM_HAS_EXEC_PINNED param for this feature? 
Kristian > Cc: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Akash Goel <akash.goel@intel.com> > Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com> > Cc: Michal Winiarsky <michal.winiarsky@intel.com> > Cc: Zou Nanhai <nanhai.zou@intel.com> > Signed-off-by: Thomas Daniel <thomas.daniel@intel.com> > --- > drivers/gpu/drm/i915/i915_drv.h | 4 +++ > drivers/gpu/drm/i915/i915_gem.c | 51 ++++++++++++++++++++-------- > drivers/gpu/drm/i915/i915_gem_evict.c | 38 +++++++++++++++++++++ > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +++++++-- > include/uapi/drm/i915_drm.h | 9 +++-- > 5 files changed, 99 insertions(+), 19 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index d90a782..e96c101 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -2747,7 +2747,9 @@ void i915_gem_vma_destroy(struct i915_vma *vma); > #define PIN_USER (1<<4) > #define PIN_UPDATE (1<<5) > #define PIN_FULL_RANGE (1<<6) > +#define PIN_OFFSET_FIXED (1<<7) > #define PIN_OFFSET_MASK (~4095) > + > int __must_check > i915_gem_object_pin(struct drm_i915_gem_object *obj, > struct i915_address_space *vm, > @@ -3085,6 +3087,8 @@ int __must_check i915_gem_evict_something(struct drm_device *dev, > unsigned long start, > unsigned long end, > unsigned flags); > +int __must_check > +i915_gem_evict_for_vma(struct i915_vma *target); > int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle); > int i915_gem_evict_everything(struct drm_device *dev); > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index f170da6..a7e5ff2 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -3806,22 +3806,41 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, > if (IS_ERR(vma)) > goto err_unpin; > > + if (flags & PIN_OFFSET_FIXED) { > + uint64_t offset = flags & PIN_OFFSET_MASK; > + if (offset & (alignment - 1)) { > + ret = -EINVAL; > + goto err_free_vma; 
> + } > + vma->node.start = offset; > + vma->node.size = size; > + vma->node.color = obj->cache_level; > + ret = drm_mm_reserve_node(&vm->mm, &vma->node); > + if (ret) { > + ret = i915_gem_evict_for_vma(vma); > + if (ret == 0) > + ret = drm_mm_reserve_node(&vm->mm, &vma->node); > + } > + if (ret) > + goto err_free_vma; > + } else { > search_free: > - ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, > - size, alignment, > - obj->cache_level, > - start, end, > - search_flag, > - alloc_flag); > - if (ret) { > - ret = i915_gem_evict_something(dev, vm, size, alignment, > - obj->cache_level, > - start, end, > - flags); > - if (ret == 0) > - goto search_free; > + ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, > + size, alignment, > + obj->cache_level, > + start, end, > + search_flag, > + alloc_flag); > + if (ret) { > + ret = i915_gem_evict_something(dev, vm, size, alignment, > + obj->cache_level, > + start, end, > + flags); > + if (ret == 0) > + goto search_free; > > - goto err_free_vma; > + goto err_free_vma; > + } > } > if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) { > ret = -EINVAL; > @@ -4357,6 +4376,10 @@ i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags) > vma->node.start < (flags & PIN_OFFSET_MASK)) > return true; > > + if (flags & PIN_OFFSET_FIXED && > + vma->node.start != (flags & PIN_OFFSET_MASK)) > + return true; > + > return false; > } > > diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c > index d09e35e..6098e2f 100644 > --- a/drivers/gpu/drm/i915/i915_gem_evict.c > +++ b/drivers/gpu/drm/i915/i915_gem_evict.c > @@ -199,6 +199,44 @@ found: > return ret; > } > > +int > +i915_gem_evict_for_vma(struct i915_vma *target) > +{ > + struct drm_mm_node *node, *next; > + > + list_for_each_entry_safe(node, next, > + &target->vm->mm.head_node.node_list, > + node_list) { > + struct i915_vma *vma; > + int ret; > + > + if (node->start + node->size <= 
target->node.start) > + continue; > + if (node->start >= target->node.start + target->node.size) > + break; > + > + vma = container_of(node, typeof(*vma), node); > + > + if (vma->pin_count) { > + /* We may need to evict a buffer in the same batch */ > + if (!vma->exec_entry) > + return -EBUSY; > + > + if (vma->exec_entry->flags & EXEC_OBJECT_PINNED) > + /* Overlapping fixed objects in the same batch */ > + return -EINVAL; > + > + return -ENOSPC; > + } > + > + ret = i915_vma_unbind(vma); > + if (ret) > + return ret; > + } > + > + return 0; > +} > + > /** > * i915_gem_evict_vm - Evict all idle vmas from a vm > * @vm: Address space to cleanse > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > index 3d94744..4e6955e 100644 > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > @@ -596,6 +596,8 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, > flags |= PIN_GLOBAL | PIN_MAPPABLE; > if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS) > flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS; > + if (entry->flags & EXEC_OBJECT_PINNED) > + flags |= entry->offset | PIN_OFFSET_FIXED; > } > > ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags); > @@ -665,6 +667,10 @@ eb_vma_misplaced(struct i915_vma *vma) > vma->node.start & (entry->alignment - 1)) > return true; > > + if (entry->flags & EXEC_OBJECT_PINNED && > + vma->node.start != entry->offset) > + return true; > + > if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS && > vma->node.start < BATCH_OFFSET_BIAS) > return true; > @@ -690,6 +696,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, > struct i915_vma *vma; > struct i915_address_space *vm; > struct list_head ordered_vmas; > + struct list_head pinned_vmas; > bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; > int retry; > > @@ -698,6 +705,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, > vm = list_first_entry(vmas, struct i915_vma, 
exec_list)->vm; > > INIT_LIST_HEAD(&ordered_vmas); > + INIT_LIST_HEAD(&pinned_vmas); > while (!list_empty(vmas)) { > struct drm_i915_gem_exec_object2 *entry; > bool need_fence, need_mappable; > @@ -716,7 +724,9 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, > obj->tiling_mode != I915_TILING_NONE; > need_mappable = need_fence || need_reloc_mappable(vma); > > - if (need_mappable) { > + if (entry->flags & EXEC_OBJECT_PINNED) > + list_move_tail(&vma->exec_list, &pinned_vmas); > + else if (need_mappable) { > entry->flags |= __EXEC_OBJECT_NEEDS_MAP; > list_move(&vma->exec_list, &ordered_vmas); > } else > @@ -726,6 +736,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, > obj->base.pending_write_domain = 0; > } > list_splice(&ordered_vmas, vmas); > + list_splice(&pinned_vmas, vmas); > > /* Attempt to pin all of the buffers into the GTT. > * This is done in 3 phases: > @@ -1404,7 +1415,8 @@ eb_get_batch(struct eb_vmas *eb) > * Note that actual hangs have only been observed on gen7, but for > * paranoia do it everywhere. > */ > - vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; > + if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0) > + vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; > > return vma->obj; > } > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index 55ba527..18f8f99 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -677,8 +677,10 @@ struct drm_i915_gem_exec_object2 { > __u64 alignment; > > /** > - * Returned value of the updated offset of the object, for future > - * presumed_offset writes. > + * When the EXEC_OBJECT_PINNED flag is specified this is populated by > + * the user with the GTT offset at which this object will be pinned. > + * Otherwise the kernel populates it with the value of the updated > + * offset of the object, for future presumed_offset writes. 
> */ > __u64 offset; > > @@ -686,7 +688,8 @@ struct drm_i915_gem_exec_object2 { > #define EXEC_OBJECT_NEEDS_GTT (1<<1) > #define EXEC_OBJECT_WRITE (1<<2) > #define EXEC_OBJECT_SUPPORTS_48BBADDRESS (1<<3) > -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48BBADDRESS<<1) > +#define EXEC_OBJECT_PINNED (1<<4) > +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1) > __u64 flags; > > __u64 rsvd1; > -- > 1.7.9.5 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
On Tue, Jun 30, 2015 at 03:13:13PM +0100, Thomas Daniel wrote: > /** > - * Returned value of the updated offset of the object, for future > - * presumed_offset writes. > + * When the EXEC_OBJECT_PINNED flag is specified this is populated by > + * the user with the GTT offset at which this object will be pinned. > + * Otherwise the kernel populates it with the value of the updated > + * offset of the object, for future presumed_offset writes. > */ This isn't strictly true, even without PINNED but NO_RELOC this field must be accurately populated with the presumed_offset. No matter what, the kernel returns the value of the current address after execbuffer. -Chris
On Fri, Jul 03, 2015 at 10:29:44PM -0700, Kristian Høgsberg wrote: > On Tue, Jun 30, 2015 at 7:13 AM, Thomas Daniel <thomas.daniel@intel.com> wrote: > > From: Chris Wilson <chris@chris-wilson.co.uk> > > > > Userspace can pass in an offset that it presumes the object is located > > at. The kernel will then do its utmost to fit the object into that > > location. The assumption is that userspace is handling its own object > > locations (for example along with full-ppgtt) and that the kernel will > > rarely have to make space for the user's requests. > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > > > v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by Chris > > Wilson. Fixed incorrect error paths causing crash found by Michal Winiarski. > > (Not published externally) > > > > v3: Rebased because of trivial conflict in object_bind_to_vm. Fixed eviction > > to allow eviction of soft-pinned objects when another soft-pinned object used > > by a subsequent execbuffer overlaps reported by Michal Winiarski. > > (Not published externally) > > > > v4: Moved soft-pinned objects to the front of ordered_vmas so that they are > > pinned first after an address conflict happens to avoid repeated conflicts in > > rare cases (Suggested by Chris Wilson). Expanded comment on > > drm_i915_gem_exec_object2.offset to cover this new API. Note this patch is outdated compared to the one I have in my tree. There are also requirements to improve drm_mm_reserve_node(). > Can we add an I915_PARAM_HAS_EXEC_PINNED param for this feature? 
Yeah, it's not that difficult to test, static bool test_has_softpin(int fd) { struct drm_i915_gem_create create; struct drm_i915_gem_exec_object2 object; struct drm_i915_gem_pwrite pwrite; struct drm_i915_gem_execbuffer2 execbuf; uint32_t batch[2] = { 0xa << 23 }; bool ret = false; if (DBG_NO_SOFTPIN) return DBG_NO_SOFTPIN < 0; if (gem_param(fd, I915_PARAM_HAS_ALIASING_PPGTT) < 2) return false; memset(&create, 0, sizeof(create)); create.size = 4096; drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create); memset(&pwrite, 0, sizeof(pwrite)); pwrite.handle = create.handle; pwrite.offset = 0; pwrite.size = sizeof(batch); pwrite.data_ptr = (uintptr_t)batch; if (drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite)) goto fail; object.handle = create.handle; memset(&execbuf, 0, sizeof(execbuf)); execbuf.buffers_ptr = (uintptr_t)&object; execbuf.buffer_count = 1; if (drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf)) goto fail; object.flags = EXEC_OBJECT_PINNED; ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf) == 0; fail: drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &create); return ret; } but compares to static bool test_has_softpin(int fd) { if (DBG_NO_SOFTPIN) return DBG_NO_SOFTPIN < 0; if (gem_param(fd, I915_PARAM_HAS_ALIASING_PPGTT) < 2) return false; return gem_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN) > 0; } with a parameter. At some point, we probably want to add a GETPARAMV! -Chris
> -----Original Message----- > From: Chris Wilson [mailto:chris@chris-wilson.co.uk] > Sent: Saturday, July 4, 2015 1:24 PM > To: Kristian Høgsberg > Cc: Daniel, Thomas; Belgaumkar, Vinay; intel-gfx@lists.freedesktop.org; Michal > Winiarsky; Goel, Akash > Subject: Re: [Intel-gfx] [PATCH v4] drm/i915: Add soft-pinning API for execbuffer > > On Fri, Jul 03, 2015 at 10:29:44PM -0700, Kristian Høgsberg wrote: > > On Tue, Jun 30, 2015 at 7:13 AM, Thomas Daniel <thomas.daniel@intel.com> > wrote: > > > From: Chris Wilson <chris@chris-wilson.co.uk> > > > > > > Userspace can pass in an offset that it presumes the object is located > > > at. The kernel will then do its utmost to fit the object into that > > > location. The assumption is that userspace is handling its own object > > > locations (for example along with full-ppgtt) and that the kernel will > > > rarely have to make space for the user's requests. > > > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > > > > > v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by Chris > > > Wilson. Fixed incorrect error paths causing crash found by Michal Winiarski. > > > (Not published externally) > > > > > > v3: Rebased because of trivial conflict in object_bind_to_vm. Fixed eviction > > > to allow eviction of soft-pinned objects when another soft-pinned object > used > > > by a subsequent execbuffer overlaps reported by Michal Winiarski. > > > (Not published externally) > > > > > > v4: Moved soft-pinned objects to the front of ordered_vmas so that they are > > > pinned first after an address conflict happens to avoid repeated conflicts in > > > rare cases (Suggested by Chris Wilson). Expanded comment on > > > drm_i915_gem_exec_object2.offset to cover this new API. > > Note this patch is outdated compared to the one I have in my tree. There > are also requirements to improve drm_mm_reserve_node(). What requirements are these? > > Can we add an I915_PARAM_HAS_EXEC_PINNED param for this feature? 
> > Yeah, it's not that difficult to test, > > static bool test_has_softpin(int fd) > { > struct drm_i915_gem_create create; > struct drm_i915_gem_exec_object2 object; > struct drm_i915_gem_pwrite pwrite; > struct drm_i915_gem_execbuffer2 execbuf; > uint32_t batch[2] = { 0xa << 23 }; > bool ret = false; > > if (DBG_NO_SOFTPIN) > return DBG_NO_SOFTPIN < 0; > > if (gem_param(fd, I915_PARAM_HAS_ALIASING_PPGTT) < 2) > return false; > > memset(&create, 0, sizeof(create)); > create.size = 4096; > drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create); > > memset(&pwrite, 0, sizeof(pwrite)); > pwrite.handle = create.handle; > pwrite.offset = 0; > pwrite.size = sizeof(batch); > pwrite.data_ptr = (uintptr_t)batch; > if (drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite)) > goto fail; > > object.handle = create.handle; > > memset(&execbuf, 0, sizeof(execbuf)); > execbuf.buffers_ptr = (uintptr_t)&object; > execbuf.buffer_count = 1; > if (drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf)) > goto fail; > > object.flags = EXEC_OBJECT_PINNED; > ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf) == 0; > fail: > drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &create); > > return ret; > } > > but compares to > > static bool test_has_softpin(int fd) > { > if (DBG_NO_SOFTPIN) > return DBG_NO_SOFTPIN < 0; > > if (gem_param(fd, I915_PARAM_HAS_ALIASING_PPGTT) < 2) > return false; > > return gem_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN) > 0; > } > > with a parameter. At some point, we probably want to add a GETPARAMV! Yes, a parameter would be cleaner. Thomas.
On Wed, Jul 08, 2015 at 03:04:45PM +0000, Daniel, Thomas wrote: > > -----Original Message----- > > From: Chris Wilson [mailto:chris@chris-wilson.co.uk] > > Sent: Saturday, July 4, 2015 1:24 PM > > To: Kristian Høgsberg > > Cc: Daniel, Thomas; Belgaumkar, Vinay; intel-gfx@lists.freedesktop.org; Michal > > Winiarsky; Goel, Akash > > Subject: Re: [Intel-gfx] [PATCH v4] drm/i915: Add soft-pinning API for execbuffer > > > > On Fri, Jul 03, 2015 at 10:29:44PM -0700, Kristian Høgsberg wrote: > > > On Tue, Jun 30, 2015 at 7:13 AM, Thomas Daniel <thomas.daniel@intel.com> > > wrote: > > > > From: Chris Wilson <chris@chris-wilson.co.uk> > > > > > > > > Userspace can pass in an offset that it presumes the object is located > > > > at. The kernel will then do its utmost to fit the object into that > > > > location. The assumption is that userspace is handling its own object > > > > locations (for example along with full-ppgtt) and that the kernel will > > > > rarely have to make space for the user's requests. > > > > > > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > > > > > > > v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by Chris > > > > Wilson. Fixed incorrect error paths causing crash found by Michal Winiarski. > > > > (Not published externally) > > > > > > > > v3: Rebased because of trivial conflict in object_bind_to_vm. Fixed eviction > > > > to allow eviction of soft-pinned objects when another soft-pinned object > > used > > > > by a subsequent execbuffer overlaps reported by Michal Winiarski. > > > > (Not published externally) > > > > > > > > v4: Moved soft-pinned objects to the front of ordered_vmas so that they are > > > > pinned first after an address conflict happens to avoid repeated conflicts in > > > > rare cases (Suggested by Chris Wilson). Expanded comment on > > > > drm_i915_gem_exec_object2.offset to cover this new API. > > > > Note this patch is outdated compared to the one I have in my tree. 
There > > are also requirements to improve drm_mm_reserve_node(). > What requirements are these? I switched execbuffer to try reusing the last address by default (i.e. softpin everything) and the linear walk in reserve_node and evict_for_vma were major bottlenecks. Adding an interval-tree to drm_mm fixes the issue but also requires rewriting evict_for_vma. Plus using it by default highlighted a few errors in the failure handling. Also don't forget that softpinning requires us to be able to reuse the vm for all rings, i.e. we need the API fix to allow user contexts for rings other than RCS. -Chris
On 6/30/2015 7:50 PM, Daniel, Thomas wrote: > Many apologies to Michal for incorrectly spelling his name in the CC list. > > Thomas. > >> -----Original Message----- >> From: Daniel, Thomas >> Sent: Tuesday, June 30, 2015 3:13 PM >> To: intel-gfx@lists.freedesktop.org >> Cc: Chris Wilson; Goel, Akash; Belgaumkar, Vinay; Michal Winiarsky; Zou, >> Nanhai; Daniel, Thomas >> Subject: [PATCH v4] drm/i915: Add soft-pinning API for execbuffer >> >> From: Chris Wilson <chris@chris-wilson.co.uk> >> >> Userspace can pass in an offset that it presumes the object is located >> at. The kernel will then do its utmost to fit the object into that >> location. The assumption is that userspace is handling its own object >> locations (for example along with full-ppgtt) and that the kernel will >> rarely have to make space for the user's requests. >> >> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> >> >> v2: Fixed incorrect eviction found by Michal Winiarski - fix suggested by Chris >> Wilson. Fixed incorrect error paths causing crash found by Michal Winiarski. >> (Not published externally) >> >> v3: Rebased because of trivial conflict in object_bind_to_vm. Fixed eviction >> to allow eviction of soft-pinned objects when another soft-pinned object used >> by a subsequent execbuffer overlaps reported by Michal Winiarski. >> (Not published externally) >> >> v4: Moved soft-pinned objects to the front of ordered_vmas so that they are >> pinned first after an address conflict happens to avoid repeated conflicts in >> rare cases (Suggested by Chris Wilson). Expanded comment on >> drm_i915_gem_exec_object2.offset to cover this new API. 
>> >> Cc: Chris Wilson <chris@chris-wilson.co.uk> >> Cc: Akash Goel <akash.goel@intel.com> >> Cc: Vinay Belgaumkar <vinay.belgaumkar@intel.com> >> Cc: Michal Winiarsky <michal.winiarsky@intel.com> >> Cc: Zou Nanhai <nanhai.zou@intel.com> >> Signed-off-by: Thomas Daniel <thomas.daniel@intel.com> >> --- >> drivers/gpu/drm/i915/i915_drv.h | 4 +++ >> drivers/gpu/drm/i915/i915_gem.c | 51 ++++++++++++++++++++-------- >> drivers/gpu/drm/i915/i915_gem_evict.c | 38 +++++++++++++++++++++ >> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +++++++-- >> include/uapi/drm/i915_drm.h | 9 +++-- >> 5 files changed, 99 insertions(+), 19 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h >> index d90a782..e96c101 100644 >> --- a/drivers/gpu/drm/i915/i915_drv.h >> +++ b/drivers/gpu/drm/i915/i915_drv.h >> @@ -2747,7 +2747,9 @@ void i915_gem_vma_destroy(struct i915_vma *vma); >> #define PIN_USER (1<<4) >> #define PIN_UPDATE (1<<5) >> #define PIN_FULL_RANGE (1<<6) >> +#define PIN_OFFSET_FIXED (1<<7) >> #define PIN_OFFSET_MASK (~4095) >> + >> int __must_check >> i915_gem_object_pin(struct drm_i915_gem_object *obj, >> struct i915_address_space *vm, >> @@ -3085,6 +3087,8 @@ int __must_check i915_gem_evict_something(struct >> drm_device *dev, >> unsigned long start, >> unsigned long end, >> unsigned flags); >> +int __must_check >> +i915_gem_evict_for_vma(struct i915_vma *target); >> int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle); >> int i915_gem_evict_everything(struct drm_device *dev); >> >> diff --git a/drivers/gpu/drm/i915/i915_gem.c >> b/drivers/gpu/drm/i915/i915_gem.c >> index f170da6..a7e5ff2 100644 >> --- a/drivers/gpu/drm/i915/i915_gem.c >> +++ b/drivers/gpu/drm/i915/i915_gem.c >> @@ -3806,22 +3806,41 @@ i915_gem_object_bind_to_vm(struct >> drm_i915_gem_object *obj, >> if (IS_ERR(vma)) >> goto err_unpin; >> >> + if (flags & PIN_OFFSET_FIXED) { >> + uint64_t offset = flags & PIN_OFFSET_MASK; >> + if (offset & (alignment 
- 1)) { >> + ret = -EINVAL; >> + goto err_free_vma; >> + } >> + vma->node.start = offset; >> + vma->node.size = size; >> + vma->node.color = obj->cache_level; >> + ret = drm_mm_reserve_node(&vm->mm, &vma->node); >> + if (ret) { >> + ret = i915_gem_evict_for_vma(vma); >> + if (ret == 0) >> + ret = drm_mm_reserve_node(&vm->mm, >> &vma->node); >> + } >> + if (ret) >> + goto err_free_vma; >> + } else { >> search_free: >> - ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, >> - size, alignment, >> - obj->cache_level, >> - start, end, >> - search_flag, >> - alloc_flag); >> - if (ret) { >> - ret = i915_gem_evict_something(dev, vm, size, alignment, >> - obj->cache_level, >> - start, end, >> - flags); >> - if (ret == 0) >> - goto search_free; >> + ret = drm_mm_insert_node_in_range_generic(&vm->mm, >> &vma->node, >> + size, alignment, >> + obj->cache_level, >> + start, end, >> + search_flag, >> + alloc_flag); >> + if (ret) { >> + ret = i915_gem_evict_something(dev, vm, size, >> alignment, >> + obj->cache_level, >> + start, end, >> + flags); >> + if (ret == 0) >> + goto search_free; >> >> - goto err_free_vma; >> + goto err_free_vma; >> + } >> } >> if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) { >> ret = -EINVAL; >> @@ -4357,6 +4376,10 @@ i915_vma_misplaced(struct i915_vma *vma, >> uint32_t alignment, uint64_t flags) >> vma->node.start < (flags & PIN_OFFSET_MASK)) >> return true; >> >> + if (flags & PIN_OFFSET_FIXED && >> + vma->node.start != (flags & PIN_OFFSET_MASK)) >> + return true; >> + >> return false; >> } >> >> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c >> b/drivers/gpu/drm/i915/i915_gem_evict.c >> index d09e35e..6098e2f 100644 >> --- a/drivers/gpu/drm/i915/i915_gem_evict.c >> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c >> @@ -199,6 +199,44 @@ found: >> return ret; >> } >> >> +int >> +i915_gem_evict_for_vma(struct i915_vma *target) >> +{ >> + struct drm_mm_node *node, *next; >> + >> + list_for_each_entry_safe(node, next, >> + 
&target->vm->mm.head_node.node_list, >> + node_list) { >> + struct i915_vma *vma; >> + int ret; >> + >> + if (node->start + node->size <= target->node.start) >> + continue; >> + if (node->start >= target->node.start + target->node.size) >> + break; >> + >> + vma = container_of(node, typeof(*vma), node); >> + >> + if (vma->pin_count) { >> + /* We may need to evict a buffer in the same batch */ >> + if (!vma->exec_entry) >> + return -EBUSY; >> + >> + if (vma->exec_entry->flags & EXEC_OBJECT_PINNED) >> + /* Overlapping fixed objects in the same batch >> */ >> + return -EINVAL; >> + >> + return -ENOSPC; Can we actually hit this condition, considering that the soft-pinned objects are now at the front of the 'eb->vmas' list? If we do encounter such a case, it probably means that the overlapping object is already pinned from some other path. Is there scope for an additional check here? i.e. if vma->pin_count is > 1, this indicates that the object is not pinned only due to the execbuffer, hence cannot be evicted, so -EBUSY can be returned straight away to the user. 
Best regards Akash >> + } >> + >> + ret = i915_vma_unbind(vma); >> + if (ret) >> + return ret; >> + } >> + >> + return 0; >> +} >> + >> /** >> * i915_gem_evict_vm - Evict all idle vmas from a vm >> * @vm: Address space to cleanse >> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c >> b/drivers/gpu/drm/i915/i915_gem_execbuffer.c >> index 3d94744..4e6955e 100644 >> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c >> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c >> @@ -596,6 +596,8 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma >> *vma, >> flags |= PIN_GLOBAL | PIN_MAPPABLE; >> if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS) >> flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS; >> + if (entry->flags & EXEC_OBJECT_PINNED) >> + flags |= entry->offset | PIN_OFFSET_FIXED; >> } >> >> ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags); >> @@ -665,6 +667,10 @@ eb_vma_misplaced(struct i915_vma *vma) >> vma->node.start & (entry->alignment - 1)) >> return true; >> >> + if (entry->flags & EXEC_OBJECT_PINNED && >> + vma->node.start != entry->offset) >> + return true; >> + >> if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS && >> vma->node.start < BATCH_OFFSET_BIAS) >> return true; >> @@ -690,6 +696,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs >> *ring, >> struct i915_vma *vma; >> struct i915_address_space *vm; >> struct list_head ordered_vmas; >> + struct list_head pinned_vmas; >> bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; >> int retry; >> >> @@ -698,6 +705,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs >> *ring, >> vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm; >> >> INIT_LIST_HEAD(&ordered_vmas); >> + INIT_LIST_HEAD(&pinned_vmas); >> while (!list_empty(vmas)) { >> struct drm_i915_gem_exec_object2 *entry; >> bool need_fence, need_mappable; >> @@ -716,7 +724,9 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs >> *ring, >> obj->tiling_mode != I915_TILING_NONE; >> need_mappable = need_fence || 
need_reloc_mappable(vma); >> >> - if (need_mappable) { >> + if (entry->flags & EXEC_OBJECT_PINNED) >> + list_move_tail(&vma->exec_list, &pinned_vmas); >> + else if (need_mappable) { >> entry->flags |= __EXEC_OBJECT_NEEDS_MAP; >> list_move(&vma->exec_list, &ordered_vmas); >> } else >> @@ -726,6 +736,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs >> *ring, >> obj->base.pending_write_domain = 0; >> } >> list_splice(&ordered_vmas, vmas); >> + list_splice(&pinned_vmas, vmas); >> >> /* Attempt to pin all of the buffers into the GTT. >> * This is done in 3 phases: >> @@ -1404,7 +1415,8 @@ eb_get_batch(struct eb_vmas *eb) >> * Note that actual hangs have only been observed on gen7, but for >> * paranoia do it everywhere. >> */ >> - vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; >> + if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0) >> + vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; >> >> return vma->obj; >> } >> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h >> index 55ba527..18f8f99 100644 >> --- a/include/uapi/drm/i915_drm.h >> +++ b/include/uapi/drm/i915_drm.h >> @@ -677,8 +677,10 @@ struct drm_i915_gem_exec_object2 { >> __u64 alignment; >> >> /** >> - * Returned value of the updated offset of the object, for future >> - * presumed_offset writes. >> + * When the EXEC_OBJECT_PINNED flag is specified this is populated by >> + * the user with the GTT offset at which this object will be pinned. >> + * Otherwise the kernel populates it with the value of the updated >> + * offset of the object, for future presumed_offset writes. 
>> */ >> __u64 offset; >> >> @@ -686,7 +688,8 @@ struct drm_i915_gem_exec_object2 { >> #define EXEC_OBJECT_NEEDS_GTT (1<<1) >> #define EXEC_OBJECT_WRITE (1<<2) >> #define EXEC_OBJECT_SUPPORTS_48BBADDRESS (1<<3) >> -#define __EXEC_OBJECT_UNKNOWN_FLAGS - >> (EXEC_OBJECT_SUPPORTS_48BBADDRESS<<1) >> +#define EXEC_OBJECT_PINNED (1<<4) >> +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1) >> __u64 flags; >> >> __u64 rsvd1; >> -- >> 1.7.9.5 >
On Wed, Jul 15, 2015 at 08:25:23PM +0530, Goel, Akash wrote: > >>+int > >>+i915_gem_evict_for_vma(struct i915_vma *target) > >>+{ > >>+ struct drm_mm_node *node, *next; > >>+ > >>+ list_for_each_entry_safe(node, next, > >>+ &target->vm->mm.head_node.node_list, > >>+ node_list) { > >>+ struct i915_vma *vma; > >>+ int ret; > >>+ > >>+ if (node->start + node->size <= target->node.start) > >>+ continue; > >>+ if (node->start >= target->node.start + target->node.size) > >>+ break; > >>+ > >>+ vma = container_of(node, typeof(*vma), node); > >>+ > >>+ if (vma->pin_count) { > >>+ /* We may need to evict a buffer in the same batch */ > >>+ if (!vma->exec_entry) > >>+ return -EBUSY; > >>+ > >>+ if (vma->exec_entry->flags & EXEC_OBJECT_PINNED) > >>+ /* Overlapping fixed objects in the same batch > >>*/ > >>+ return -EINVAL; > >>+ > >>+ return -ENOSPC; > > Can we actually hit this condition, considering the soft pinned > objects are now on the front side of 'eb->vmas' list ? > If we do encounter such a case, it probably means that the > overlapping object is already pinned from some other path. Note that softpinned objects are only first on the second pass through the reservation. > Is there a scope of an additional check here ? > i.e. if (vma->pin_count) is > 1, this indicates that the object is > not only pinned due to execbuffer, hence cannot be evicted, so > -EBUSY can be straight away returned to User. 
Consider this instead: int i915_gem_evict_for_vma(struct i915_vma *target, unsigned flags) { struct list_head eviction_list; struct interval_tree_node *it; u64 end = target->node.start + target->node.size; struct drm_mm_node *node; struct i915_vma *vma, *next; int ret; it = interval_tree_iter_first(&target->vm->mm.interval_tree, target->node.start, end -1); if (it == NULL) return 0; INIT_LIST_HEAD(&eviction_list); node = container_of(it, typeof(*node), it); list_for_each_entry_from(node, &target->vm->mm.head_node.node_list, node_list) { if (node->start >= end) break; vma = container_of(node, typeof(*vma), node); if (flags & PIN_NONBLOCK && (vma->pin_count || vma->active.request)) { ret = -ENOSPC; break; } if (vma->exec_entry && vma->exec_entry->flags & EXEC_OBJECT_PINNED) { /* Overlapping pinned objects in the same batch */ ret = -EINVAL; break; } if (vma->pin_count) { /* We may need to evict an buffer in the same batch */ ret = vma->exec_entry ? -ENOSPC : -EBUSY; break; } list_add(&vma->exec_list, &eviction_list); drm_gem_object_reference(&vma->obj->base); } ret = 0; list_for_each_entry_safe(vma, next, &eviction_list, exec_list) { struct drm_i915_gem_object *obj = vma->obj; if (ret == 0) ret = i915_vma_unbind(vma); drm_gem_object_unreference(&obj->base); } return ret; } -Chris
> -----Original Message----- > From: Chris Wilson [mailto:chris@chris-wilson.co.uk] > Sent: Wednesday, July 15, 2015 4:06 PM > To: Goel, Akash > Cc: Daniel, Thomas; intel-gfx@lists.freedesktop.org; Belgaumkar, Vinay; > Winiarski, Michal; Zou, Nanhai > Subject: Re: [PATCH v4] drm/i915: Add soft-pinning API for execbuffer > > On Wed, Jul 15, 2015 at 08:25:23PM +0530, Goel, Akash wrote: > > >>+int > > >>+i915_gem_evict_for_vma(struct i915_vma *target) > > >>+{ > > >>+ struct drm_mm_node *node, *next; > > >>+ > > >>+ list_for_each_entry_safe(node, next, > > >>+ &target->vm->mm.head_node.node_list, > > >>+ node_list) { > > >>+ struct i915_vma *vma; > > >>+ int ret; > > >>+ > > >>+ if (node->start + node->size <= target->node.start) > > >>+ continue; > > >>+ if (node->start >= target->node.start + target->node.size) > > >>+ break; > > >>+ > > >>+ vma = container_of(node, typeof(*vma), node); > > >>+ > > >>+ if (vma->pin_count) { > > >>+ /* We may need to evict a buffer in the same batch */ > > >>+ if (!vma->exec_entry) > > >>+ return -EBUSY; > > >>+ > > >>+ if (vma->exec_entry->flags & EXEC_OBJECT_PINNED) > > >>+ /* Overlapping fixed objects in the same batch > > >>*/ > > >>+ return -EINVAL; > > >>+ > > >>+ return -ENOSPC; > > > > Can we actually hit this condition, considering the soft pinned > > objects are now on the front side of 'eb->vmas' list ? > > If we do encounter such a case, it probably means that the > > overlapping object is already pinned from some other path. > > Note that softpinned objects are only first on the second pass through > the reservation. Eh? I modified i915_gem_execbuffer_reserve() to always put the softpinned vmas first so they should never collide with objects in the same execbuff. > > > Is there a scope of an additional check here ? > > i.e. if (vma->pin_count) is > 1, this indicates that the object is > > not only pinned due to execbuffer, hence cannot be evicted, so > > -EBUSY can be straight away returned to User. 
> > Consider this instead: > > int > i915_gem_evict_for_vma(struct i915_vma *target, unsigned flags) > { > struct list_head eviction_list; > struct interval_tree_node *it; > u64 end = target->node.start + target->node.size; > struct drm_mm_node *node; > struct i915_vma *vma, *next; > int ret; > > it = interval_tree_iter_first(&target->vm->mm.interval_tree, > target->node.start, end -1); > if (it == NULL) > return 0; > > INIT_LIST_HEAD(&eviction_list); > node = container_of(it, typeof(*node), it); > list_for_each_entry_from(node, > &target->vm->mm.head_node.node_list, > node_list) { > if (node->start >= end) > break; > > vma = container_of(node, typeof(*vma), node); > if (flags & PIN_NONBLOCK && > (vma->pin_count || vma->active.request)) { > ret = -ENOSPC; > break; > } > > if (vma->exec_entry && > vma->exec_entry->flags & EXEC_OBJECT_PINNED) { > /* Overlapping pinned objects in the same batch */ > ret = -EINVAL; > break; > } > > if (vma->pin_count) { > /* We may need to evict an buffer in the same batch */ > ret = vma->exec_entry ? -ENOSPC : -EBUSY; > break; > } > > list_add(&vma->exec_list, &eviction_list); > drm_gem_object_reference(&vma->obj->base); > } > > ret = 0; > list_for_each_entry_safe(vma, next, &eviction_list, exec_list) { > struct drm_i915_gem_object *obj = vma->obj; > if (ret == 0) > ret = i915_vma_unbind(vma); > drm_gem_object_unreference(&obj->base); > } > > return ret; > } > -Chris This contains special stuff which I don't have visibility of (mm.interval_tree, vma.active). Thomas.
On Wed, Jul 15, 2015 at 03:41:49PM +0000, Daniel, Thomas wrote: > > -----Original Message----- > > From: Chris Wilson [mailto:chris@chris-wilson.co.uk] > > Sent: Wednesday, July 15, 2015 4:06 PM > > To: Goel, Akash > > Cc: Daniel, Thomas; intel-gfx@lists.freedesktop.org; Belgaumkar, Vinay; > > Winiarski, Michal; Zou, Nanhai > > Subject: Re: [PATCH v4] drm/i915: Add soft-pinning API for execbuffer > > > > On Wed, Jul 15, 2015 at 08:25:23PM +0530, Goel, Akash wrote: > > > >>+int > > > >>+i915_gem_evict_for_vma(struct i915_vma *target) > > > >>+{ > > > >>+ struct drm_mm_node *node, *next; > > > >>+ > > > >>+ list_for_each_entry_safe(node, next, > > > >>+ &target->vm->mm.head_node.node_list, > > > >>+ node_list) { > > > >>+ struct i915_vma *vma; > > > >>+ int ret; > > > >>+ > > > >>+ if (node->start + node->size <= target->node.start) > > > >>+ continue; > > > >>+ if (node->start >= target->node.start + target->node.size) > > > >>+ break; > > > >>+ > > > >>+ vma = container_of(node, typeof(*vma), node); > > > >>+ > > > >>+ if (vma->pin_count) { > > > >>+ /* We may need to evict a buffer in the same batch */ > > > >>+ if (!vma->exec_entry) > > > >>+ return -EBUSY; > > > >>+ > > > >>+ if (vma->exec_entry->flags & EXEC_OBJECT_PINNED) > > > >>+ /* Overlapping fixed objects in the same batch > > > >>*/ > > > >>+ return -EINVAL; > > > >>+ > > > >>+ return -ENOSPC; > > > > > > Can we actually hit this condition, considering the soft pinned > > > objects are now on the front side of 'eb->vmas' list ? > > > If we do encounter such a case, it probably means that the > > > overlapping object is already pinned from some other path. > > > > Note that softpinned objects are only first on the second pass through > > the reservation. > Eh? I modified i915_gem_execbuffer_reserve() to always put the softpinned vmas first so they should never collide with objects in the same execbuff. 
That would be a severe performance penalty for the usual case where we only have to run once through the list. > This contains special stuff which I don't have visibility of (mm.interval_tree, vma.active). Yes. I mentioned that using softpin introduced some perf regressions. -Chris
> -----Original Message----- > From: Chris Wilson [mailto:chris@chris-wilson.co.uk] > Sent: Wednesday, July 15, 2015 4:47 PM > To: Daniel, Thomas > Cc: Goel, Akash; intel-gfx@lists.freedesktop.org; Belgaumkar, Vinay; Winiarski, > Michal; Zou, Nanhai > Subject: Re: [PATCH v4] drm/i915: Add soft-pinning API for execbuffer > > On Wed, Jul 15, 2015 at 03:41:49PM +0000, Daniel, Thomas wrote: > > > -----Original Message----- > > > From: Chris Wilson [mailto:chris@chris-wilson.co.uk] > > > Sent: Wednesday, July 15, 2015 4:06 PM > > > To: Goel, Akash > > > Cc: Daniel, Thomas; intel-gfx@lists.freedesktop.org; Belgaumkar, Vinay; > > > Winiarski, Michal; Zou, Nanhai > > > Subject: Re: [PATCH v4] drm/i915: Add soft-pinning API for execbuffer > > > > > > On Wed, Jul 15, 2015 at 08:25:23PM +0530, Goel, Akash wrote: > > > > >>+int > > > > >>+i915_gem_evict_for_vma(struct i915_vma *target) > > > > >>+{ > > > > >>+ struct drm_mm_node *node, *next; > > > > >>+ > > > > >>+ list_for_each_entry_safe(node, next, > > > > >>+ &target->vm->mm.head_node.node_list, > > > > >>+ node_list) { > > > > >>+ struct i915_vma *vma; > > > > >>+ int ret; > > > > >>+ > > > > >>+ if (node->start + node->size <= target->node.start) > > > > >>+ continue; > > > > >>+ if (node->start >= target->node.start + target- > >node.size) > > > > >>+ break; > > > > >>+ > > > > >>+ vma = container_of(node, typeof(*vma), node); > > > > >>+ > > > > >>+ if (vma->pin_count) { > > > > >>+ /* We may need to evict a buffer in the same > batch */ > > > > >>+ if (!vma->exec_entry) > > > > >>+ return -EBUSY; > > > > >>+ > > > > >>+ if (vma->exec_entry->flags & > EXEC_OBJECT_PINNED) > > > > >>+ /* Overlapping fixed objects in the > same batch > > > > >>*/ > > > > >>+ return -EINVAL; > > > > >>+ > > > > >>+ return -ENOSPC; > > > > > > > > Can we actually hit this condition, considering the soft pinned > > > > objects are now on the front side of 'eb->vmas' list ? 
> > > > If we do encounter such a case, it probably means that the > > > > overlapping object is already pinned from some other path. > > > > > > Note that softpinned objects are only first on the second pass through > > > the reservation. > > Eh? I modified i915_gem_execbuffer_reserve() to always put the softpinned > vmas first so they should never collide with objects in the same execbuff. > > That would be a severe performance penalty for the usual case where we > only have to run once through the list. How so? execbuffer_reserve() already traverses the entire vma list and does a list_move or list_move_tail onto ordered_vmas for each entry. I've simply created a new list for the softpinned vmas and spliced the two together at the end. > > > This contains special stuff which I don't have visibility of (mm.interval_tree, > vma.active). > > Yes. I mentioned that using softpin introduced some perf regressions. > -Chris Can we get your improvements as follow-up patches? Cheers, Thomas.
On Wed, Jul 15, 2015 at 03:58:33PM +0000, Daniel, Thomas wrote: > > -----Original Message----- > > From: Chris Wilson [mailto:chris@chris-wilson.co.uk] > > Sent: Wednesday, July 15, 2015 4:47 PM > > To: Daniel, Thomas > > Cc: Goel, Akash; intel-gfx@lists.freedesktop.org; Belgaumkar, Vinay; Winiarski, > > Michal; Zou, Nanhai > > Subject: Re: [PATCH v4] drm/i915: Add soft-pinning API for execbuffer > > > > On Wed, Jul 15, 2015 at 03:41:49PM +0000, Daniel, Thomas wrote: > > > > -----Original Message----- > > > > From: Chris Wilson [mailto:chris@chris-wilson.co.uk] > > > > Sent: Wednesday, July 15, 2015 4:06 PM > > > > To: Goel, Akash > > > > Cc: Daniel, Thomas; intel-gfx@lists.freedesktop.org; Belgaumkar, Vinay; > > > > Winiarski, Michal; Zou, Nanhai > > > > Subject: Re: [PATCH v4] drm/i915: Add soft-pinning API for execbuffer > > > > > > > > On Wed, Jul 15, 2015 at 08:25:23PM +0530, Goel, Akash wrote: > > > > > >>+int > > > > > >>+i915_gem_evict_for_vma(struct i915_vma *target) > > > > > >>+{ > > > > > >>+ struct drm_mm_node *node, *next; > > > > > >>+ > > > > > >>+ list_for_each_entry_safe(node, next, > > > > > >>+ &target->vm->mm.head_node.node_list, > > > > > >>+ node_list) { > > > > > >>+ struct i915_vma *vma; > > > > > >>+ int ret; > > > > > >>+ > > > > > >>+ if (node->start + node->size <= target->node.start) > > > > > >>+ continue; > > > > > >>+ if (node->start >= target->node.start + target- > > >node.size) > > > > > >>+ break; > > > > > >>+ > > > > > >>+ vma = container_of(node, typeof(*vma), node); > > > > > >>+ > > > > > >>+ if (vma->pin_count) { > > > > > >>+ /* We may need to evict a buffer in the same > > batch */ > > > > > >>+ if (!vma->exec_entry) > > > > > >>+ return -EBUSY; > > > > > >>+ > > > > > >>+ if (vma->exec_entry->flags & > > EXEC_OBJECT_PINNED) > > > > > >>+ /* Overlapping fixed objects in the > > same batch > > > > > >>*/ > > > > > >>+ return -EINVAL; > > > > > >>+ > > > > > >>+ return -ENOSPC; > > > > > > > > > > Can we actually hit this 
condition, considering the soft pinned > > > > > objects are now on the front side of 'eb->vmas' list ? > > > > > If we do encounter such a case, it probably means that the > > > > > overlapping object is already pinned from some other path. > > > > > > > > Note that softpinned objects are only first on the second pass through > > > > the reservation. > > > Eh? I modified i915_gem_execbuffer_reserve() to always put the softpinned > > vmas first so they should never collide with objects in the same execbuff. > > > > That would be a severe performance penalty for the usual case where we > > only have to run once through the list. > How so? execbuffer_reserve() already traverses the entire vma list and does a list_move or list_move_tail onto ordered_vmas for each entry. I've simply created a new list for the softpinned vmas and spliced the two together at the end. I posted the fix for that a few months ago. > > > This contains special stuff which I don't have visibility of (mm.interval_tree, > > vma.active). > > > > Yes. I mentioned that using softpin introduced some perf regressions. > > -Chris > Can we get your improvements as follow-up patches? No. Softpin has requirements that are not met by the current kernel. -Chris
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d90a782..e96c101 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2747,7 +2747,9 @@ void i915_gem_vma_destroy(struct i915_vma *vma); #define PIN_USER (1<<4) #define PIN_UPDATE (1<<5) #define PIN_FULL_RANGE (1<<6) +#define PIN_OFFSET_FIXED (1<<7) #define PIN_OFFSET_MASK (~4095) + int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj, struct i915_address_space *vm, @@ -3085,6 +3087,8 @@ int __must_check i915_gem_evict_something(struct drm_device *dev, unsigned long start, unsigned long end, unsigned flags); +int __must_check +i915_gem_evict_for_vma(struct i915_vma *target); int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle); int i915_gem_evict_everything(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f170da6..a7e5ff2 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3806,22 +3806,41 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj, if (IS_ERR(vma)) goto err_unpin; + if (flags & PIN_OFFSET_FIXED) { + uint64_t offset = flags & PIN_OFFSET_MASK; + if (offset & (alignment - 1)) { + ret = -EINVAL; + goto err_free_vma; + } + vma->node.start = offset; + vma->node.size = size; + vma->node.color = obj->cache_level; + ret = drm_mm_reserve_node(&vm->mm, &vma->node); + if (ret) { + ret = i915_gem_evict_for_vma(vma); + if (ret == 0) + ret = drm_mm_reserve_node(&vm->mm, &vma->node); + } + if (ret) + goto err_free_vma; + } else { search_free: - ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, - size, alignment, - obj->cache_level, - start, end, - search_flag, - alloc_flag); - if (ret) { - ret = i915_gem_evict_something(dev, vm, size, alignment, - obj->cache_level, - start, end, - flags); - if (ret == 0) - goto search_free; + ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node, + size, alignment, + 
obj->cache_level, + start, end, + search_flag, + alloc_flag); + if (ret) { + ret = i915_gem_evict_something(dev, vm, size, alignment, + obj->cache_level, + start, end, + flags); + if (ret == 0) + goto search_free; - goto err_free_vma; + goto err_free_vma; + } } if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) { ret = -EINVAL; @@ -4357,6 +4376,10 @@ i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags) vma->node.start < (flags & PIN_OFFSET_MASK)) return true; + if (flags & PIN_OFFSET_FIXED && + vma->node.start != (flags & PIN_OFFSET_MASK)) + return true; + return false; } diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index d09e35e..6098e2f 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -199,6 +199,44 @@ found: return ret; } +int +i915_gem_evict_for_vma(struct i915_vma *target) +{ + struct drm_mm_node *node, *next; + + list_for_each_entry_safe(node, next, + &target->vm->mm.head_node.node_list, + node_list) { + struct i915_vma *vma; + int ret; + + if (node->start + node->size <= target->node.start) + continue; + if (node->start >= target->node.start + target->node.size) + break; + + vma = container_of(node, typeof(*vma), node); + + if (vma->pin_count) { + /* We may need to evict a buffer in the same batch */ + if (!vma->exec_entry) + return -EBUSY; + + if (vma->exec_entry->flags & EXEC_OBJECT_PINNED) + /* Overlapping fixed objects in the same batch */ + return -EINVAL; + + return -ENOSPC; + } + + ret = i915_vma_unbind(vma); + if (ret) + return ret; + } + + return 0; +} + /** * i915_gem_evict_vm - Evict all idle vmas from a vm * @vm: Address space to cleanse diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 3d94744..4e6955e 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -596,6 +596,8 @@ 
i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, flags |= PIN_GLOBAL | PIN_MAPPABLE; if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS) flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS; + if (entry->flags & EXEC_OBJECT_PINNED) + flags |= entry->offset | PIN_OFFSET_FIXED; } ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags); @@ -665,6 +667,10 @@ eb_vma_misplaced(struct i915_vma *vma) vma->node.start & (entry->alignment - 1)) return true; + if (entry->flags & EXEC_OBJECT_PINNED && + vma->node.start != entry->offset) + return true; + if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS && vma->node.start < BATCH_OFFSET_BIAS) return true; @@ -690,6 +696,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, struct i915_vma *vma; struct i915_address_space *vm; struct list_head ordered_vmas; + struct list_head pinned_vmas; bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; int retry; @@ -698,6 +705,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm; INIT_LIST_HEAD(&ordered_vmas); + INIT_LIST_HEAD(&pinned_vmas); while (!list_empty(vmas)) { struct drm_i915_gem_exec_object2 *entry; bool need_fence, need_mappable; @@ -716,7 +724,9 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, obj->tiling_mode != I915_TILING_NONE; need_mappable = need_fence || need_reloc_mappable(vma); - if (need_mappable) { + if (entry->flags & EXEC_OBJECT_PINNED) + list_move_tail(&vma->exec_list, &pinned_vmas); + else if (need_mappable) { entry->flags |= __EXEC_OBJECT_NEEDS_MAP; list_move(&vma->exec_list, &ordered_vmas); } else @@ -726,6 +736,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, obj->base.pending_write_domain = 0; } list_splice(&ordered_vmas, vmas); + list_splice(&pinned_vmas, vmas); /* Attempt to pin all of the buffers into the GTT. 
* This is done in 3 phases: @@ -1404,7 +1415,8 @@ eb_get_batch(struct eb_vmas *eb) * Note that actual hangs have only been observed on gen7, but for * paranoia do it everywhere. */ - vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; + if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0) + vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; return vma->obj; } diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 55ba527..18f8f99 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -677,8 +677,10 @@ struct drm_i915_gem_exec_object2 { __u64 alignment; /** - * Returned value of the updated offset of the object, for future - * presumed_offset writes. + * When the EXEC_OBJECT_PINNED flag is specified this is populated by + * the user with the GTT offset at which this object will be pinned. + * Otherwise the kernel populates it with the value of the updated + * offset of the object, for future presumed_offset writes. */ __u64 offset; @@ -686,7 +688,8 @@ struct drm_i915_gem_exec_object2 { #define EXEC_OBJECT_NEEDS_GTT (1<<1) #define EXEC_OBJECT_WRITE (1<<2) #define EXEC_OBJECT_SUPPORTS_48BBADDRESS (1<<3) -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48BBADDRESS<<1) +#define EXEC_OBJECT_PINNED (1<<4) +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1) __u64 flags; __u64 rsvd1;