Message ID | 20190627080045.8814-7-lionel.g.landwerlin@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/i915: Vulkan performance query support | expand |
Quoting Lionel Landwerlin (2019-06-27 09:00:41) > Introduces a new parameters to execbuf so that we can specify syncobj > handles as well as timeline points. > > v2: Reuse i915_user_extension_fn > > Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > --- > .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 280 ++++++++++++++---- > drivers/gpu/drm/i915/i915_drv.c | 2 + > include/uapi/drm/i915_drm.h | 37 +++ > 3 files changed, 263 insertions(+), 56 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > index 1970dd8cbac3..476fade6fcab 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > @@ -213,6 +213,13 @@ enum { > * the batchbuffer in trusted mode, otherwise the ioctl is rejected. > */ > > +struct i915_drm_dma_fences { i915_execbuffer_fences or eb_fences as it is private to the impl. > + struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */ > + struct dma_fence *dma_fence; > + u64 value; > + struct dma_fence_chain *chain_fence; > +}; > + > struct i915_execbuffer { > struct drm_i915_private *i915; /** i915 backpointer */ > struct drm_file *file; /** per-file lookup tables and limits */ > @@ -275,6 +282,7 @@ struct i915_execbuffer { > > struct { > u64 flags; /** Available extensions parameters */ > + struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences; Another 40b on stack. Getting closer to those warning bells being set off. > } extensions; > }; > > @@ -2224,67 +2232,193 @@ eb_select_engine(struct i915_execbuffer *eb, > } > > static void > -__free_fence_array(struct drm_syncobj **fences, unsigned int n) > +__free_fence_array(struct i915_drm_dma_fences *fences, unsigned int n) > { > - while (n--) > - drm_syncobj_put(ptr_mask_bits(fences[n], 2)); > + while (n--) { > + drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2)); > + dma_fence_put(fences[n].dma_fence); > + kfree(fences[n].chain_fence); > + } > kvfree(fences); > } > > -static struct drm_syncobj ** > -get_fence_array(struct drm_i915_gem_execbuffer2 *args, > - struct drm_file *file) > +static struct i915_drm_dma_fences * > +get_timeline_fence_array(struct i915_execbuffer *eb, int *out_n_fences) > { > - const unsigned long nfences = args->num_cliprects; > + struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences = > + &eb->extensions.timeline_fences; > + struct drm_i915_gem_exec_fence __user *user_fences; > + struct i915_drm_dma_fences *fences; > + u64 __user *user_values; > + unsigned long n; > + int err; > + > + *out_n_fences = timeline_fences->handle_count; I'd encourage using a const num_fences = timeline_fences->handle_count for readability within the function and then assigning *out_n_fences = num_fences; right before the successful return fences; > + /* Check multiplication overflow for access_ok() and kvmalloc_array() */ > + BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long)); > + if (*out_n_fences > min_t(unsigned long, > + ULONG_MAX / sizeof(*user_fences), > + SIZE_MAX / sizeof(*fences))) > + return ERR_PTR(-EINVAL); > + > + user_fences = u64_to_user_ptr(timeline_fences->handles_ptr); > + if (!access_ok(user_fences, *out_n_fences * sizeof(*user_fences))) > + return ERR_PTR(-EFAULT); > + > + user_values = u64_to_user_ptr(timeline_fences->values_ptr); > + if (!access_ok(user_values, *out_n_fences * sizeof(*user_values))) > + return ERR_PTR(-EFAULT); > + fences = kvmalloc_array(*out_n_fences, sizeof(*fences), > + __GFP_NOWARN | GFP_KERNEL); > + if (!fences) > + return ERR_PTR(-ENOMEM); > + > + BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & > + ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); > + > + for (n = 0; n < *out_n_fences; n++) { > + struct drm_i915_gem_exec_fence user_fence; > + struct drm_syncobj *syncobj; > + struct dma_fence *fence = NULL; > + u64 point; > + > + if (__copy_from_user(&user_fence, user_fences++, sizeof(user_fence))) { > + err = -EFAULT; > + goto err; > + } > + > + if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) { > + err = -EINVAL; > + goto err; > + } > + > + if (__get_user(point, user_values++)) { > + err = -EFAULT; > + goto err; > + } > + > + syncobj = drm_syncobj_find(eb->file, user_fence.handle); > + if (!syncobj) { > + DRM_DEBUG("Invalid syncobj handle provided\n"); > + err = -EINVAL; > + goto err; > + } > + > + if (user_fence.flags & I915_EXEC_FENCE_WAIT) { > + fence = drm_syncobj_fence_get(syncobj); > + if (!fence) { > + DRM_DEBUG("Syncobj handle has no fence\n"); > + drm_syncobj_put(syncobj); > + err = -EINVAL; > + goto err; > + } > + > + err = dma_fence_chain_find_seqno(&fence, point); Is an imported syncobj always a fence-chain? drm_syncobj_import_sync_file_fence() says no. Similarly, we put ordinary fences into the syncobj ourselves if point==0, or the user uses the old path. I think you need something like if (dma_fence_is_chain() || point) { err = dma_fence_chain_find_seqno(&fence, point); if (err) { } } > + if (err) { > + DRM_DEBUG("Syncobj handle missing requested point\n"); > + goto err; > + } > + } > + > + /* > + * For timeline syncobjs we need to create a chain. "need to preallocate chains for later signaling > + */ > + if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) { > + fences[n].chain_fence = > + kmalloc(sizeof(*fences[n].chain_fence), > + GFP_KERNEL); > + if (!fences[n].chain_fence) { > + dma_fence_put(fence); > + drm_syncobj_put(syncobj); > + err = -ENOMEM; > + DRM_DEBUG("Unable to alloc chain_fence\n"); > + goto err; > + } > + } else { > + fences[n].chain_fence = NULL; > + } > + > + fences[n].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2); > + fences[n].dma_fence = fence; > + fences[n].value = point; > + } > + > + return fences; > + > +err: > + __free_fence_array(fences, n); > + return ERR_PTR(err); > +} > + > +static struct i915_drm_dma_fences * > +get_legacy_fence_array(struct i915_execbuffer *eb, > + int *out_n_fences) > +{ > + struct drm_i915_gem_execbuffer2 *args = eb->args; > struct drm_i915_gem_exec_fence __user *user; > - struct drm_syncobj **fences; > + struct i915_drm_dma_fences *fences; > unsigned long n; > int err; > > - if (!(args->flags & I915_EXEC_FENCE_ARRAY)) > - return NULL; > + *out_n_fences = args->num_cliprects; Same suggestion as earlier for a local num_fences. > +static struct i915_drm_dma_fences * > +get_fence_array(struct i915_execbuffer *eb, int *out_n_fences) > +{ > + if (eb->args->flags & I915_EXEC_FENCE_ARRAY) > + return get_legacy_fence_array(eb, out_n_fences); Blank line. > + if (eb->extensions.flags & BIT(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES)) > + return get_timeline_fence_array(eb, out_n_fences); > + > + *out_n_fences = 0; > + No blank line, my argument is that both are part of the return value for the function. > + return NULL; > +} > + > static void > -put_fence_array(struct drm_i915_gem_execbuffer2 *args, > - struct drm_syncobj **fences) > +put_fence_array(struct i915_drm_dma_fences *fences, int nfences) > { > if (fences) > - __free_fence_array(fences, args->num_cliprects); > + __free_fence_array(fences, nfences); > } > > static int > await_fence_array(struct i915_execbuffer *eb, > - struct drm_syncobj **fences) > + struct i915_drm_dma_fences *fences, > + int nfences) > { > - const unsigned int nfences = eb->args->num_cliprects; > unsigned int n; > int err; > > for (n = 0; n < nfences; n++) { > struct drm_syncobj *syncobj; > - struct dma_fence *fence; > unsigned int flags; > > - syncobj = ptr_unpack_bits(fences[n], &flags, 2); > + syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2); > if (!(flags & I915_EXEC_FENCE_WAIT)) > continue; > > - fence = drm_syncobj_fence_get(syncobj); > - if (!fence) > - return -EINVAL; > - > - err = i915_request_await_dma_fence(eb->request, fence); > - dma_fence_put(fence); > + err = i915_request_await_dma_fence(eb->request, > + fences[n].dma_fence); > if (err < 0) > return err; > } > @@ -2334,9 +2475,9 @@ await_fence_array(struct i915_execbuffer *eb, > > static void > signal_fence_array(struct i915_execbuffer *eb, > - struct drm_syncobj **fences) > + struct i915_drm_dma_fences *fences, > + int nfences) > { > - const unsigned int nfences = eb->args->num_cliprects; > struct dma_fence * const fence = &eb->request->fence; > unsigned int n; > > @@ -2344,15 +2485,43 @@ signal_fence_array(struct i915_execbuffer *eb, > struct drm_syncobj *syncobj; > unsigned int flags; > > - syncobj = ptr_unpack_bits(fences[n], &flags, 2); > + syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2); > if (!(flags & I915_EXEC_FENCE_SIGNAL)) > continue; > > - drm_syncobj_replace_fence(syncobj, fence); > + if (fences[n].chain_fence) { > + drm_syncobj_add_point(syncobj, fences[n].chain_fence, > + fence, fences[n].value); > + /* > + * The chain's ownership is transfered to the > + * timeline. > + */ > + fences[n].chain_fence = NULL; > + } else { > + drm_syncobj_replace_fence(syncobj, fence); > + } > } > } > > +static int parse_timeline_fences(struct i915_user_extension __user *ext, void *data) > +{ > + struct i915_execbuffer *eb = data; > + > + /* Timeline fences are incompatible with the fence array flag. */ > + if (eb->args->flags & I915_EXEC_FENCE_ARRAY) > + return -EINVAL; Also you only allow one timeline_fence extension struct. > + > + if (copy_from_user(&eb->extensions.timeline_fences, ext, > + sizeof(eb->extensions.timeline_fences))) > + return -EFAULT; > + > + eb->extensions.flags |= BIT(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES); > + > + return 0; > +} > + > static const i915_user_extension_fn execbuf_extensions[] = { > + [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences, > }; > > static int > @@ -2372,14 +2541,15 @@ static int > i915_gem_do_execbuffer(struct drm_device *dev, > struct drm_file *file, > struct drm_i915_gem_execbuffer2 *args, > - struct drm_i915_gem_exec_object2 *exec, > - struct drm_syncobj **fences) > + struct drm_i915_gem_exec_object2 *exec) > { > struct i915_execbuffer eb; > struct dma_fence *in_fence = NULL; > struct dma_fence *exec_fence = NULL; > struct sync_file *out_fence = NULL; > + struct i915_drm_dma_fences *fences = NULL; > int out_fence_fd = -1; > + int nfences = 0; > int err; > > BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS); > @@ -2421,10 +2591,16 @@ i915_gem_do_execbuffer(struct drm_device *dev, > return err; > } > > + fences = get_fence_array(&eb, &nfences); > + if (IS_ERR(fences)) > + return PTR_ERR(fences); > + > if (args->flags & I915_EXEC_FENCE_IN) { > in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)); > - if (!in_fence) > - return -EINVAL; > + if (!in_fence) { > + err = -EINVAL; > + goto err_fences; > + } > } > > if (args->flags & I915_EXEC_FENCE_SUBMIT) { > @@ -2582,7 +2758,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, > } > > if (fences) { > - err = await_fence_array(&eb, fences); > + err = await_fence_array(&eb, fences, nfences); > if (err) > goto err_request; > } > @@ -2611,7 +2787,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, > i915_request_add(eb.request); > > if (fences) > - signal_fence_array(&eb, fences); > + signal_fence_array(&eb, fences, nfences); > > if (out_fence) { > if (err == 0) { > @@ -2646,6 +2822,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, > dma_fence_put(exec_fence); > err_in_fence: > dma_fence_put(in_fence); > +err_fences: > + put_fence_array(fences, nfences); > return err; > } > > @@ -2739,7 +2917,7 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data, > exec2_list[i].flags = 0; > } > > - err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL); > + err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list); > if (exec2.flags & __EXEC_HAS_RELOC) { > struct drm_i915_gem_exec_object __user *user_exec_list = > u64_to_user_ptr(args->buffers_ptr); > @@ -2770,7 +2948,6 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, > { > struct drm_i915_gem_execbuffer2 *args = data; > struct drm_i915_gem_exec_object2 *exec2_list; > - struct drm_syncobj **fences = NULL; > const size_t count = args->buffer_count; > int err; > > @@ -2798,15 +2975,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, > return -EFAULT; > } > > - if (args->flags & I915_EXEC_FENCE_ARRAY) { > - fences = get_fence_array(args, file); > - if (IS_ERR(fences)) { > - kvfree(exec2_list); > - return PTR_ERR(fences); > - } > - } > - > - err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences); > + err = i915_gem_do_execbuffer(dev, file, args, exec2_list); > > /* > * Now that we have begun execution of the batchbuffer, we ignore > @@ -2846,7 +3015,6 @@ end:; > } > > args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS; > - put_fence_array(args, fences); > kvfree(exec2_list); > return err; > } > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c > index 7c3c3b8ab339..48f9009a6318 100644 > --- a/drivers/gpu/drm/i915/i915_drv.c > +++ b/drivers/gpu/drm/i915/i915_drv.c > @@ -456,6 +456,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data, > case I915_PARAM_HAS_EXEC_BATCH_FIRST: > case I915_PARAM_HAS_EXEC_FENCE_ARRAY: > case I915_PARAM_HAS_EXEC_SUBMIT_FENCE: > + case I915_PARAM_HAS_EXEC_TIMELINE_FENCES: > /* For the time being all of these are always true; > * if some supported hardware does not have one of these > * features this value needs to be provided from > @@ -3220,6 +3221,7 @@ static struct drm_driver driver = { > .driver_features = > DRIVER_GEM | > DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ, > + DRIVER_SYNCOBJ_TIMELINE, With a comma ? i.e. .ioctls = DRIVER_SYNCOBJ_TIMELINE > .release = i915_driver_release, > .open = i915_driver_open, > .lastclose = i915_driver_lastclose, > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index efa195d6994e..b7fe1915e34d 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -617,6 +617,12 @@ typedef struct drm_i915_irq_wait { > */ > #define I915_PARAM_PERF_REVISION 54 > > +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of > + * timeline syncobj through drm_i915_gem_execbuf_ext_timeline_fences. See > + * I915_EXEC_EXT. > + */ > +#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55 > + > /* Must be kept compact -- no holes and well documented */ > > typedef struct drm_i915_getparam { > @@ -1014,9 +1020,40 @@ struct drm_i915_gem_exec_fence { > }; > > enum drm_i915_gem_execbuffer_ext { > + /** > + * This identifier is associated with > + * drm_i915_gem_execbuf_ext_timeline_fences. Or just "See drm_i915_gem_execbuf_ext_timeline_fences > + */ > + DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES, = 0 don't leave it up to the compiler. > + > DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */ > }; > > +/** > + * This structure describes an array of drm_syncobj and associated points for > + * timeline variants of drm_syncobj. It is invalid to append this structure to > + * the execbuf if I915_EXEC_FENCE_ARRAY is set. > + */ > +struct drm_i915_gem_execbuffer_ext_timeline_fences { > + struct i915_user_extension base; > + > + /** > + * Number of element in the handles_ptr & value_ptr arrays. > + */ > + __u64 handle_count; Since it is used for both, I would use something like fence_count, or num_fences (although _count is in the majority). > + /** > + * Pointer to an array of struct drm_i915_gem_exec_fence of size handle_count. s/size/length/ I think we typically use size for byte sizes around the uAPI. > + */ > + __u64 handles_ptr; > + > + /** > + * Pointer to an array of u64 values of size handle_count. Values must > + * be 0 for a binary drm_syncobj. > + */ > + __u64 values_ptr; > +}; > + > struct drm_i915_gem_execbuffer2 { > /** > * List of gem_exec_object2 structs > -- > 2.21.0.392.gf8f6787159e > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Thanks a lot for your comments. On 27/06/2019 12:15, Chris Wilson wrote: > Quoting Lionel Landwerlin (2019-06-27 09:00:41) >> Introduces a new parameters to execbuf so that we can specify syncobj >> handles as well as timeline points. >> >> v2: Reuse i915_user_extension_fn >> >> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> >> --- >> .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 280 ++++++++++++++---- >> drivers/gpu/drm/i915/i915_drv.c | 2 + >> include/uapi/drm/i915_drm.h | 37 +++ >> 3 files changed, 263 insertions(+), 56 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c >> index 1970dd8cbac3..476fade6fcab 100644 >> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c >> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c >> @@ -213,6 +213,13 @@ enum { >> * the batchbuffer in trusted mode, otherwise the ioctl is rejected. >> */ >> >> +struct i915_drm_dma_fences { > i915_execbuffer_fences or eb_fences as it is private to the impl. Sure. > >> + struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */ >> + struct dma_fence *dma_fence; >> + u64 value; >> + struct dma_fence_chain *chain_fence; >> +}; >> + >> struct i915_execbuffer { >> struct drm_i915_private *i915; /** i915 backpointer */ >> struct drm_file *file; /** per-file lookup tables and limits */ >> @@ -275,6 +282,7 @@ struct i915_execbuffer { >> >> struct { >> u64 flags; /** Available extensions parameters */ >> + struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences; > Another 40b on stack. Getting closer to those warning bells being set off. > >> } extensions; >> }; >> >> @@ -2224,67 +2232,193 @@ eb_select_engine(struct i915_execbuffer *eb, >> } >> >> static void >> -__free_fence_array(struct drm_syncobj **fences, unsigned int n) >> +__free_fence_array(struct i915_drm_dma_fences *fences, unsigned int n) >> { >> - while (n--) >> - drm_syncobj_put(ptr_mask_bits(fences[n], 2)); >> + while (n--) { >> + drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2)); >> + dma_fence_put(fences[n].dma_fence); >> + kfree(fences[n].chain_fence); >> + } >> kvfree(fences); >> } >> >> -static struct drm_syncobj ** >> -get_fence_array(struct drm_i915_gem_execbuffer2 *args, >> - struct drm_file *file) >> +static struct i915_drm_dma_fences * >> +get_timeline_fence_array(struct i915_execbuffer *eb, int *out_n_fences) >> { >> - const unsigned long nfences = args->num_cliprects; >> + struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences = >> + &eb->extensions.timeline_fences; >> + struct drm_i915_gem_exec_fence __user *user_fences; >> + struct i915_drm_dma_fences *fences; >> + u64 __user *user_values; >> + unsigned long n; >> + int err; >> + >> + *out_n_fences = timeline_fences->handle_count; > I'd encourage using a const num_fences = timeline_fences->handle_count > for readability within the function and then assigning > *out_n_fences = num_fences; > right before the successful return fences; Done. > >> + /* Check multiplication overflow for access_ok() and kvmalloc_array() */ >> + BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long)); >> + if (*out_n_fences > min_t(unsigned long, >> + ULONG_MAX / sizeof(*user_fences), >> + SIZE_MAX / sizeof(*fences))) >> + return ERR_PTR(-EINVAL); >> + >> + user_fences = u64_to_user_ptr(timeline_fences->handles_ptr); >> + if (!access_ok(user_fences, *out_n_fences * sizeof(*user_fences))) >> + return ERR_PTR(-EFAULT); >> + >> + user_values = u64_to_user_ptr(timeline_fences->values_ptr); >> + if (!access_ok(user_values, *out_n_fences * sizeof(*user_values))) >> + return ERR_PTR(-EFAULT); >> + fences = kvmalloc_array(*out_n_fences, sizeof(*fences), >> + __GFP_NOWARN | GFP_KERNEL); >> + if (!fences) >> + return ERR_PTR(-ENOMEM); >> + >> + BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & >> + ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); >> + >> + for (n = 0; n < *out_n_fences; n++) { >> + struct drm_i915_gem_exec_fence user_fence; >> + struct drm_syncobj *syncobj; >> + struct dma_fence *fence = NULL; >> + u64 point; >> + >> + if (__copy_from_user(&user_fence, user_fences++, sizeof(user_fence))) { >> + err = -EFAULT; >> + goto err; >> + } >> + >> + if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) { >> + err = -EINVAL; >> + goto err; >> + } >> + >> + if (__get_user(point, user_values++)) { >> + err = -EFAULT; >> + goto err; >> + } >> + >> + syncobj = drm_syncobj_find(eb->file, user_fence.handle); >> + if (!syncobj) { >> + DRM_DEBUG("Invalid syncobj handle provided\n"); >> + err = -EINVAL; >> + goto err; >> + } >> + >> + if (user_fence.flags & I915_EXEC_FENCE_WAIT) { >> + fence = drm_syncobj_fence_get(syncobj); >> + if (!fence) { >> + DRM_DEBUG("Syncobj handle has no fence\n"); >> + drm_syncobj_put(syncobj); >> + err = -EINVAL; >> + goto err; >> + } >> + >> + err = dma_fence_chain_find_seqno(&fence, point); > Is an imported syncobj always a fence-chain? > > drm_syncobj_import_sync_file_fence() says no. drm_syncobj_import_sync_file_fence() would turn a timeline semaphore into a binary semaphore by breaking the chain. drm_syncobj_transfer_to_timeline() is what you should use if you wish to import a fence into the timeline. > > Similarly, we put ordinary fences into the syncobj ourselves if > point==0, or the user uses the old path. > > I think you need something like > > if (dma_fence_is_chain() || point) { > err = dma_fence_chain_find_seqno(&fence, point); > if (err) { > } > } dma_fence_chain_find_seqno() already deals with point == 0. If the fence is not a dma_fence_chain then the application screwed up somewhere. > >> + if (err) { >> + DRM_DEBUG("Syncobj handle missing requested point\n"); >> + goto err; >> + } >> + } >> + >> + /* >> + * For timeline syncobjs we need to create a chain. > "need to preallocate chains for later signaling Sure. > >> + */ >> + if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) { >> + fences[n].chain_fence = >> + kmalloc(sizeof(*fences[n].chain_fence), >> + GFP_KERNEL); >> + if (!fences[n].chain_fence) { >> + dma_fence_put(fence); >> + drm_syncobj_put(syncobj); >> + err = -ENOMEM; >> + DRM_DEBUG("Unable to alloc chain_fence\n"); >> + goto err; >> + } >> + } else { >> + fences[n].chain_fence = NULL; >> + } >> + >> + fences[n].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2); >> + fences[n].dma_fence = fence; >> + fences[n].value = point; >> + } >> + >> + return fences; >> + >> +err: >> + __free_fence_array(fences, n); >> + return ERR_PTR(err); >> +} >> + >> +static struct i915_drm_dma_fences * >> +get_legacy_fence_array(struct i915_execbuffer *eb, >> + int *out_n_fences) >> +{ >> + struct drm_i915_gem_execbuffer2 *args = eb->args; >> struct drm_i915_gem_exec_fence __user *user; >> - struct drm_syncobj **fences; >> + struct i915_drm_dma_fences *fences; >> unsigned long n; >> int err; >> >> - if (!(args->flags & I915_EXEC_FENCE_ARRAY)) >> - return NULL; >> + *out_n_fences = args->num_cliprects; > Same suggestion as earlier for a local num_fences. Done. > >> +static struct i915_drm_dma_fences * >> +get_fence_array(struct i915_execbuffer *eb, int *out_n_fences) >> +{ >> + if (eb->args->flags & I915_EXEC_FENCE_ARRAY) >> + return get_legacy_fence_array(eb, out_n_fences); > Blank line. Oops. > >> + if (eb->extensions.flags & BIT(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES)) >> + return get_timeline_fence_array(eb, out_n_fences); >> + >> + *out_n_fences = 0; >> + > No blank line, my argument is that both are part of the return value for > the function. Okay. >> + return NULL; >> +} >> + >> static void >> -put_fence_array(struct drm_i915_gem_execbuffer2 *args, >> - struct drm_syncobj **fences) >> +put_fence_array(struct i915_drm_dma_fences *fences, int nfences) >> { >> if (fences) >> - __free_fence_array(fences, args->num_cliprects); >> + __free_fence_array(fences, nfences); >> } >> >> static int >> await_fence_array(struct i915_execbuffer *eb, >> - struct drm_syncobj **fences) >> + struct i915_drm_dma_fences *fences, >> + int nfences) >> { >> - const unsigned int nfences = eb->args->num_cliprects; >> unsigned int n; >> int err; >> >> for (n = 0; n < nfences; n++) { >> struct drm_syncobj *syncobj; >> - struct dma_fence *fence; >> unsigned int flags; >> >> - syncobj = ptr_unpack_bits(fences[n], &flags, 2); >> + syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2); >> if (!(flags & I915_EXEC_FENCE_WAIT)) >> continue; >> >> - fence = drm_syncobj_fence_get(syncobj); >> - if (!fence) >> - return -EINVAL; >> - >> - err = i915_request_await_dma_fence(eb->request, fence); >> - dma_fence_put(fence); >> + err = i915_request_await_dma_fence(eb->request, >> + fences[n].dma_fence); >> if (err < 0) >> return err; >> } >> @@ -2334,9 +2475,9 @@ await_fence_array(struct i915_execbuffer *eb, >> >> static void >> signal_fence_array(struct i915_execbuffer *eb, >> - struct drm_syncobj **fences) >> + struct i915_drm_dma_fences *fences, >> + int nfences) >> { >> - const unsigned int nfences = eb->args->num_cliprects; >> struct dma_fence * const fence = &eb->request->fence; >> unsigned int n; >> >> @@ -2344,15 +2485,43 @@ signal_fence_array(struct i915_execbuffer *eb, >> struct drm_syncobj *syncobj; >> unsigned int flags; >> >> - syncobj = ptr_unpack_bits(fences[n], &flags, 2); >> + syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2); >> if (!(flags & I915_EXEC_FENCE_SIGNAL)) >> continue; >> >> - drm_syncobj_replace_fence(syncobj, fence); >> + if (fences[n].chain_fence) { >> + drm_syncobj_add_point(syncobj, fences[n].chain_fence, >> + fence, fences[n].value); >> + /* >> + * The chain's ownership is transfered to the >> + * timeline. >> + */ >> + fences[n].chain_fence = NULL; >> + } else { >> + drm_syncobj_replace_fence(syncobj, fence); >> + } >> } >> } >> >> +static int parse_timeline_fences(struct i915_user_extension __user *ext, void *data) >> +{ >> + struct i915_execbuffer *eb = data; >> + >> + /* Timeline fences are incompatible with the fence array flag. */ >> + if (eb->args->flags & I915_EXEC_FENCE_ARRAY) >> + return -EINVAL; > Also you only allow one timeline_fence extension struct. Well spotted, thanks. > >> + >> + if (copy_from_user(&eb->extensions.timeline_fences, ext, >> + sizeof(eb->extensions.timeline_fences))) >> + return -EFAULT; >> + >> + eb->extensions.flags |= BIT(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES); >> + >> + return 0; >> +} >> + >> static const i915_user_extension_fn execbuf_extensions[] = { >> + [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences, >> }; >> >> static int >> @@ -2372,14 +2541,15 @@ static int >> i915_gem_do_execbuffer(struct drm_device *dev, >> struct drm_file *file, >> struct drm_i915_gem_execbuffer2 *args, >> - struct drm_i915_gem_exec_object2 *exec, >> - struct drm_syncobj **fences) >> + struct drm_i915_gem_exec_object2 *exec) >> { >> struct i915_execbuffer eb; >> struct dma_fence *in_fence = NULL; >> struct dma_fence *exec_fence = NULL; >> struct sync_file *out_fence = NULL; >> + struct i915_drm_dma_fences *fences = NULL; >> int out_fence_fd = -1; >> + int nfences = 0; >> int err; >> >> BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS); >> @@ -2421,10 +2591,16 @@ i915_gem_do_execbuffer(struct drm_device *dev, >> return err; >> } >> >> + fences = get_fence_array(&eb, &nfences); >> + if (IS_ERR(fences)) >> + return PTR_ERR(fences); >> + >> if (args->flags & I915_EXEC_FENCE_IN) { >> in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)); >> - if (!in_fence) >> - return -EINVAL; >> + if (!in_fence) { >> + err = -EINVAL; >> + goto err_fences; >> + } >> } >> >> if (args->flags & I915_EXEC_FENCE_SUBMIT) { >> @@ -2582,7 +2758,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, >> } >> >> if (fences) { >> - err = await_fence_array(&eb, fences); >> + err = await_fence_array(&eb, fences, nfences); >> if (err) >> goto err_request; >> } >> @@ -2611,7 +2787,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, >> i915_request_add(eb.request); >> >> if (fences) >> - signal_fence_array(&eb, fences); >> + signal_fence_array(&eb, fences, nfences); >> >> if (out_fence) { >> if (err == 0) { >> @@ -2646,6 +2822,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, >> dma_fence_put(exec_fence); >> err_in_fence: >> dma_fence_put(in_fence); >> +err_fences: >> + put_fence_array(fences, nfences); >> return err; >> } >> >> @@ -2739,7 +2917,7 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data, >> exec2_list[i].flags = 0; >> } >> >> - err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL); >> + err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list); >> if (exec2.flags & __EXEC_HAS_RELOC) { >> struct drm_i915_gem_exec_object __user *user_exec_list = >> u64_to_user_ptr(args->buffers_ptr); >> @@ -2770,7 +2948,6 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, >> { >> struct drm_i915_gem_execbuffer2 *args = data; >> struct drm_i915_gem_exec_object2 *exec2_list; >> - struct drm_syncobj **fences = NULL; >> const size_t count = args->buffer_count; >> int err; >> >> @@ -2798,15 +2975,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, >> return -EFAULT; >> } >> >> - if (args->flags & I915_EXEC_FENCE_ARRAY) { >> - fences = get_fence_array(args, file); >> - if (IS_ERR(fences)) { >> - kvfree(exec2_list); >> - return PTR_ERR(fences); >> - } >> - } >> - >> - err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences); >> + err = i915_gem_do_execbuffer(dev, file, args, exec2_list); >> >> /* >> * Now that we have begun execution of the batchbuffer, we ignore >> @@ -2846,7 +3015,6 @@ end:; >> } >> >> args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS; >> - put_fence_array(args, fences); >> kvfree(exec2_list); >> return err; >> } >> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c >> index 7c3c3b8ab339..48f9009a6318 100644 >> --- a/drivers/gpu/drm/i915/i915_drv.c >> +++ b/drivers/gpu/drm/i915/i915_drv.c >> @@ -456,6 +456,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data, >> case I915_PARAM_HAS_EXEC_BATCH_FIRST: >> case I915_PARAM_HAS_EXEC_FENCE_ARRAY: >> case I915_PARAM_HAS_EXEC_SUBMIT_FENCE: >> + case I915_PARAM_HAS_EXEC_TIMELINE_FENCES: >> /* For the time being all of these are always true; >> * if some supported hardware does not have one of these >> * features this value needs to be provided from >> @@ -3220,6 +3221,7 @@ static struct drm_driver driver = { >> .driver_features = >> DRIVER_GEM | >> DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ, >> + DRIVER_SYNCOBJ_TIMELINE, > With a comma ? i.e. .ioctls = DRIVER_SYNCOBJ_TIMELINE Waaaat. How am I running a this and it works....? > >> .release = i915_driver_release, >> .open = i915_driver_open, >> .lastclose = i915_driver_lastclose, >> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h >> index efa195d6994e..b7fe1915e34d 100644 >> --- a/include/uapi/drm/i915_drm.h >> +++ b/include/uapi/drm/i915_drm.h >> @@ -617,6 +617,12 @@ typedef struct drm_i915_irq_wait { >> */ >> #define I915_PARAM_PERF_REVISION 54 >> >> +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of >> + * timeline syncobj through drm_i915_gem_execbuf_ext_timeline_fences. See >> + * I915_EXEC_EXT. >> + */ >> +#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55 >> + >> /* Must be kept compact -- no holes and well documented */ >> >> typedef struct drm_i915_getparam { >> @@ -1014,9 +1020,40 @@ struct drm_i915_gem_exec_fence { >> }; >> >> enum drm_i915_gem_execbuffer_ext { >> + /** >> + * This identifier is associated with >> + * drm_i915_gem_execbuf_ext_timeline_fences. > Or just "See drm_i915_gem_execbuf_ext_timeline_fences >> + */ >> + DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES, > = 0 > don't leave it up to the compiler. Should we set every single item in the enum or just the first one? >> + >> DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */ >> }; >> >> +/** >> + * This structure describes an array of drm_syncobj and associated points for >> + * timeline variants of drm_syncobj. It is invalid to append this structure to >> + * the execbuf if I915_EXEC_FENCE_ARRAY is set. >> + */ >> +struct drm_i915_gem_execbuffer_ext_timeline_fences { >> + struct i915_user_extension base; >> + >> + /** >> + * Number of element in the handles_ptr & value_ptr arrays. >> + */ >> + __u64 handle_count; > Since it is used for both, I would use something like fence_count, or > num_fences (although _count is in the majority). Done. > >> + /** >> + * Pointer to an array of struct drm_i915_gem_exec_fence of size handle_count. > s/size/length/ > > I think we typically use size for byte sizes around the uAPI. Sure. > >> + */ >> + __u64 handles_ptr; >> + >> + /** >> + * Pointer to an array of u64 values of size handle_count. Values must >> + * be 0 for a binary drm_syncobj. >> + */ >> + __u64 values_ptr; >> +}; >> + >> struct drm_i915_gem_execbuffer2 { >> /** >> * List of gem_exec_object2 structs >> -- >> 2.21.0.392.gf8f6787159e >> >> _______________________________________________ >> Intel-gfx mailing list >> Intel-gfx@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Quoting Lionel Landwerlin (2019-06-27 11:49:56) > Thanks a lot for your comments. > > On 27/06/2019 12:15, Chris Wilson wrote: > >> + syncobj = drm_syncobj_find(eb->file, user_fence.handle); > >> + if (!syncobj) { > >> + DRM_DEBUG("Invalid syncobj handle provided\n"); > >> + err = -EINVAL; > >> + goto err; > >> + } > >> + > >> + if (user_fence.flags & I915_EXEC_FENCE_WAIT) { > >> + fence = drm_syncobj_fence_get(syncobj); > >> + if (!fence) { > >> + DRM_DEBUG("Syncobj handle has no fence\n"); > >> + drm_syncobj_put(syncobj); > >> + err = -EINVAL; > >> + goto err; > >> + } > >> + > >> + err = dma_fence_chain_find_seqno(&fence, point); > > Is an imported syncobj always a fence-chain? > > > > drm_syncobj_import_sync_file_fence() says no. > > > drm_syncobj_import_sync_file_fence() would turn a timeline semaphore > into a binary semaphore by breaking the chain. > > drm_syncobj_transfer_to_timeline() is what you should use if you wish to > import a fence into the timeline. What I am worrying about is the legality of the user passing in a non-timeline fence here. It looks like the interface will itself generate non-timeline fences... Oh, ignore me, I overlooked the early return for !seqno. So timelines are not allowed to use 0. Ever. That's a bit more of a hard rule than implied by the uapi.h :) > >> @@ -1014,9 +1020,40 @@ struct drm_i915_gem_exec_fence { > >> }; > >> > >> enum drm_i915_gem_execbuffer_ext { > >> + /** > >> + * This identifier is associated with > >> + * drm_i915_gem_execbuf_ext_timeline_fences. > > Or just "See drm_i915_gem_execbuf_ext_timeline_fences > >> + */ > >> + DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES, > > = 0 > > don't leave it up to the compiler. > > > Should we set every single item in the enum or just the first one? We've been setting the first one to be explicit (doubly useful when we want a certain value to be documented to be 0), and then gaps. The spec says that each id is incremented by one from the previous, it's just the first value and natural type of the enum is up to the compiler. -Chris
On 27/06/2019 13:59, Chris Wilson wrote: > Quoting Lionel Landwerlin (2019-06-27 11:49:56) >> Thanks a lot for your comments. >> >> On 27/06/2019 12:15, Chris Wilson wrote: >>>> + syncobj = drm_syncobj_find(eb->file, user_fence.handle); >>>> + if (!syncobj) { >>>> + DRM_DEBUG("Invalid syncobj handle provided\n"); >>>> + err = -EINVAL; >>>> + goto err; >>>> + } >>>> + >>>> + if (user_fence.flags & I915_EXEC_FENCE_WAIT) { >>>> + fence = drm_syncobj_fence_get(syncobj); >>>> + if (!fence) { >>>> + DRM_DEBUG("Syncobj handle has no fence\n"); >>>> + drm_syncobj_put(syncobj); >>>> + err = -EINVAL; >>>> + goto err; >>>> + } >>>> + >>>> + err = dma_fence_chain_find_seqno(&fence, point); >>> Is an imported syncobj always a fence-chain? >>> >>> drm_syncobj_import_sync_file_fence() says no. >> >> drm_syncobj_import_sync_file_fence() would turn a timeline semaphore >> into a binary semaphore by breaking the chain. >> >> drm_syncobj_transfer_to_timeline() is what you should use if you wish to >> import a fence into the timeline. > What I am worrying about is the legality of the user passing in a > non-timeline fence here. It looks like the interface will itself > generate non-timeline fences... Oh, ignore me, I overlooked the early > return for !seqno. I wrote that down in my reply then deleted it ;) > > So timelines are not allowed to use 0. Ever. That's a bit more of a hard > rule than implied by the uapi.h :) Anv turns any signal/wait on 0 into NOOP, those never reach i915. The spec is pretty clear that point 0 is always signaled. I'll add a statement with regard to timeline semaphores. Thanks! -Lionel > >>>> @@ -1014,9 +1020,40 @@ struct drm_i915_gem_exec_fence { >>>> }; >>>> >>>> enum drm_i915_gem_execbuffer_ext { >>>> + /** >>>> + * This identifier is associated with >>>> + * drm_i915_gem_execbuf_ext_timeline_fences. >>> Or just "See drm_i915_gem_execbuf_ext_timeline_fences >>>> + */ >>>> + DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES, >>> = 0 >>> don't leave it up to the compiler. >> >> Should we set every single item in the enum or just the first one? > We've been setting the first one to be explicit (doubly useful when we > want a certain value to be documented to be 0), and then gaps. The spec > says that each id is incremented by one from the previous, it's just the > first value and natural type of the enum is up to the compiler. > -Chris >
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 1970dd8cbac3..476fade6fcab 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -213,6 +213,13 @@ enum { * the batchbuffer in trusted mode, otherwise the ioctl is rejected. */ +struct i915_drm_dma_fences { + struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */ + struct dma_fence *dma_fence; + u64 value; + struct dma_fence_chain *chain_fence; +}; + struct i915_execbuffer { struct drm_i915_private *i915; /** i915 backpointer */ struct drm_file *file; /** per-file lookup tables and limits */ @@ -275,6 +282,7 @@ struct i915_execbuffer { struct { u64 flags; /** Available extensions parameters */ + struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences; } extensions; }; @@ -2224,67 +2232,193 @@ eb_select_engine(struct i915_execbuffer *eb, } static void -__free_fence_array(struct drm_syncobj **fences, unsigned int n) +__free_fence_array(struct i915_drm_dma_fences *fences, unsigned int n) { - while (n--) - drm_syncobj_put(ptr_mask_bits(fences[n], 2)); + while (n--) { + drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2)); + dma_fence_put(fences[n].dma_fence); + kfree(fences[n].chain_fence); + } kvfree(fences); } -static struct drm_syncobj ** -get_fence_array(struct drm_i915_gem_execbuffer2 *args, - struct drm_file *file) +static struct i915_drm_dma_fences * +get_timeline_fence_array(struct i915_execbuffer *eb, int *out_n_fences) { - const unsigned long nfences = args->num_cliprects; + struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences = + &eb->extensions.timeline_fences; + struct drm_i915_gem_exec_fence __user *user_fences; + struct i915_drm_dma_fences *fences; + u64 __user *user_values; + unsigned long n; + int err; + + *out_n_fences = timeline_fences->handle_count; + + /* Check multiplication overflow for access_ok() and kvmalloc_array() */ + BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long)); + if (*out_n_fences > min_t(unsigned long, + ULONG_MAX / sizeof(*user_fences), + SIZE_MAX / sizeof(*fences))) + return ERR_PTR(-EINVAL); + + user_fences = u64_to_user_ptr(timeline_fences->handles_ptr); + if (!access_ok(user_fences, *out_n_fences * sizeof(*user_fences))) + return ERR_PTR(-EFAULT); + + user_values = u64_to_user_ptr(timeline_fences->values_ptr); + if (!access_ok(user_values, *out_n_fences * sizeof(*user_values))) + return ERR_PTR(-EFAULT); + + fences = kvmalloc_array(*out_n_fences, sizeof(*fences), + __GFP_NOWARN | GFP_KERNEL); + if (!fences) + return ERR_PTR(-ENOMEM); + + BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & + ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); + + for (n = 0; n < *out_n_fences; n++) { + struct drm_i915_gem_exec_fence user_fence; + struct drm_syncobj *syncobj; + struct dma_fence *fence = NULL; + u64 point; + + if (__copy_from_user(&user_fence, user_fences++, sizeof(user_fence))) { + err = -EFAULT; + goto err; + } + + if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) { + err = -EINVAL; + goto err; + } + + if (__get_user(point, user_values++)) { + err = -EFAULT; + goto err; + } + + syncobj = drm_syncobj_find(eb->file, user_fence.handle); + if (!syncobj) { + DRM_DEBUG("Invalid syncobj handle provided\n"); + err = -EINVAL; + goto err; + } + + if (user_fence.flags & I915_EXEC_FENCE_WAIT) { + fence = drm_syncobj_fence_get(syncobj); + if (!fence) { + DRM_DEBUG("Syncobj handle has no fence\n"); + drm_syncobj_put(syncobj); + err = -EINVAL; + goto err; + } + + err = dma_fence_chain_find_seqno(&fence, point); + if (err) { + DRM_DEBUG("Syncobj handle missing requested point\n"); + goto err; + } + } + + /* + * For timeline syncobjs we need to create a chain. + */ + if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) { + fences[n].chain_fence = + kmalloc(sizeof(*fences[n].chain_fence), + GFP_KERNEL); + if (!fences[n].chain_fence) { + dma_fence_put(fence); + drm_syncobj_put(syncobj); + err = -ENOMEM; + DRM_DEBUG("Unable to alloc chain_fence\n"); + goto err; + } + } else { + fences[n].chain_fence = NULL; + } + + fences[n].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2); + fences[n].dma_fence = fence; + fences[n].value = point; + } + + return fences; + +err: + __free_fence_array(fences, n); + return ERR_PTR(err); +} + +static struct i915_drm_dma_fences * +get_legacy_fence_array(struct i915_execbuffer *eb, + int *out_n_fences) +{ + struct drm_i915_gem_execbuffer2 *args = eb->args; struct drm_i915_gem_exec_fence __user *user; - struct drm_syncobj **fences; + struct i915_drm_dma_fences *fences; unsigned long n; int err; - if (!(args->flags & I915_EXEC_FENCE_ARRAY)) - return NULL; + *out_n_fences = args->num_cliprects; /* Check multiplication overflow for access_ok() and kvmalloc_array() */ BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long)); - if (nfences > min_t(unsigned long, - ULONG_MAX / sizeof(*user), - SIZE_MAX / sizeof(*fences))) + if (*out_n_fences > min_t(unsigned long, + ULONG_MAX / sizeof(*user), + SIZE_MAX / sizeof(*fences))) return ERR_PTR(-EINVAL); user = u64_to_user_ptr(args->cliprects_ptr); - if (!access_ok(user, nfences * sizeof(*user))) + if (!access_ok(user, *out_n_fences * sizeof(*user))) return ERR_PTR(-EFAULT); - fences = kvmalloc_array(nfences, sizeof(*fences), + fences = kvmalloc_array(*out_n_fences, sizeof(*fences), __GFP_NOWARN | GFP_KERNEL); if (!fences) return ERR_PTR(-ENOMEM); - for (n = 0; n < nfences; n++) { - struct drm_i915_gem_exec_fence fence; + for (n = 0; n < *out_n_fences; n++) { + struct drm_i915_gem_exec_fence user_fence; struct drm_syncobj *syncobj; + struct dma_fence *fence = NULL; - if (__copy_from_user(&fence, user++, sizeof(fence))) { + if (__copy_from_user(&user_fence, user++, sizeof(user_fence))) { err = -EFAULT; goto err; } - if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) { + if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) { err = -EINVAL; goto err; } - syncobj = drm_syncobj_find(file, fence.handle); + syncobj = drm_syncobj_find(eb->file, user_fence.handle); if (!syncobj) { DRM_DEBUG("Invalid syncobj handle provided\n"); err = -ENOENT; goto err; } + if (user_fence.flags & I915_EXEC_FENCE_WAIT) { + fence = drm_syncobj_fence_get(syncobj); + if (!fence) { + DRM_DEBUG("Syncobj handle has no fence\n"); + drm_syncobj_put(syncobj); + err = -EINVAL; + goto err; + } + } + BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & ~__I915_EXEC_FENCE_UNKNOWN_FLAGS); - fences[n] = ptr_pack_bits(syncobj, fence.flags, 2); + fences[n].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2); + fences[n].dma_fence = fence; + fences[n].value = 0; + fences[n].chain_fence = NULL; } return fences; @@ -2294,37 +2428,44 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args, return ERR_PTR(err); } +static struct i915_drm_dma_fences * +get_fence_array(struct i915_execbuffer *eb, int *out_n_fences) +{ + if (eb->args->flags & I915_EXEC_FENCE_ARRAY) + return get_legacy_fence_array(eb, out_n_fences); + if (eb->extensions.flags & BIT(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES)) + return get_timeline_fence_array(eb, out_n_fences); + + *out_n_fences = 0; + + return NULL; +} + static void -put_fence_array(struct drm_i915_gem_execbuffer2 *args, - struct drm_syncobj **fences) +put_fence_array(struct i915_drm_dma_fences *fences, int nfences) { if (fences) - __free_fence_array(fences, args->num_cliprects); + __free_fence_array(fences, nfences); } static int await_fence_array(struct i915_execbuffer *eb, - struct drm_syncobj **fences) + struct i915_drm_dma_fences *fences, + int nfences) { - const unsigned int nfences = eb->args->num_cliprects; unsigned int n; int err; for (n = 0; n < nfences; n++) { struct drm_syncobj *syncobj; - struct dma_fence *fence; unsigned int flags; - syncobj = ptr_unpack_bits(fences[n], &flags, 2); + syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2); if (!(flags & I915_EXEC_FENCE_WAIT)) continue; - fence = drm_syncobj_fence_get(syncobj); - if (!fence) - return -EINVAL; - - err = i915_request_await_dma_fence(eb->request, fence); - dma_fence_put(fence); + err = i915_request_await_dma_fence(eb->request, + fences[n].dma_fence); if (err < 0) return err; } @@ -2334,9 +2475,9 @@ await_fence_array(struct i915_execbuffer *eb, static void signal_fence_array(struct i915_execbuffer *eb, - struct drm_syncobj **fences) + struct i915_drm_dma_fences *fences, + int nfences) { - const unsigned int nfences = eb->args->num_cliprects; struct dma_fence * const fence = &eb->request->fence; unsigned int n; @@ -2344,15 +2485,43 @@ signal_fence_array(struct i915_execbuffer *eb, struct drm_syncobj *syncobj; unsigned int flags; - syncobj = ptr_unpack_bits(fences[n], &flags, 2); + syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2); if (!(flags & I915_EXEC_FENCE_SIGNAL)) continue; - drm_syncobj_replace_fence(syncobj, fence); + if (fences[n].chain_fence) { + drm_syncobj_add_point(syncobj, fences[n].chain_fence, + fence, fences[n].value); + /* + * The chain's ownership is transfered to the + * timeline. + */ + fences[n].chain_fence = NULL; + } else { + drm_syncobj_replace_fence(syncobj, fence); + } } } +static int parse_timeline_fences(struct i915_user_extension __user *ext, void *data) +{ + struct i915_execbuffer *eb = data; + + /* Timeline fences are incompatible with the fence array flag. */ + if (eb->args->flags & I915_EXEC_FENCE_ARRAY) + return -EINVAL; + + if (copy_from_user(&eb->extensions.timeline_fences, ext, + sizeof(eb->extensions.timeline_fences))) + return -EFAULT; + + eb->extensions.flags |= BIT(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES); + + return 0; +} + static const i915_user_extension_fn execbuf_extensions[] = { + [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences, }; static int @@ -2372,14 +2541,15 @@ static int i915_gem_do_execbuffer(struct drm_device *dev, struct drm_file *file, struct drm_i915_gem_execbuffer2 *args, - struct drm_i915_gem_exec_object2 *exec, - struct drm_syncobj **fences) + struct drm_i915_gem_exec_object2 *exec) { struct i915_execbuffer eb; struct dma_fence *in_fence = NULL; struct dma_fence *exec_fence = NULL; struct sync_file *out_fence = NULL; + struct i915_drm_dma_fences *fences = NULL; int out_fence_fd = -1; + int nfences = 0; int err; BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS); @@ -2421,10 +2591,16 @@ i915_gem_do_execbuffer(struct drm_device *dev, return err; } + fences = get_fence_array(&eb, &nfences); + if (IS_ERR(fences)) + return PTR_ERR(fences); + if (args->flags & I915_EXEC_FENCE_IN) { in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)); - if (!in_fence) - return -EINVAL; + if (!in_fence) { + err = -EINVAL; + goto err_fences; + } } if (args->flags & I915_EXEC_FENCE_SUBMIT) { @@ -2582,7 +2758,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, } if (fences) { - err = await_fence_array(&eb, fences); + err = await_fence_array(&eb, fences, nfences); if (err) goto err_request; } @@ -2611,7 +2787,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, i915_request_add(eb.request); if (fences) - signal_fence_array(&eb, fences); + signal_fence_array(&eb, fences, nfences); if (out_fence) { if (err == 0) { @@ -2646,6 +2822,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, dma_fence_put(exec_fence); err_in_fence: dma_fence_put(in_fence); +err_fences: + put_fence_array(fences, nfences); return err; } @@ -2739,7 +2917,7 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data, exec2_list[i].flags = 0; } - err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL); + err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list); if (exec2.flags & __EXEC_HAS_RELOC) { struct drm_i915_gem_exec_object __user *user_exec_list = u64_to_user_ptr(args->buffers_ptr); @@ -2770,7 +2948,6 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_execbuffer2 *args = data; struct drm_i915_gem_exec_object2 *exec2_list; - struct drm_syncobj **fences = NULL; const size_t count = args->buffer_count; int err; @@ -2798,15 +2975,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data, return -EFAULT; } - if (args->flags & I915_EXEC_FENCE_ARRAY) { - fences = get_fence_array(args, file); - if (IS_ERR(fences)) { - kvfree(exec2_list); - return PTR_ERR(fences); - } - } - - err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences); + err = i915_gem_do_execbuffer(dev, file, args, exec2_list); /* * Now that we have begun execution of the batchbuffer, we ignore @@ -2846,7 +3015,6 @@ end:; } args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS; - put_fence_array(args, fences); kvfree(exec2_list); return err; } diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 7c3c3b8ab339..48f9009a6318 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -456,6 +456,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_BATCH_FIRST: case I915_PARAM_HAS_EXEC_FENCE_ARRAY: case I915_PARAM_HAS_EXEC_SUBMIT_FENCE: + case I915_PARAM_HAS_EXEC_TIMELINE_FENCES: /* For the time being all of these are always true; * if some supported hardware does not have one of these * features this value needs to be provided from @@ -3220,6 +3221,7 @@ static struct drm_driver driver = { .driver_features = DRIVER_GEM | DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ, + DRIVER_SYNCOBJ_TIMELINE, .release = i915_driver_release, .open = i915_driver_open, .lastclose = i915_driver_lastclose, diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index efa195d6994e..b7fe1915e34d 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -617,6 +617,12 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_PERF_REVISION 54 +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of + * timeline syncobj through drm_i915_gem_execbuf_ext_timeline_fences. See + * I915_EXEC_EXT. + */ +#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55 + /* Must be kept compact -- no holes and well documented */ typedef struct drm_i915_getparam { @@ -1014,9 +1020,40 @@ struct drm_i915_gem_exec_fence { }; enum drm_i915_gem_execbuffer_ext { + /** + * This identifier is associated with + * drm_i915_gem_execbuf_ext_timeline_fences. + */ + DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES, + DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */ }; +/** + * This structure describes an array of drm_syncobj and associated points for + * timeline variants of drm_syncobj. It is invalid to append this structure to + * the execbuf if I915_EXEC_FENCE_ARRAY is set. + */ +struct drm_i915_gem_execbuffer_ext_timeline_fences { + struct i915_user_extension base; + + /** + * Number of element in the handles_ptr & value_ptr arrays. + */ + __u64 handle_count; + + /** + * Pointer to an array of struct drm_i915_gem_exec_fence of size handle_count. + */ + __u64 handles_ptr; + + /** + * Pointer to an array of u64 values of size handle_count. Values must + * be 0 for a binary drm_syncobj. + */ + __u64 values_ptr; +}; + struct drm_i915_gem_execbuffer2 { /** * List of gem_exec_object2 structs
Introduces a new parameters to execbuf so that we can specify syncobj handles as well as timeline points. v2: Reuse i915_user_extension_fn Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 280 ++++++++++++++---- drivers/gpu/drm/i915/i915_drv.c | 2 + include/uapi/drm/i915_drm.h | 37 +++ 3 files changed, 263 insertions(+), 56 deletions(-)