Message ID | 1499275282-14249-1-git-send-email-jason.ekstrand@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Quoting Jason Ekstrand (2017-07-05 18:21:22) > This commit adds support for waiting on or signaling DRM syncobjs as > part of execbuf. It does so by hijacking the currently unused cliprects > pointer to instead point to an array of i915_gem_exec_fence structs > which containe a DRM syncobj and a flags parameter which specifies > whether to wait on it or to signal it. This implementation > theoretically allows for both flags to be set in which case it waits on > the dma_fence that was in the syncobj and then immediately replaces it > with the dma_fence from the current execbuf. > > Cc: Chris Wilson <chris@chris-wilson.co.uk> > --- > > Ideally, I'd like to get whatever kernel reviewing and/or merging is > required to land the userspace bits ASAP. That said, I understand that > this is my first kernel patch and it's liable to have a few rounds of > review before going in. :-) > > The mesa branch for using this in Vulkan can be found here: > > https://cgit.freedesktop.org/~jekstrand/mesa/log/?h=review/anv-syncobj > > drivers/gpu/drm/i915/i915_drv.c | 3 +- > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 97 ++++++++++++++++++++++++++++-- > include/uapi/drm/i915_drm.h | 30 ++++++++- > 3 files changed, 121 insertions(+), 9 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c > index 9167a73..e6f7f49 100644 > --- a/drivers/gpu/drm/i915/i915_drv.c > +++ b/drivers/gpu/drm/i915/i915_drv.c > @@ -384,6 +384,7 @@ static int i915_getparam(struct drm_device *dev, void *data, > case I915_PARAM_HAS_EXEC_FENCE: > case I915_PARAM_HAS_EXEC_CAPTURE: > case I915_PARAM_HAS_EXEC_BATCH_FIRST: > + case I915_PARAM_HAS_EXEC_FENCE_ARRAY: > /* For the time being all of these are always true; > * if some supported hardware does not have one of these > * features this value needs to be provided from > @@ -2734,7 +2735,7 @@ static struct drm_driver driver = { > */ > .driver_features = > DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM | DRIVER_PRIME | > - DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC, > + DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ, > .release = i915_driver_release, > .open = i915_driver_open, > .lastclose = i915_driver_lastclose, > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > index 929f275..81b7b7b 100644 > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > @@ -33,6 +33,7 @@ > > #include <drm/drmP.h> > #include <drm/i915_drm.h> > +#include <drm/drm_syncobj.h> > > #include "i915_drv.h" > #include "i915_gem_clflush.h" > @@ -1882,8 +1883,10 @@ static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) > return false; > > /* Kernel clipping was a DRI1 misfeature */ > - if (exec->num_cliprects || exec->cliprects_ptr) > - return false; > + if (!(exec->flags & I915_EXEC_FENCE_ARRAY)) { > + if (exec->num_cliprects || exec->cliprects_ptr) > + return false; > + } > > if (exec->DR4 == 0xffffffff) { > DRM_DEBUG("UXA submitting garbage DR4, fixing up\n"); > @@ -2118,12 +2121,15 @@ static int > i915_gem_do_execbuffer(struct drm_device *dev, > struct drm_file *file, > struct drm_i915_gem_execbuffer2 *args, > - struct drm_i915_gem_exec_object2 *exec) > + struct drm_i915_gem_exec_object2 *exec, > + struct drm_i915_gem_exec_fence *fences) > { > struct i915_execbuffer eb; > struct dma_fence *in_fence = NULL; > struct sync_file *out_fence = NULL; > + struct drm_syncobj **syncobjs = NULL; > int out_fence_fd = -1; > + unsigned int i; > int err; > > BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & > @@ -2186,9 +2192,30 @@ i915_gem_do_execbuffer(struct drm_device *dev, > } > } > > + if (args->flags & I915_EXEC_FENCE_ARRAY) { > + syncobjs = kvmalloc_array(args->num_cliprects, > + sizeof(*syncobjs), > + __GFP_NOWARN | GFP_TEMPORARY | > + __GFP_ZERO); > + if (syncobjs == NULL) { > + DRM_DEBUG("Failed to allocate array of %d syncobjs\n", > + args->num_cliprects); > + err = -ENOMEM; > + goto err_out_fence; > + } > + for (i = 0; i < args->num_cliprects; i++) { > + syncobjs[i] = drm_syncobj_find(file, fences[i].handle); > + if (!syncobjs[i]) { > + DRM_DEBUG("Invalid syncobj handle provided\n"); > + err = -EINVAL; > + goto err_syncobjs; > + } > + } I did this in the caller so that we only allocated and passed in a single array of drm_syncobj (with the flags packed into the low bits). > + } > + > err = eb_create(&eb); > if (err) > - goto err_out_fence; > + goto err_syncobjs; > > GEM_BUG_ON(!eb.lut_size); > > @@ -2304,6 +2331,16 @@ i915_gem_do_execbuffer(struct drm_device *dev, > goto err_request; > } > > + if (args->flags & I915_EXEC_FENCE_ARRAY) { > + for (i = 0; i < args->num_cliprects; i++) { > + if (fences[i].flags & I915_EXEC_FENCE_WAIT) { > + if (i915_gem_request_await_dma_fence(eb.request, > + syncobjs[i]->fence)) > + goto err_request; > + } > + } > + } > + > if (out_fence_fd != -1) { > out_fence = sync_file_create(&eb.request->fence); > if (!out_fence) { > @@ -2312,6 +2349,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, > } > } > > + /* We need to add fences to syncobjs last because doing so mutates the > + * syncobj and we have no good way to recover if the execbuf fails > + * after this point. Fortunately, drm_syncobj_replace_fence can never > + * fail. > + */ > + if (args->flags & I915_EXEC_FENCE_ARRAY) { > + for (i = 0; i < args->num_cliprects; i++) { > + if (fences[i].flags & I915_EXEC_FENCE_SIGNAL) { > + drm_syncobj_replace_fence(file, syncobjs[i], > + &eb.request->fence); > + } > + } > + } > + This has to be after the execbuf is submitted, next to where out_fence is attached is a good spot, and should only be updated on success. (The kernel API allows combining of FENCE_WAIT | FENCE_SIGNAL so we can only replace a FENCE_WAIT once we know we have committed the execbuf.) I moved all of these to little functions: get_fence_array, await_fence_array, signal_fence_array and put_fence_array. -Chris
On Wed, Jul 5, 2017 at 10:37 AM, Chris Wilson <chris@chris-wilson.co.uk> wrote: > Quoting Jason Ekstrand (2017-07-05 18:21:22) > > This commit adds support for waiting on or signaling DRM syncobjs as > > part of execbuf. It does so by hijacking the currently unused cliprects > > pointer to instead point to an array of i915_gem_exec_fence structs > > which containe a DRM syncobj and a flags parameter which specifies > > whether to wait on it or to signal it. This implementation > > theoretically allows for both flags to be set in which case it waits on > > the dma_fence that was in the syncobj and then immediately replaces it > > with the dma_fence from the current execbuf. > > > > Cc: Chris Wilson <chris@chris-wilson.co.uk> > > --- > > > > Ideally, I'd like to get whatever kernel reviewing and/or merging is > > required to land the userspace bits ASAP. That said, I understand that > > this is my first kernel patch and it's liable to have a few rounds of > > review before going in. :-) > > > > The mesa branch for using this in Vulkan can be found here: > > > > https://cgit.freedesktop.org/~jekstrand/mesa/log/?h=review/anv-syncobj > > > > drivers/gpu/drm/i915/i915_drv.c | 3 +- > > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 97 > ++++++++++++++++++++++++++++-- > > include/uapi/drm/i915_drm.h | 30 ++++++++- > > 3 files changed, 121 insertions(+), 9 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/i915_drv.c > b/drivers/gpu/drm/i915/i915_drv.c > > index 9167a73..e6f7f49 100644 > > --- a/drivers/gpu/drm/i915/i915_drv.c > > +++ b/drivers/gpu/drm/i915/i915_drv.c > > @@ -384,6 +384,7 @@ static int i915_getparam(struct drm_device *dev, > void *data, > > case I915_PARAM_HAS_EXEC_FENCE: > > case I915_PARAM_HAS_EXEC_CAPTURE: > > case I915_PARAM_HAS_EXEC_BATCH_FIRST: > > + case I915_PARAM_HAS_EXEC_FENCE_ARRAY: > > /* For the time being all of these are always true; > > * if some supported hardware does not have one of these > > * features this value needs to be provided from > > @@ -2734,7 +2735,7 @@ static struct drm_driver driver = { > > */ > > .driver_features = > > DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM | > DRIVER_PRIME | > > - DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC, > > + DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | > DRIVER_SYNCOBJ, > > .release = i915_driver_release, > > .open = i915_driver_open, > > .lastclose = i915_driver_lastclose, > > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > > index 929f275..81b7b7b 100644 > > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > > @@ -33,6 +33,7 @@ > > > > #include <drm/drmP.h> > > #include <drm/i915_drm.h> > > +#include <drm/drm_syncobj.h> > > > > #include "i915_drv.h" > > #include "i915_gem_clflush.h" > > @@ -1882,8 +1883,10 @@ static bool i915_gem_check_execbuffer(struct > drm_i915_gem_execbuffer2 *exec) > > return false; > > > > /* Kernel clipping was a DRI1 misfeature */ > > - if (exec->num_cliprects || exec->cliprects_ptr) > > - return false; > > + if (!(exec->flags & I915_EXEC_FENCE_ARRAY)) { > > + if (exec->num_cliprects || exec->cliprects_ptr) > > + return false; > > + } > > > > if (exec->DR4 == 0xffffffff) { > > DRM_DEBUG("UXA submitting garbage DR4, fixing up\n"); > > @@ -2118,12 +2121,15 @@ static int > > i915_gem_do_execbuffer(struct drm_device *dev, > > struct drm_file *file, > > struct drm_i915_gem_execbuffer2 *args, > > - struct drm_i915_gem_exec_object2 *exec) > > + struct drm_i915_gem_exec_object2 *exec, > > + struct drm_i915_gem_exec_fence *fences) > > { > > struct i915_execbuffer eb; > > struct dma_fence *in_fence = NULL; > > struct sync_file *out_fence = NULL; > > + struct drm_syncobj **syncobjs = NULL; > > int out_fence_fd = -1; > > + unsigned int i; > > int err; > > > > BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & > > @@ -2186,9 +2192,30 @@ i915_gem_do_execbuffer(struct drm_device *dev, > > } > > } > > > > + if (args->flags & I915_EXEC_FENCE_ARRAY) { > > + syncobjs = kvmalloc_array(args->num_cliprects, > > + sizeof(*syncobjs), > > + __GFP_NOWARN | GFP_TEMPORARY | > > + __GFP_ZERO); > > + if (syncobjs == NULL) { > > + DRM_DEBUG("Failed to allocate array of %d > syncobjs\n", > > + args->num_cliprects); > > + err = -ENOMEM; > > + goto err_out_fence; > > + } > > + for (i = 0; i < args->num_cliprects; i++) { > > + syncobjs[i] = drm_syncobj_find(file, > fences[i].handle); > > + if (!syncobjs[i]) { > > + DRM_DEBUG("Invalid syncobj handle > provided\n"); > > + err = -EINVAL; > > + goto err_syncobjs; > > + } > > + } > > I did this in the caller so that we only allocated and passed in a single > array of drm_syncobj (with the flags packed into the low bits). > Packing things into the low bits seems a bit sketchy but it works so long as we only have the two flag bits. > > + } > > + > > err = eb_create(&eb); > > if (err) > > - goto err_out_fence; > > + goto err_syncobjs; > > > > GEM_BUG_ON(!eb.lut_size); > > > > @@ -2304,6 +2331,16 @@ i915_gem_do_execbuffer(struct drm_device *dev, > > goto err_request; > > } > > > > + if (args->flags & I915_EXEC_FENCE_ARRAY) { > > + for (i = 0; i < args->num_cliprects; i++) { > > + if (fences[i].flags & I915_EXEC_FENCE_WAIT) { > > + if (i915_gem_request_await_dma_ > fence(eb.request, > > + > syncobjs[i]->fence)) > > + goto err_request; > > + } > > + } > > + } > > + > > if (out_fence_fd != -1) { > > out_fence = sync_file_create(&eb.request->fence); > > if (!out_fence) { > > @@ -2312,6 +2349,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, > > } > > } > > > > + /* We need to add fences to syncobjs last because doing so mutates > the > > + * syncobj and we have no good way to recover if the execbuf fails > > + * after this point. Fortunately, drm_syncobj_replace_fence can > never > > + * fail. > > + */ > > + if (args->flags & I915_EXEC_FENCE_ARRAY) { > > + for (i = 0; i < args->num_cliprects; i++) { > > + if (fences[i].flags & I915_EXEC_FENCE_SIGNAL) { > > + drm_syncobj_replace_fence(file, > syncobjs[i], > > + > &eb.request->fence); > > + } > > + } > > + } > > + > > This has to be after the execbuf is submitted, next to where out_fence > is attached is a good spot, and should only be updated on success. (The > kernel API allows combining of FENCE_WAIT | FENCE_SIGNAL so we can only > replace a FENCE_WAIT once we know we have committed the execbuf.) > It's currently right after we call out_fence = sync_file_create(&eb.request->fence); which is before eb_submit(). Are sync files wrong? Note that I was careful to ensure that there are no error paths after syncobj_replace_fence so we know for 100% sure that it will be committed, hence the comment. > I moved all of these to little functions: get_fence_array, > await_fence_array, signal_fence_array and put_fence_array. > You clearly have a version of this patch somewhere. Where can I find it?
Quoting Jason Ekstrand (2017-07-05 19:32:21) > On Wed, Jul 5, 2017 at 10:37 AM, Chris Wilson <chris@chris-wilson.co.uk> wrote: > > Quoting Jason Ekstrand (2017-07-05 18:21:22) > > This commit adds support for waiting on or signaling DRM syncobjs as > > part of execbuf. It does so by hijacking the currently unused cliprects > > pointer to instead point to an array of i915_gem_exec_fence structs > > which containe a DRM syncobj and a flags parameter which specifies > > whether to wait on it or to signal it. This implementation > > theoretically allows for both flags to be set in which case it waits on > > the dma_fence that was in the syncobj and then immediately replaces it > > with the dma_fence from the current execbuf. > > > > Cc: Chris Wilson <chris@chris-wilson.co.uk> > > --- > > > > Ideally, I'd like to get whatever kernel reviewing and/or merging is > > required to land the userspace bits ASAP. That said, I understand that > > this is my first kernel patch and it's liable to have a few rounds of > > review before going in. :-) > > > > The mesa branch for using this in Vulkan can be found here: > > > > https://cgit.freedesktop.org/~jekstrand/mesa/log/?h=review/anv-syncobj > > > > drivers/gpu/drm/i915/i915_drv.c | 3 +- > > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 97 > ++++++++++++++++++++++++++++-- > > include/uapi/drm/i915_drm.h | 30 ++++++++- > > 3 files changed, 121 insertions(+), 9 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_ > drv.c > > index 9167a73..e6f7f49 100644 > > --- a/drivers/gpu/drm/i915/i915_drv.c > > +++ b/drivers/gpu/drm/i915/i915_drv.c > > @@ -384,6 +384,7 @@ static int i915_getparam(struct drm_device *dev, void > *data, > > case I915_PARAM_HAS_EXEC_FENCE: > > case I915_PARAM_HAS_EXEC_CAPTURE: > > case I915_PARAM_HAS_EXEC_BATCH_FIRST: > > + case I915_PARAM_HAS_EXEC_FENCE_ARRAY: > > /* For the time being all of these are always true; > > * if some supported hardware does not have one of these > > * features this value needs to be provided from > > @@ -2734,7 +2735,7 @@ static struct drm_driver driver = { > > */ > > .driver_features = > > DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM | > DRIVER_PRIME | > > - DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC, > > + DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | > DRIVER_SYNCOBJ, > > .release = i915_driver_release, > > .open = i915_driver_open, > > .lastclose = i915_driver_lastclose, > > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm > /i915/i915_gem_execbuffer.c > > index 929f275..81b7b7b 100644 > > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > > @@ -33,6 +33,7 @@ > > > > #include <drm/drmP.h> > > #include <drm/i915_drm.h> > > +#include <drm/drm_syncobj.h> > > > > #include "i915_drv.h" > > #include "i915_gem_clflush.h" > > @@ -1882,8 +1883,10 @@ static bool i915_gem_check_execbuffer(struct > drm_i915_gem_execbuffer2 *exec) > > return false; > > > > /* Kernel clipping was a DRI1 misfeature */ > > - if (exec->num_cliprects || exec->cliprects_ptr) > > - return false; > > + if (!(exec->flags & I915_EXEC_FENCE_ARRAY)) { > > + if (exec->num_cliprects || exec->cliprects_ptr) > > + return false; > > + } > > > > if (exec->DR4 == 0xffffffff) { > > DRM_DEBUG("UXA submitting garbage DR4, fixing up\n"); > > @@ -2118,12 +2121,15 @@ static int > > i915_gem_do_execbuffer(struct drm_device *dev, > > struct drm_file *file, > > struct drm_i915_gem_execbuffer2 *args, > > - struct drm_i915_gem_exec_object2 *exec) > > + struct drm_i915_gem_exec_object2 *exec, > > + struct drm_i915_gem_exec_fence *fences) > > { > > struct i915_execbuffer eb; > > struct dma_fence *in_fence = NULL; > > struct sync_file *out_fence = NULL; > > + struct drm_syncobj **syncobjs = NULL; > > int out_fence_fd = -1; > > + unsigned int i; > > int err; > > > > BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & > > @@ -2186,9 +2192,30 @@ i915_gem_do_execbuffer(struct drm_device *dev, > > } > > } > > > > + if (args->flags & I915_EXEC_FENCE_ARRAY) { > > + syncobjs = kvmalloc_array(args->num_cliprects, > > + sizeof(*syncobjs), > > + __GFP_NOWARN | GFP_TEMPORARY | > > + __GFP_ZERO); > > + if (syncobjs == NULL) { > > + DRM_DEBUG("Failed to allocate array of %d > syncobjs\n", > > + args->num_cliprects); > > + err = -ENOMEM; > > + goto err_out_fence; > > + } > > + for (i = 0; i < args->num_cliprects; i++) { > > + syncobjs[i] = drm_syncobj_find(file, fences > [i].handle); > > + if (!syncobjs[i]) { > > + DRM_DEBUG("Invalid syncobj handle > provided\n"); > > + err = -EINVAL; > > + goto err_syncobjs; > > + } > > + } > > I did this in the caller so that we only allocated and passed in a single > array of drm_syncobj (with the flags packed into the low bits). > > > Packing things into the low bits seems a bit sketchy but it works so long as we > only have the two flag bits. > > > > + } > > + > > err = eb_create(&eb); > > if (err) > > - goto err_out_fence; > > + goto err_syncobjs; > > > > GEM_BUG_ON(!eb.lut_size); > > > > @@ -2304,6 +2331,16 @@ i915_gem_do_execbuffer(struct drm_device *dev, > > goto err_request; > > } > > > > + if (args->flags & I915_EXEC_FENCE_ARRAY) { > > + for (i = 0; i < args->num_cliprects; i++) { > > + if (fences[i].flags & I915_EXEC_FENCE_WAIT) { > > + if (i915_gem_request_await_dma_fence > (eb.request, > > + > syncobjs[i]->fence)) > > + goto err_request; > > + } > > + } > > + } > > + > > if (out_fence_fd != -1) { > > out_fence = sync_file_create(&eb.request->fence); > > if (!out_fence) { > > @@ -2312,6 +2349,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, > > } > > } > > > > + /* We need to add fences to syncobjs last because doing so mutates > the > > + * syncobj and we have no good way to recover if the execbuf fails > > + * after this point. Fortunately, drm_syncobj_replace_fence can > never > > + * fail. > > + */ > > + if (args->flags & I915_EXEC_FENCE_ARRAY) { > > + for (i = 0; i < args->num_cliprects; i++) { > > + if (fences[i].flags & I915_EXEC_FENCE_SIGNAL) { > > + drm_syncobj_replace_fence(file, syncobjs > [i], > > + &eb.request-> > fence); > > + } > > + } > > + } > > + > > This has to be after the execbuf is submitted, next to where out_fence > is attached is a good spot, and should only be updated on success. (The > kernel API allows combining of FENCE_WAIT | FENCE_SIGNAL so we can only > replace a FENCE_WAIT once we know we have committed the execbuf.) > > > It's currently right after we call > > out_fence = sync_file_create(&eb.request->fence); That's where we create the out fence, not where we attach it to the request, see the fd_install, which is after we have successfully committed the request. > which is before eb_submit(). Are sync files wrong? Note that I was careful to > ensure that there are no error paths after syncobj_replace_fence so we know for > 100% sure that it will be committed, hence the comment. There's the rather big one in eb_submit() that will generate EINTR for your pain. > I moved all of these to little functions: get_fence_array, > await_fence_array, signal_fence_array and put_fence_array. > > > You clearly have a version of this patch somewhere. Where can I find it? It's still on the older syncobj api, I am easily distracted. :| -Chris
On Wed, Jul 5, 2017 at 11:42 AM, Chris Wilson <chris@chris-wilson.co.uk> wrote: > Quoting Jason Ekstrand (2017-07-05 19:32:21) > > On Wed, Jul 5, 2017 at 10:37 AM, Chris Wilson <chris@chris-wilson.co.uk> > wrote: > > > > Quoting Jason Ekstrand (2017-07-05 18:21:22) > > > This commit adds support for waiting on or signaling DRM syncobjs > as > > > part of execbuf. It does so by hijacking the currently unused > cliprects > > > pointer to instead point to an array of i915_gem_exec_fence structs > > > which containe a DRM syncobj and a flags parameter which specifies > > > whether to wait on it or to signal it. This implementation > > > theoretically allows for both flags to be set in which case it > waits on > > > the dma_fence that was in the syncobj and then immediately > replaces it > > > with the dma_fence from the current execbuf. > > > > > > Cc: Chris Wilson <chris@chris-wilson.co.uk> > > > --- > > > > > > Ideally, I'd like to get whatever kernel reviewing and/or merging > is > > > required to land the userspace bits ASAP. That said, I understand > that > > > this is my first kernel patch and it's liable to have a few rounds > of > > > review before going in. :-) > > > > > > The mesa branch for using this in Vulkan can be found here: > > > > > > https://cgit.freedesktop.org/~jekstrand/mesa/log/?h=review/ > anv-syncobj > > > > > > drivers/gpu/drm/i915/i915_drv.c | 3 +- > > > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 97 > > ++++++++++++++++++++++++++++-- > > > include/uapi/drm/i915_drm.h | 30 ++++++++- > > > 3 files changed, 121 insertions(+), 9 deletions(-) > > > > > > diff --git a/drivers/gpu/drm/i915/i915_drv.c > b/drivers/gpu/drm/i915/i915_ > > drv.c > > > index 9167a73..e6f7f49 100644 > > > --- a/drivers/gpu/drm/i915/i915_drv.c > > > +++ b/drivers/gpu/drm/i915/i915_drv.c > > > @@ -384,6 +384,7 @@ static int i915_getparam(struct drm_device > *dev, void > > *data, > > > case I915_PARAM_HAS_EXEC_FENCE: > > > case I915_PARAM_HAS_EXEC_CAPTURE: > > > case I915_PARAM_HAS_EXEC_BATCH_FIRST: > > > + case I915_PARAM_HAS_EXEC_FENCE_ARRAY: > > > /* For the time being all of these are always true; > > > * if some supported hardware does not have one of > these > > > * features this value needs to be provided from > > > @@ -2734,7 +2735,7 @@ static struct drm_driver driver = { > > > */ > > > .driver_features = > > > DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM | > > DRIVER_PRIME | > > > - DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC, > > > + DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | > > DRIVER_SYNCOBJ, > > > .release = i915_driver_release, > > > .open = i915_driver_open, > > > .lastclose = i915_driver_lastclose, > > > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > b/drivers/gpu/drm > > /i915/i915_gem_execbuffer.c > > > index 929f275..81b7b7b 100644 > > > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > > > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > > > @@ -33,6 +33,7 @@ > > > > > > #include <drm/drmP.h> > > > #include <drm/i915_drm.h> > > > +#include <drm/drm_syncobj.h> > > > > > > #include "i915_drv.h" > > > #include "i915_gem_clflush.h" > > > @@ -1882,8 +1883,10 @@ static bool i915_gem_check_execbuffer( > struct > > drm_i915_gem_execbuffer2 *exec) > > > return false; > > > > > > /* Kernel clipping was a DRI1 misfeature */ > > > - if (exec->num_cliprects || exec->cliprects_ptr) > > > - return false; > > > + if (!(exec->flags & I915_EXEC_FENCE_ARRAY)) { > > > + if (exec->num_cliprects || exec->cliprects_ptr) > > > + return false; > > > + } > > > > > > if (exec->DR4 == 0xffffffff) { > > > DRM_DEBUG("UXA submitting garbage DR4, fixing > up\n"); > > > @@ -2118,12 +2121,15 @@ static int > > > i915_gem_do_execbuffer(struct drm_device *dev, > > > struct drm_file *file, > > > struct drm_i915_gem_execbuffer2 *args, > > > - struct drm_i915_gem_exec_object2 *exec) > > > + struct drm_i915_gem_exec_object2 *exec, > > > + struct drm_i915_gem_exec_fence *fences) > > > { > > > struct i915_execbuffer eb; > > > struct dma_fence *in_fence = NULL; > > > struct sync_file *out_fence = NULL; > > > + struct drm_syncobj **syncobjs = NULL; > > > int out_fence_fd = -1; > > > + unsigned int i; > > > int err; > > > > > > BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & > > > @@ -2186,9 +2192,30 @@ i915_gem_do_execbuffer(struct drm_device > *dev, > > > } > > > } > > > > > > + if (args->flags & I915_EXEC_FENCE_ARRAY) { > > > + syncobjs = kvmalloc_array(args->num_cliprects, > > > + sizeof(*syncobjs), > > > + __GFP_NOWARN | > GFP_TEMPORARY | > > > + __GFP_ZERO); > > > + if (syncobjs == NULL) { > > > + DRM_DEBUG("Failed to allocate array of %d > > syncobjs\n", > > > + args->num_cliprects); > > > + err = -ENOMEM; > > > + goto err_out_fence; > > > + } > > > + for (i = 0; i < args->num_cliprects; i++) { > > > + syncobjs[i] = drm_syncobj_find(file, fences > > [i].handle); > > > + if (!syncobjs[i]) { > > > + DRM_DEBUG("Invalid syncobj handle > > provided\n"); > > > + err = -EINVAL; > > > + goto err_syncobjs; > > > + } > > > + } > > > > I did this in the caller so that we only allocated and passed in a > single > > array of drm_syncobj (with the flags packed into the low bits). > > > > > > Packing things into the low bits seems a bit sketchy but it works so > long as we > > only have the two flag bits. > > > > > > > + } > > > + > > > err = eb_create(&eb); > > > if (err) > > > - goto err_out_fence; > > > + goto err_syncobjs; > > > > > > GEM_BUG_ON(!eb.lut_size); > > > > > > @@ -2304,6 +2331,16 @@ i915_gem_do_execbuffer(struct drm_device > *dev, > > > goto err_request; > > > } > > > > > > + if (args->flags & I915_EXEC_FENCE_ARRAY) { > > > + for (i = 0; i < args->num_cliprects; i++) { > > > + if (fences[i].flags & > I915_EXEC_FENCE_WAIT) { > > > + if (i915_gem_request_await_dma_ > fence > > (eb.request, > > > + > > > syncobjs[i]->fence)) > > > + goto err_request; > > > + } > > > + } > > > + } > > > + > > > if (out_fence_fd != -1) { > > > out_fence = sync_file_create(&eb.request->fence); > > > if (!out_fence) { > > > @@ -2312,6 +2349,20 @@ i915_gem_do_execbuffer(struct drm_device > *dev, > > > } > > > } > > > > > > + /* We need to add fences to syncobjs last because doing so > mutates > > the > > > + * syncobj and we have no good way to recover if the execbuf > fails > > > + * after this point. Fortunately, drm_syncobj_replace_fence > can > > never > > > + * fail. > > > + */ > > > + if (args->flags & I915_EXEC_FENCE_ARRAY) { > > > + for (i = 0; i < args->num_cliprects; i++) { > > > + if (fences[i].flags & > I915_EXEC_FENCE_SIGNAL) { > > > + drm_syncobj_replace_fence(file, > syncobjs > > [i], > > > + > &eb.request-> > > fence); > > > + } > > > + } > > > + } > > > + > > > > This has to be after the execbuf is submitted, next to where > out_fence > > is attached is a good spot, and should only be updated on success. > (The > > kernel API allows combining of FENCE_WAIT | FENCE_SIGNAL so we can > only > > replace a FENCE_WAIT once we know we have committed the execbuf.) > > > > > > It's currently right after we call > > > > out_fence = sync_file_create(&eb.request->fence); > > That's where we create the out fence, not where we attach it to the > request, see the fd_install, which is after we have successfully > committed the request. > > > which is before eb_submit(). Are sync files wrong? Note that I was > careful to > > ensure that there are no error paths after syncobj_replace_fence so we > know for > > 100% sure that it will be committed, hence the comment. > > There's the rather big one in eb_submit() that will generate EINTR for > your pain. > Oh. :( I see it now. I'll move to right around fd_install() > > I moved all of these to little functions: get_fence_array, > > await_fence_array, signal_fence_array and put_fence_array. > > > > > > You clearly have a version of this patch somewhere. Where can I find it? > > It's still on the older syncobj api, I am easily distracted. :| > That's ok. If you could just point me at it, I'm happy to either pull the good ideas into mine or rebase it, whichever is easier.
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 9167a73..e6f7f49 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -384,6 +384,7 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_FENCE: case I915_PARAM_HAS_EXEC_CAPTURE: case I915_PARAM_HAS_EXEC_BATCH_FIRST: + case I915_PARAM_HAS_EXEC_FENCE_ARRAY: /* For the time being all of these are always true; * if some supported hardware does not have one of these * features this value needs to be provided from @@ -2734,7 +2735,7 @@ static struct drm_driver driver = { */ .driver_features = DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM | DRIVER_PRIME | - DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC, + DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ, .release = i915_driver_release, .open = i915_driver_open, .lastclose = i915_driver_lastclose, diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 929f275..81b7b7b 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -33,6 +33,7 @@ #include <drm/drmP.h> #include <drm/i915_drm.h> +#include <drm/drm_syncobj.h> #include "i915_drv.h" #include "i915_gem_clflush.h" @@ -1882,8 +1883,10 @@ static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) return false; /* Kernel clipping was a DRI1 misfeature */ - if (exec->num_cliprects || exec->cliprects_ptr) - return false; + if (!(exec->flags & I915_EXEC_FENCE_ARRAY)) { + if (exec->num_cliprects || exec->cliprects_ptr) + return false; + } if (exec->DR4 == 0xffffffff) { DRM_DEBUG("UXA submitting garbage DR4, fixing up\n"); @@ -2118,12 +2121,15 @@ static int i915_gem_do_execbuffer(struct drm_device *dev, struct drm_file *file, struct drm_i915_gem_execbuffer2 *args, - struct drm_i915_gem_exec_object2 *exec) + struct drm_i915_gem_exec_object2 *exec, + struct drm_i915_gem_exec_fence *fences) { struct i915_execbuffer eb; struct dma_fence *in_fence = NULL; struct sync_file *out_fence = NULL; + struct drm_syncobj **syncobjs = NULL; int out_fence_fd = -1; + unsigned int i; int err; BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & @@ -2186,9 +2192,30 @@ i915_gem_do_execbuffer(struct drm_device *dev, } } + if (args->flags & I915_EXEC_FENCE_ARRAY) { + syncobjs = kvmalloc_array(args->num_cliprects, + sizeof(*syncobjs), + __GFP_NOWARN | GFP_TEMPORARY | + __GFP_ZERO); + if (syncobjs == NULL) { + DRM_DEBUG("Failed to allocate array of %d syncobjs\n", + args->num_cliprects); + err = -ENOMEM; + goto err_out_fence; + } + for (i = 0; i < args->num_cliprects; i++) { + syncobjs[i] = drm_syncobj_find(file, fences[i].handle); + if (!syncobjs[i]) { + DRM_DEBUG("Invalid syncobj handle provided\n"); + err = -EINVAL; + goto err_syncobjs; + } + } + } + err = eb_create(&eb); if (err) - goto err_out_fence; + goto err_syncobjs; GEM_BUG_ON(!eb.lut_size); @@ -2304,6 +2331,16 @@ i915_gem_do_execbuffer(struct drm_device *dev, goto err_request; } + if (args->flags & I915_EXEC_FENCE_ARRAY) { + for (i = 0; i < args->num_cliprects; i++) { + if (fences[i].flags & I915_EXEC_FENCE_WAIT) { + if (i915_gem_request_await_dma_fence(eb.request, + syncobjs[i]->fence)) + goto err_request; + } + } + } + if (out_fence_fd != -1) { out_fence = sync_file_create(&eb.request->fence); if (!out_fence) { @@ -2312,6 +2349,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, } } + /* We need to add fences to syncobjs last because doing so mutates the + * syncobj and we have no good way to recover if the execbuf fails + * after this point. Fortunately, drm_syncobj_replace_fence can never + * fail. + */ + if (args->flags & I915_EXEC_FENCE_ARRAY) { + for (i = 0; i < args->num_cliprects; i++) { + if (fences[i].flags & I915_EXEC_FENCE_SIGNAL) { + drm_syncobj_replace_fence(file, syncobjs[i], + &eb.request->fence); + } + } + } + /* * Whilst this request exists, batch_obj will be on the * active_list, and so will hold the active reference. Only when this @@ -2350,6 +2401,12 @@ i915_gem_do_execbuffer(struct drm_device *dev, i915_gem_context_put(eb.ctx); err_destroy: eb_destroy(&eb); +err_syncobjs: + if (syncobjs) { + for (i = 0; i < args->num_cliprects; i++) + drm_syncobj_put(syncobjs[i]); + kvfree(syncobjs); + } err_out_fence: if (out_fence_fd != -1) put_unused_fd(out_fence_fd); @@ -2428,7 +2485,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, exec2_list[i].flags = 0; } - err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list); + err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL); if (exec2.flags & __EXEC_HAS_RELOC) { struct drm_i915_gem_exec_object __user *user_exec_list = u64_to_user_ptr(args->buffers_ptr); @@ -2460,6 +2517,7 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data, const size_t sz = sizeof(struct drm_i915_gem_exec_object2); struct drm_i915_gem_execbuffer2 *args = data; struct drm_i915_gem_exec_object2 *exec2_list; + struct drm_i915_gem_exec_fence *fence_list = NULL; int err; if (args->buffer_count < 1 || args->buffer_count > SIZE_MAX / sz - 1) { @@ -2486,7 +2544,33 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data, return -EFAULT; } - err = i915_gem_do_execbuffer(dev, file, args, exec2_list); + if (args->flags & I915_EXEC_FENCE_ARRAY) { + if (args->num_cliprects > UINT_MAX / sizeof(*exec2_list)) { + DRM_DEBUG("execbuf2 with %d fences\n", args->num_cliprects); + kvfree(exec2_list); + return -EINVAL; + } + fence_list = kvmalloc_array(args->num_cliprects, + sizeof(*fence_list), + __GFP_NOWARN | GFP_TEMPORARY); + if (fence_list == NULL) { + DRM_DEBUG("Failed to allocate fence list for %d fences\n", + args->num_cliprects); + kvfree(exec2_list); + return -ENOMEM; + } + if (copy_from_user(fence_list, + u64_to_user_ptr(args->cliprects_ptr), + sizeof(*fence_list) * args->num_cliprects)) { + DRM_DEBUG("copy %d fence entries failed\n", + args->num_cliprects); + kvfree(exec2_list); + kvfree(fence_list); + return -EFAULT; + } + } + + err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fence_list); /* * Now that we have begun execution of the batchbuffer, we ignore @@ -2517,5 +2601,6 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data, args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS; kvfree(exec2_list); + kvfree(fence_list); return err; } diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 7ccbd6a..5b5f033 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -431,6 +431,11 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_HAS_EXEC_BATCH_FIRST 48 +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of + * drm_i915_gem_exec_fence structures. See I915_EXEC_FENCE_ARRAY. + */ +#define I915_PARAM_HAS_EXEC_FENCE_ARRAY 49 + typedef struct drm_i915_getparam { __s32 param; /* @@ -812,6 +817,17 @@ struct drm_i915_gem_exec_object2 { __u64 rsvd2; }; +struct drm_i915_gem_exec_fence { + /** + * User's handle for a dma-fence to wait on or signal. + */ + __u32 handle; + +#define I915_EXEC_FENCE_WAIT (1<<0) +#define I915_EXEC_FENCE_SIGNAL (1<<1) + __u32 flags; +}; + struct drm_i915_gem_execbuffer2 { /** * List of gem_exec_object2 structs @@ -826,7 +842,10 @@ struct drm_i915_gem_execbuffer2 { __u32 DR1; __u32 DR4; __u32 num_cliprects; - /** This is a struct drm_clip_rect *cliprects */ + /** This is a struct drm_clip_rect *cliprects if I915_EXEC_FENCE_ARRAY + * is not set. If I915_EXEC_FENCE_ARRAY is set, then this is a + * struct drm_i915_gem_exec_fence *fences. + */ __u64 cliprects_ptr; #define I915_EXEC_RING_MASK (7<<0) #define I915_EXEC_DEFAULT (0<<0) @@ -927,7 +946,14 @@ struct drm_i915_gem_execbuffer2 { * element). */ #define I915_EXEC_BATCH_FIRST (1<<18) -#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_BATCH_FIRST<<1)) + +/* Setting I915_FENCE_ARRAY implies that num_cliprects and cliprects_ptr + * define an array of i915_gem_exec_fence structures which specify a set of + * dma fences to wait upon or signal. + */ +#define I915_EXEC_FENCE_ARRAY (1<<19) + +#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1)) #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \