Message ID | 1449839521-21958-13-git-send-email-John.C.Harrison@Intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 11/12/15 13:12, John.C.Harrison@Intel.com wrote: > From: John Harrison <John.C.Harrison@Intel.com> > > Various projects desire a mechanism for managing dependencies between > work items asynchronously. This can also include work items across > completely different and independent systems. For example, an > application wants to retrieve a frame from a video in device, > using it for rendering on a GPU then send it to the video out device > for display all without having to stall waiting for completion along > the way. The sync framework allows this. It encapsulates > synchronisation events in file descriptors. The application can > request a sync point for the completion of each piece of work. Drivers > should also take sync points in with each new work request and not > schedule the work to start until the sync has been signalled. > > This patch adds sync framework support to the exec buffer IOCTL. A > sync point can be passed in to stall execution of the batch buffer > until signalled. And a sync point can be returned after each batch > buffer submission which will be signalled upon that batch buffer's > completion. > > At present, the input sync point is simply waited on synchronously > inside the exec buffer IOCTL call. Once the GPU scheduler arrives, > this will be handled asynchronously inside the scheduler and the IOCTL > can return without having to wait. > > Note also that the scheduler will re-order the execution of batch > buffers, e.g. because a batch buffer is stalled on a sync point and > cannot be submitted yet but other, independent, batch buffers are > being presented to the driver. This means that the timeline within the > sync points returned cannot be global to the engine. Instead they must > be kept per context per engine (the scheduler may not re-order batches > within a context). Hence the timeline cannot be based on the existing > seqno values but must be a new implementation. 
> > This patch is a port of work by several people that has been pulled > across from Android. It has been updated several times across several > patches. Rather than attempt to port each individual patch, this > version is the finished product as a single patch. The various > contributors/authors along the way (in addition to myself) were: > Satyanantha RamaGopal M <rama.gopal.m.satyanantha@intel.com> > Tvrtko Ursulin <tvrtko.ursulin@intel.com> > Michel Thierry <michel.thierry@intel.com> > Arun Siluvery <arun.siluvery@linux.intel.com> > > v2: New patch in series. > > v3: Updated to use the new 'sync_fence_is_signaled' API rather than > having to know about the internal meaning of the 'fence::status' field > (which recently got inverted!) [work by Peter Lawthers]. > > Updated after review comments by Daniel Vetter. Removed '#ifdef > CONFIG_SYNC' and add 'select SYNC' to the Kconfig instead. Moved the > fd installation of fences to the end of the execbuff call in order > to remove the need to use 'sys_close' to clean up on failure. > > Updated after review comments by Tvrtko Ursulin. Removed the > 'fence_external' flag as redundant. Converted DRM_ERRORs to > DRM_DEBUGs. Changed one second wait to a wait forever when waiting on > incoming fences. > > v4: Re-instated missing return of fd to user land that somehow got > lost in the anti-sys_close() re-factor. 
> > For: VIZ-5190 > Signed-off-by: John Harrison <John.C.Harrison@Intel.com> > Signed-off-by: Peter Lawthers <peter.lawthers@intel.com> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > Cc: Daniel Vetter <daniel@ffwll.ch> > --- > drivers/gpu/drm/i915/Kconfig | 3 + > drivers/gpu/drm/i915/i915_drv.h | 6 ++ > drivers/gpu/drm/i915/i915_gem.c | 89 +++++++++++++++++++++++++++- > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 95 ++++++++++++++++++++++++++++-- > include/uapi/drm/i915_drm.h | 16 ++++- > 5 files changed, 200 insertions(+), 9 deletions(-) > > diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig > index 1d96fe1..cb5d5b2 100644 > --- a/drivers/gpu/drm/i915/Kconfig > +++ b/drivers/gpu/drm/i915/Kconfig > @@ -22,6 +22,9 @@ config DRM_I915 > select ACPI_VIDEO if ACPI > select ACPI_BUTTON if ACPI > select MMU_NOTIFIER select MMU_NOTIFIER is not upstream! :) > + # ANDROID is required for SYNC > + select ANDROID > + select SYNC > help > Choose this option if you have a system that has "Intel Graphics > Media Accelerator" or "HD Graphics" integrated graphics, > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index d013c6d..194bca0 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -2278,6 +2278,12 @@ void i915_gem_request_notify(struct intel_engine_cs *ring, bool fence_locked); > int i915_create_fence_timeline(struct drm_device *dev, > struct intel_context *ctx, > struct intel_engine_cs *ring); > +struct sync_fence; > +int i915_create_sync_fence(struct drm_i915_gem_request *req, > + struct sync_fence **sync_fence, int *fence_fd); > +void i915_install_sync_fence_fd(struct drm_i915_gem_request *req, > + struct sync_fence *sync_fence, int fence_fd); > +bool i915_safe_to_ignore_fence(struct intel_engine_cs *ring, struct sync_fence *fence); > > static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req) > { > diff --git a/drivers/gpu/drm/i915/i915_gem.c 
b/drivers/gpu/drm/i915/i915_gem.c > index 4817015..279d79f 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -37,6 +37,7 @@ > #include <linux/swap.h> > #include <linux/pci.h> > #include <linux/dma-buf.h> > +#include <../drivers/android/sync.h> > > #define RQ_BUG_ON(expr) > > @@ -2560,7 +2561,13 @@ void __i915_add_request(struct drm_i915_gem_request *request, > > /* > * Add the fence to the pending list before emitting the commands to > - * generate a seqno notification interrupt. > + * generate a seqno notification interrupt. This will also enable > + * interrupts if 'signal_requested' has been set. > + * > + * For example, if an exported sync point has been requested for this > + * request then it can be waited on without the driver's knowledge, > + * i.e. without calling __i915_wait_request(). Thus interrupts must > + * be enabled from the start rather than only on demand. > */ > i915_gem_request_submit(request); > > @@ -2901,6 +2908,86 @@ static unsigned i915_fence_timeline_get_next_seqno(struct i915_fence_timeline *t > return seqno; > } > > +int i915_create_sync_fence(struct drm_i915_gem_request *req, > + struct sync_fence **sync_fence, int *fence_fd) > +{ > + char ring_name[] = "i915_ring0"; > + int fd; > + > + fd = get_unused_fd_flags(O_CLOEXEC); > + if (fd < 0) { > + DRM_DEBUG("No available file descriptors!\n"); > + *fence_fd = -1; > + return fd; > + } > + > + ring_name[9] += req->ring->id; I think this will possibly blow up if CONFIG_DEBUG_RODATA is set, which is the case on most kernels. So I think you need to make a local copy with kstrdup and free it after calling sync_fence_create_dma. 
> + *sync_fence = sync_fence_create_dma(ring_name, &req->fence); > + if (!*sync_fence) { > + put_unused_fd(fd); > + *fence_fd = -1; > + return -ENOMEM; > + } > + > + *fence_fd = fd; > + > + return 0; > +} > + > +void i915_install_sync_fence_fd(struct drm_i915_gem_request *req, > + struct sync_fence *sync_fence, int fence_fd) > +{ > + sync_fence_install(sync_fence, fence_fd); > + > + /* > + * NB: The corresponding put happens automatically on file close > + * from sync_fence_release() via the fops callback. > + */ > + fence_get(&req->fence); > + > + /* > + * The sync framework adds a callback to the fence. The fence > + * framework calls 'enable_signalling' when a callback is added. > + * Thus this flag should have been set by now. If not then > + * 'enable_signalling' must be called explicitly because exporting > + * a fence to user land means it can be waited on asynchronously and > + * thus must be signalled asynchronously. > + */ > + WARN_ON(!req->signal_requested); > +} > + > +bool i915_safe_to_ignore_fence(struct intel_engine_cs *ring, struct sync_fence *sync_fence) > +{ > + struct fence *dma_fence; > + struct drm_i915_gem_request *req; > + int i; > + > + if (sync_fence_is_signaled(sync_fence)) > + return true; > + > + for(i = 0; i < sync_fence->num_fences; i++) { > + dma_fence = sync_fence->cbs[i].sync_pt; > + > + /* No need to worry about dead points: */ > + if (fence_is_signaled(dma_fence)) > + continue; > + > + /* Can't ignore other people's points: */ Maybe add "unsignaled" to qualify. > + if(dma_fence->ops != &i915_gem_request_fops) > + return false; > + > + req = container_of(dma_fence, typeof(*req), fence); > + > + /* Can't ignore points on other rings: */ > + if (req->ring != ring) > + return false; > + > + /* Same ring means guaranteed to be in order so ignore it. 
*/ > + } > + > + return true; > +} > + > int i915_gem_request_alloc(struct intel_engine_cs *ring, > struct intel_context *ctx, > struct drm_i915_gem_request **req_out) > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > index bfc4c17..5f629f8 100644 > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > @@ -26,6 +26,7 @@ > * > */ > > +#include <linux/syscalls.h> > #include <drm/drmP.h> > #include <drm/i915_drm.h> > #include "i915_drv.h" > @@ -33,6 +34,7 @@ > #include "intel_drv.h" > #include <linux/dma_remapping.h> > #include <linux/uaccess.h> > +#include <../drivers/android/sync.h> > > #define __EXEC_OBJECT_HAS_PIN (1<<31) > #define __EXEC_OBJECT_HAS_FENCE (1<<30) > @@ -1322,6 +1324,38 @@ eb_get_batch(struct eb_vmas *eb) > return vma->obj; > } > > +static int i915_early_fence_wait(struct intel_engine_cs *ring, int fence_fd) > +{ > + struct sync_fence *fence; > + int ret = 0; > + > + if (fence_fd < 0) { > + DRM_DEBUG("Invalid wait fence fd %d on ring %d\n", fence_fd, > + (int) ring->id); > + return 1; Suggest adding kerneldoc describing return values from this function. It wasn't immediately clear to me what one means. But I am also not sure that invalid fd shouldn't be an outright error instead of allowing execbuf to contiue. > + } > + > + fence = sync_fence_fdget(fence_fd); > + if (fence == NULL) { > + DRM_DEBUG("Invalid wait fence %d on ring %d\n", fence_fd, > + (int) ring->id); > + return 1; > + } > + > + if (!sync_fence_is_signaled(fence)) { Minor comment, but i915_safe_to_ignore_fence checks this as well so you could remove it here. > + /* > + * Wait forever for the fence to be signalled. This is safe > + * because the the mutex lock has not yet been acquired and > + * the wait is interruptible. 
> + */ > + if (!i915_safe_to_ignore_fence(ring, fence)) > + ret = sync_fence_wait(fence, -1); > + } > + > + sync_fence_put(fence); > + return ret; > +} > + > static int > i915_gem_do_execbuffer(struct drm_device *dev, void *data, > struct drm_file *file, > @@ -1341,6 +1375,17 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, > u32 dispatch_flags; > int ret; > bool need_relocs; > + int fd_fence_complete = -1; > + int fd_fence_wait = lower_32_bits(args->rsvd2); > + struct sync_fence *sync_fence; > + > + /* > + * Make sure an broken fence handle is not returned no matter > + * how early an error might be hit. Note that rsvd2 has to be > + * saved away first because it is also an input parameter! > + */ Instead of the 2nd sentence maybe say something like "Note that we have saved rsvd2 already for later use since it is also in input parameter!". Like written I was expecting the code following the comment to do that, and then was confused when it didn't. Or maybe my attention span is too short. > + if (args->flags & I915_EXEC_CREATE_FENCE) > + args->rsvd2 = (__u64) -1; > > if (!i915_gem_check_execbuffer(args)) > return -EINVAL; > @@ -1424,6 +1469,17 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, > dispatch_flags |= I915_DISPATCH_RS; > } > > + /* > + * Without a GPU scheduler, any fence waits must be done up front. > + */ > + if (args->flags & I915_EXEC_WAIT_FENCE) { > + ret = i915_early_fence_wait(ring, fd_fence_wait); > + if (ret < 0) > + return ret; > + > + args->flags &= ~I915_EXEC_WAIT_FENCE; > + } > + > intel_runtime_pm_get(dev_priv); > > ret = i915_mutex_lock_interruptible(dev); > @@ -1571,8 +1627,41 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, > params->batch_obj = batch_obj; > params->ctx = ctx; > > + if (args->flags & I915_EXEC_CREATE_FENCE) { > + /* > + * Caller has requested a sync fence. > + * User interrupts will be enabled to make sure that > + * the timeline is signalled on completion. 
> + */ Is it signaled or signalled? There is a lot of usage of both throughout the patches and I as a non-native speaker am amu^H^H^Hconfused. ;) > + ret = i915_create_sync_fence(params->request, &sync_fence, > + &fd_fence_complete); > + if (ret) { > + DRM_ERROR("Fence creation failed for ring %d, ctx %p\n", > + ring->id, ctx); > + goto err_batch_unpin; > + } > + } > + > ret = dev_priv->gt.execbuf_submit(params, args, &eb->vmas); > > + if (fd_fence_complete != -1) { > + if (ret) { > + sync_fence_put(sync_fence); > + put_unused_fd(fd_fence_complete); > + } else { > + /* > + * Install the fence into the pre-allocated file > + * descriptor to the fence object so that user land > + * can wait on it... > + */ > + i915_install_sync_fence_fd(params->request, > + sync_fence, fd_fence_complete); > + > + /* Return the fence through the rsvd2 field */ > + args->rsvd2 = (__u64) fd_fence_complete; > + } > + } > + > err_batch_unpin: > /* > * FIXME: We crucially rely upon the active tracking for the (ppgtt) > @@ -1602,6 +1691,7 @@ pre_mutex_err: > /* intel_gpu_busy should also get a ref, so it will free when the device > * is really idle. 
*/ > intel_runtime_pm_put(dev_priv); > + > return ret; > } > > @@ -1707,11 +1797,6 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data, > return -EINVAL; > } > > - if (args->rsvd2 != 0) { > - DRM_DEBUG("dirty rvsd2 field\n"); > - return -EINVAL; > - } > - > exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count, > GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY); > if (exec2_list == NULL) > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index 67cebe6..86f7921 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -250,7 +250,7 @@ typedef struct _drm_i915_sarea { > #define DRM_IOCTL_I915_HWS_ADDR DRM_IOW(DRM_COMMAND_BASE + DRM_I915_HWS_ADDR, struct drm_i915_gem_init) > #define DRM_IOCTL_I915_GEM_INIT DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init) > #define DRM_IOCTL_I915_GEM_EXECBUFFER DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer) > -#define DRM_IOCTL_I915_GEM_EXECBUFFER2 DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2) > +#define DRM_IOCTL_I915_GEM_EXECBUFFER2 DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2) > #define DRM_IOCTL_I915_GEM_PIN DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin) > #define DRM_IOCTL_I915_GEM_UNPIN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin) > #define DRM_IOCTL_I915_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy) > @@ -695,7 +695,7 @@ struct drm_i915_gem_exec_object2 { > __u64 flags; > > __u64 rsvd1; > - __u64 rsvd2; > + __u64 rsvd2; /* Used for fence fd */ > }; > > struct drm_i915_gem_execbuffer2 { > @@ -776,7 +776,17 @@ struct drm_i915_gem_execbuffer2 { > */ > #define I915_EXEC_RESOURCE_STREAMER (1<<15) > > -#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER<<1) > +/** Caller supplies a sync fence fd in the rsvd2 field. 
> + * Wait for it to be signalled before starting the work > + */ > +#define I915_EXEC_WAIT_FENCE (1<<16) > + > +/** Caller wants a sync fence fd for this execbuffer. > + * It will be returned in rsvd2 > + */ > +#define I915_EXEC_CREATE_FENCE (1<<17) > + > +#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_CREATE_FENCE<<1) > > #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) > #define i915_execbuffer2_set_context_id(eb2, context) \ > Regards, Tvrtko
On 11/12/2015 15:29, Tvrtko Ursulin wrote: > > > On 11/12/15 13:12, John.C.Harrison@Intel.com wrote: >> From: John Harrison <John.C.Harrison@Intel.com> >> >> Various projects desire a mechanism for managing dependencies between >> work items asynchronously. This can also include work items across >> completely different and independent systems. For example, an >> application wants to retrieve a frame from a video in device, >> using it for rendering on a GPU then send it to the video out device >> for display all without having to stall waiting for completion along >> the way. The sync framework allows this. It encapsulates >> synchronisation events in file descriptors. The application can >> request a sync point for the completion of each piece of work. Drivers >> should also take sync points in with each new work request and not >> schedule the work to start until the sync has been signalled. >> >> This patch adds sync framework support to the exec buffer IOCTL. A >> sync point can be passed in to stall execution of the batch buffer >> until signalled. And a sync point can be returned after each batch >> buffer submission which will be signalled upon that batch buffer's >> completion. >> >> At present, the input sync point is simply waited on synchronously >> inside the exec buffer IOCTL call. Once the GPU scheduler arrives, >> this will be handled asynchronously inside the scheduler and the IOCTL >> can return without having to wait. >> >> Note also that the scheduler will re-order the execution of batch >> buffers, e.g. because a batch buffer is stalled on a sync point and >> cannot be submitted yet but other, independent, batch buffers are >> being presented to the driver. This means that the timeline within the >> sync points returned cannot be global to the engine. Instead they must >> be kept per context per engine (the scheduler may not re-order batches >> within a context). 
Hence the timeline cannot be based on the existing >> seqno values but must be a new implementation. >> >> This patch is a port of work by several people that has been pulled >> across from Android. It has been updated several times across several >> patches. Rather than attempt to port each individual patch, this >> version is the finished product as a single patch. The various >> contributors/authors along the way (in addition to myself) were: >> Satyanantha RamaGopal M <rama.gopal.m.satyanantha@intel.com> >> Tvrtko Ursulin <tvrtko.ursulin@intel.com> >> Michel Thierry <michel.thierry@intel.com> >> Arun Siluvery <arun.siluvery@linux.intel.com> >> >> v2: New patch in series. >> >> v3: Updated to use the new 'sync_fence_is_signaled' API rather than >> having to know about the internal meaning of the 'fence::status' field >> (which recently got inverted!) [work by Peter Lawthers]. >> >> Updated after review comments by Daniel Vetter. Removed '#ifdef >> CONFIG_SYNC' and add 'select SYNC' to the Kconfig instead. Moved the >> fd installation of fences to the end of the execbuff call in order >> to remove the need to use 'sys_close' to clean up on failure. >> >> Updated after review comments by Tvrtko Ursulin. Removed the >> 'fence_external' flag as redundant. Converted DRM_ERRORs to >> DRM_DEBUGs. Changed one second wait to a wait forever when waiting on >> incoming fences. >> >> v4: Re-instated missing return of fd to user land that somehow got >> lost in the anti-sys_close() re-factor. 
>> >> For: VIZ-5190 >> Signed-off-by: John Harrison <John.C.Harrison@Intel.com> >> Signed-off-by: Peter Lawthers <peter.lawthers@intel.com> >> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >> Cc: Daniel Vetter <daniel@ffwll.ch> >> --- >> drivers/gpu/drm/i915/Kconfig | 3 + >> drivers/gpu/drm/i915/i915_drv.h | 6 ++ >> drivers/gpu/drm/i915/i915_gem.c | 89 >> +++++++++++++++++++++++++++- >> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 95 >> ++++++++++++++++++++++++++++-- >> include/uapi/drm/i915_drm.h | 16 ++++- >> 5 files changed, 200 insertions(+), 9 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig >> index 1d96fe1..cb5d5b2 100644 >> --- a/drivers/gpu/drm/i915/Kconfig >> +++ b/drivers/gpu/drm/i915/Kconfig >> @@ -22,6 +22,9 @@ config DRM_I915 >> select ACPI_VIDEO if ACPI >> select ACPI_BUTTON if ACPI >> select MMU_NOTIFIER > > select MMU_NOTIFIER is not upstream! :) Oops. That's from a patch not being upstreamed. It is required for OCL2.0 which is something we are using to test the scheduler. 
> >> + # ANDROID is required for SYNC >> + select ANDROID >> + select SYNC >> help >> Choose this option if you have a system that has "Intel Graphics >> Media Accelerator" or "HD Graphics" integrated graphics, >> diff --git a/drivers/gpu/drm/i915/i915_drv.h >> b/drivers/gpu/drm/i915/i915_drv.h >> index d013c6d..194bca0 100644 >> --- a/drivers/gpu/drm/i915/i915_drv.h >> +++ b/drivers/gpu/drm/i915/i915_drv.h >> @@ -2278,6 +2278,12 @@ void i915_gem_request_notify(struct >> intel_engine_cs *ring, bool fence_locked); >> int i915_create_fence_timeline(struct drm_device *dev, >> struct intel_context *ctx, >> struct intel_engine_cs *ring); >> +struct sync_fence; >> +int i915_create_sync_fence(struct drm_i915_gem_request *req, >> + struct sync_fence **sync_fence, int *fence_fd); >> +void i915_install_sync_fence_fd(struct drm_i915_gem_request *req, >> + struct sync_fence *sync_fence, int fence_fd); >> +bool i915_safe_to_ignore_fence(struct intel_engine_cs *ring, struct >> sync_fence *fence); >> >> static inline bool i915_gem_request_completed(struct >> drm_i915_gem_request *req) >> { >> diff --git a/drivers/gpu/drm/i915/i915_gem.c >> b/drivers/gpu/drm/i915/i915_gem.c >> index 4817015..279d79f 100644 >> --- a/drivers/gpu/drm/i915/i915_gem.c >> +++ b/drivers/gpu/drm/i915/i915_gem.c >> @@ -37,6 +37,7 @@ >> #include <linux/swap.h> >> #include <linux/pci.h> >> #include <linux/dma-buf.h> >> +#include <../drivers/android/sync.h> >> >> #define RQ_BUG_ON(expr) >> >> @@ -2560,7 +2561,13 @@ void __i915_add_request(struct >> drm_i915_gem_request *request, >> >> /* >> * Add the fence to the pending list before emitting the >> commands to >> - * generate a seqno notification interrupt. >> + * generate a seqno notification interrupt. This will also enable >> + * interrupts if 'signal_requested' has been set. >> + * >> + * For example, if an exported sync point has been requested for >> this >> + * request then it can be waited on without the driver's knowledge, >> + * i.e. 
without calling __i915_wait_request(). Thus interrupts must >> + * be enabled from the start rather than only on demand. >> */ >> i915_gem_request_submit(request); >> >> @@ -2901,6 +2908,86 @@ static unsigned >> i915_fence_timeline_get_next_seqno(struct i915_fence_timeline *t >> return seqno; >> } >> >> +int i915_create_sync_fence(struct drm_i915_gem_request *req, >> + struct sync_fence **sync_fence, int *fence_fd) >> +{ >> + char ring_name[] = "i915_ring0"; >> + int fd; >> + >> + fd = get_unused_fd_flags(O_CLOEXEC); >> + if (fd < 0) { >> + DRM_DEBUG("No available file descriptors!\n"); >> + *fence_fd = -1; >> + return fd; >> + } >> + >> + ring_name[9] += req->ring->id; > > I think this will possibly blow up if CONFIG_DEBUG_RODATA is set, > which is the case on most kernels. > > So I think you need to make a local copy with kstrdup and free it > after calling sync_fence_create_dma. What will blow up? The ring_name local is a stack array not a pointer to the data segment. Did you miss the '[]'? > >> + *sync_fence = sync_fence_create_dma(ring_name, &req->fence); >> + if (!*sync_fence) { >> + put_unused_fd(fd); >> + *fence_fd = -1; >> + return -ENOMEM; >> + } >> + >> + *fence_fd = fd; >> + >> + return 0; >> +} >> + >> +void i915_install_sync_fence_fd(struct drm_i915_gem_request *req, >> + struct sync_fence *sync_fence, int fence_fd) >> +{ >> + sync_fence_install(sync_fence, fence_fd); >> + >> + /* >> + * NB: The corresponding put happens automatically on file close >> + * from sync_fence_release() via the fops callback. >> + */ >> + fence_get(&req->fence); >> + >> + /* >> + * The sync framework adds a callback to the fence. The fence >> + * framework calls 'enable_signalling' when a callback is added. >> + * Thus this flag should have been set by now. If not then >> + * 'enable_signalling' must be called explicitly because exporting >> + * a fence to user land means it can be waited on asynchronously >> and >> + * thus must be signalled asynchronously. 
>> + */ >> + WARN_ON(!req->signal_requested); >> +} >> + >> +bool i915_safe_to_ignore_fence(struct intel_engine_cs *ring, struct >> sync_fence *sync_fence) >> +{ >> + struct fence *dma_fence; >> + struct drm_i915_gem_request *req; >> + int i; >> + >> + if (sync_fence_is_signaled(sync_fence)) >> + return true; >> + >> + for(i = 0; i < sync_fence->num_fences; i++) { >> + dma_fence = sync_fence->cbs[i].sync_pt; >> + >> + /* No need to worry about dead points: */ >> + if (fence_is_signaled(dma_fence)) >> + continue; >> + >> + /* Can't ignore other people's points: */ > > Maybe add "unsignaled" to qualify. The test above filters out anything that is signalled (or errored). Stating that again on each subsequent test seems unnecessarily verbose. > >> + if(dma_fence->ops != &i915_gem_request_fops) >> + return false; >> + >> + req = container_of(dma_fence, typeof(*req), fence); >> + >> + /* Can't ignore points on other rings: */ >> + if (req->ring != ring) >> + return false; >> + >> + /* Same ring means guaranteed to be in order so ignore it. 
*/ >> + } >> + >> + return true; >> +} >> + >> int i915_gem_request_alloc(struct intel_engine_cs *ring, >> struct intel_context *ctx, >> struct drm_i915_gem_request **req_out) >> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c >> b/drivers/gpu/drm/i915/i915_gem_execbuffer.c >> index bfc4c17..5f629f8 100644 >> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c >> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c >> @@ -26,6 +26,7 @@ >> * >> */ >> >> +#include <linux/syscalls.h> >> #include <drm/drmP.h> >> #include <drm/i915_drm.h> >> #include "i915_drv.h" >> @@ -33,6 +34,7 @@ >> #include "intel_drv.h" >> #include <linux/dma_remapping.h> >> #include <linux/uaccess.h> >> +#include <../drivers/android/sync.h> >> >> #define __EXEC_OBJECT_HAS_PIN (1<<31) >> #define __EXEC_OBJECT_HAS_FENCE (1<<30) >> @@ -1322,6 +1324,38 @@ eb_get_batch(struct eb_vmas *eb) >> return vma->obj; >> } >> >> +static int i915_early_fence_wait(struct intel_engine_cs *ring, int >> fence_fd) >> +{ >> + struct sync_fence *fence; >> + int ret = 0; >> + >> + if (fence_fd < 0) { >> + DRM_DEBUG("Invalid wait fence fd %d on ring %d\n", fence_fd, >> + (int) ring->id); >> + return 1; > > Suggest adding kerneldoc describing return values from this function. > > It wasn't immediately clear to me what one means. To be honest, I think the one is left over from an earlier iteration of the code which did things slightly differently. It probably could just return zero or -error. > But I am also not sure that invalid fd shouldn't be an outright error > instead of allowing execbuf to contiue. Wasn't sure if it was possible for a fence to be invalidated behind the back of an application. And you don't want to reject rendering from one app just because another app went splat. I guess even if the underlying fence has been destroyed, the fd will still be private to the current app and hence valid. So maybe it should just return -EINVAL or some such of the fd itself is toast. 
> >> + } >> + >> + fence = sync_fence_fdget(fence_fd); >> + if (fence == NULL) { >> + DRM_DEBUG("Invalid wait fence %d on ring %d\n", fence_fd, >> + (int) ring->id); >> + return 1; >> + } >> + >> + if (!sync_fence_is_signaled(fence)) { > > Minor comment, but i915_safe_to_ignore_fence checks this as well so > you could remove it here. > >> + /* >> + * Wait forever for the fence to be signalled. This is safe >> + * because the the mutex lock has not yet been acquired and >> + * the wait is interruptible. >> + */ >> + if (!i915_safe_to_ignore_fence(ring, fence)) >> + ret = sync_fence_wait(fence, -1); >> + } >> + >> + sync_fence_put(fence); >> + return ret; >> +} >> + >> static int >> i915_gem_do_execbuffer(struct drm_device *dev, void *data, >> struct drm_file *file, >> @@ -1341,6 +1375,17 @@ i915_gem_do_execbuffer(struct drm_device *dev, >> void *data, >> u32 dispatch_flags; >> int ret; >> bool need_relocs; >> + int fd_fence_complete = -1; >> + int fd_fence_wait = lower_32_bits(args->rsvd2); >> + struct sync_fence *sync_fence; >> + >> + /* >> + * Make sure an broken fence handle is not returned no matter >> + * how early an error might be hit. Note that rsvd2 has to be >> + * saved away first because it is also an input parameter! >> + */ > > Instead of the 2nd sentence maybe say something like "Note that we > have saved rsvd2 already for later use since it is also in input > parameter!". Like written I was expecting the code following the > comment to do that, and then was confused when it didn't. Or maybe my > attention span is too short. Will update the comment for those who can't remember what they read two lines earlier... 
> >> + if (args->flags & I915_EXEC_CREATE_FENCE) >> + args->rsvd2 = (__u64) -1; >> >> if (!i915_gem_check_execbuffer(args)) >> return -EINVAL; >> @@ -1424,6 +1469,17 @@ i915_gem_do_execbuffer(struct drm_device *dev, >> void *data, >> dispatch_flags |= I915_DISPATCH_RS; >> } >> >> + /* >> + * Without a GPU scheduler, any fence waits must be done up front. >> + */ >> + if (args->flags & I915_EXEC_WAIT_FENCE) { >> + ret = i915_early_fence_wait(ring, fd_fence_wait); >> + if (ret < 0) >> + return ret; >> + >> + args->flags &= ~I915_EXEC_WAIT_FENCE; >> + } >> + >> intel_runtime_pm_get(dev_priv); >> >> ret = i915_mutex_lock_interruptible(dev); >> @@ -1571,8 +1627,41 @@ i915_gem_do_execbuffer(struct drm_device *dev, >> void *data, >> params->batch_obj = batch_obj; >> params->ctx = ctx; >> >> + if (args->flags & I915_EXEC_CREATE_FENCE) { >> + /* >> + * Caller has requested a sync fence. >> + * User interrupts will be enabled to make sure that >> + * the timeline is signalled on completion. >> + */ > > Is it signaled or signalled? There is a lot of usage of both > throughout the patches and I as a non-native speaker am > amu^H^H^Hconfused. ;) It depends which side of the Atlantic you are on. English has a double l, American just a single. So largely it depends who wrote the code/comment and who (if anyone!) has reviewed it. > >> + ret = i915_create_sync_fence(params->request, &sync_fence, >> + &fd_fence_complete); >> + if (ret) { >> + DRM_ERROR("Fence creation failed for ring %d, ctx %p\n", >> + ring->id, ctx); >> + goto err_batch_unpin; >> + } >> + } >> + >> ret = dev_priv->gt.execbuf_submit(params, args, &eb->vmas); >> >> + if (fd_fence_complete != -1) { >> + if (ret) { >> + sync_fence_put(sync_fence); >> + put_unused_fd(fd_fence_complete); >> + } else { >> + /* >> + * Install the fence into the pre-allocated file >> + * descriptor to the fence object so that user land >> + * can wait on it... 
>> + */ >> + i915_install_sync_fence_fd(params->request, >> + sync_fence, fd_fence_complete); >> + >> + /* Return the fence through the rsvd2 field */ >> + args->rsvd2 = (__u64) fd_fence_complete; >> + } >> + } >> + >> err_batch_unpin: >> /* >> * FIXME: We crucially rely upon the active tracking for the >> (ppgtt) >> @@ -1602,6 +1691,7 @@ pre_mutex_err: >> /* intel_gpu_busy should also get a ref, so it will free when >> the device >> * is really idle. */ >> intel_runtime_pm_put(dev_priv); >> + >> return ret; >> } >> >> @@ -1707,11 +1797,6 @@ i915_gem_execbuffer2(struct drm_device *dev, >> void *data, >> return -EINVAL; >> } >> >> - if (args->rsvd2 != 0) { >> - DRM_DEBUG("dirty rvsd2 field\n"); >> - return -EINVAL; >> - } >> - >> exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count, >> GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY); >> if (exec2_list == NULL) >> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h >> index 67cebe6..86f7921 100644 >> --- a/include/uapi/drm/i915_drm.h >> +++ b/include/uapi/drm/i915_drm.h >> @@ -250,7 +250,7 @@ typedef struct _drm_i915_sarea { >> #define DRM_IOCTL_I915_HWS_ADDR DRM_IOW(DRM_COMMAND_BASE + >> DRM_I915_HWS_ADDR, struct drm_i915_gem_init) >> #define DRM_IOCTL_I915_GEM_INIT DRM_IOW(DRM_COMMAND_BASE + >> DRM_I915_GEM_INIT, struct drm_i915_gem_init) >> #define DRM_IOCTL_I915_GEM_EXECBUFFER DRM_IOW(DRM_COMMAND_BASE + >> DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer) >> -#define DRM_IOCTL_I915_GEM_EXECBUFFER2 DRM_IOW(DRM_COMMAND_BASE + >> DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2) >> +#define DRM_IOCTL_I915_GEM_EXECBUFFER2 DRM_IOWR(DRM_COMMAND_BASE + >> DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2) >> #define DRM_IOCTL_I915_GEM_PIN DRM_IOWR(DRM_COMMAND_BASE + >> DRM_I915_GEM_PIN, struct drm_i915_gem_pin) >> #define DRM_IOCTL_I915_GEM_UNPIN DRM_IOW(DRM_COMMAND_BASE + >> DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin) >> #define DRM_IOCTL_I915_GEM_BUSY 
DRM_IOWR(DRM_COMMAND_BASE + >> DRM_I915_GEM_BUSY, struct drm_i915_gem_busy) >> @@ -695,7 +695,7 @@ struct drm_i915_gem_exec_object2 { >> __u64 flags; >> >> __u64 rsvd1; >> - __u64 rsvd2; >> + __u64 rsvd2; /* Used for fence fd */ >> }; >> >> struct drm_i915_gem_execbuffer2 { >> @@ -776,7 +776,17 @@ struct drm_i915_gem_execbuffer2 { >> */ >> #define I915_EXEC_RESOURCE_STREAMER (1<<15) >> >> -#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER<<1) >> +/** Caller supplies a sync fence fd in the rsvd2 field. >> + * Wait for it to be signalled before starting the work >> + */ >> +#define I915_EXEC_WAIT_FENCE (1<<16) >> + >> +/** Caller wants a sync fence fd for this execbuffer. >> + * It will be returned in rsvd2 >> + */ >> +#define I915_EXEC_CREATE_FENCE (1<<17) >> + >> +#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_CREATE_FENCE<<1) >> >> #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) >> #define i915_execbuffer2_set_context_id(eb2, context) \ >> > > Regards, > > Tvrtko >
On Mon, Dec 14, 2015 at 11:46:22AM +0000, John Harrison wrote: > >>@@ -1341,6 +1375,17 @@ i915_gem_do_execbuffer(struct drm_device > >>*dev, void *data, > >> u32 dispatch_flags; > >> int ret; > >> bool need_relocs; > >>+ int fd_fence_complete = -1; > >>+ int fd_fence_wait = lower_32_bits(args->rsvd2); > >>+ struct sync_fence *sync_fence; > >>+ > >>+ /* > >>+ * Make sure an broken fence handle is not returned no matter > >>+ * how early an error might be hit. Note that rsvd2 has to be > >>+ * saved away first because it is also an input parameter! > >>+ */ > > > >Instead of the 2nd sentence maybe say something like "Note that we > >have saved rsvd2 already for later use since it is also in input > >parameter!". Like written I was expecting the code following the > >comment to do that, and then was confused when it didn't. Or maybe > >my attention span is too short. > Will update the comment for those who can't remember what they read > two lines earlier... Honestly, I thought the complaint here would be that the user's input parameter is being modified on the error path breaking the ABI i.e. drmIoctl() will not work. -Chris
diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 1d96fe1..cb5d5b2 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -22,6 +22,9 @@ config DRM_I915 select ACPI_VIDEO if ACPI select ACPI_BUTTON if ACPI select MMU_NOTIFIER + # ANDROID is required for SYNC + select ANDROID + select SYNC help Choose this option if you have a system that has "Intel Graphics Media Accelerator" or "HD Graphics" integrated graphics, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d013c6d..194bca0 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2278,6 +2278,12 @@ void i915_gem_request_notify(struct intel_engine_cs *ring, bool fence_locked); int i915_create_fence_timeline(struct drm_device *dev, struct intel_context *ctx, struct intel_engine_cs *ring); +struct sync_fence; +int i915_create_sync_fence(struct drm_i915_gem_request *req, + struct sync_fence **sync_fence, int *fence_fd); +void i915_install_sync_fence_fd(struct drm_i915_gem_request *req, + struct sync_fence *sync_fence, int fence_fd); +bool i915_safe_to_ignore_fence(struct intel_engine_cs *ring, struct sync_fence *fence); static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req) { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 4817015..279d79f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -37,6 +37,7 @@ #include <linux/swap.h> #include <linux/pci.h> #include <linux/dma-buf.h> +#include <../drivers/android/sync.h> #define RQ_BUG_ON(expr) @@ -2560,7 +2561,13 @@ void __i915_add_request(struct drm_i915_gem_request *request, /* * Add the fence to the pending list before emitting the commands to - * generate a seqno notification interrupt. + * generate a seqno notification interrupt. This will also enable + * interrupts if 'signal_requested' has been set. 
+ * + * For example, if an exported sync point has been requested for this + * request then it can be waited on without the driver's knowledge, + * i.e. without calling __i915_wait_request(). Thus interrupts must + * be enabled from the start rather than only on demand. */ i915_gem_request_submit(request); @@ -2901,6 +2908,86 @@ static unsigned i915_fence_timeline_get_next_seqno(struct i915_fence_timeline *t return seqno; } +int i915_create_sync_fence(struct drm_i915_gem_request *req, + struct sync_fence **sync_fence, int *fence_fd) +{ + char ring_name[] = "i915_ring0"; + int fd; + + fd = get_unused_fd_flags(O_CLOEXEC); + if (fd < 0) { + DRM_DEBUG("No available file descriptors!\n"); + *fence_fd = -1; + return fd; + } + + ring_name[9] += req->ring->id; + *sync_fence = sync_fence_create_dma(ring_name, &req->fence); + if (!*sync_fence) { + put_unused_fd(fd); + *fence_fd = -1; + return -ENOMEM; + } + + *fence_fd = fd; + + return 0; +} + +void i915_install_sync_fence_fd(struct drm_i915_gem_request *req, + struct sync_fence *sync_fence, int fence_fd) +{ + sync_fence_install(sync_fence, fence_fd); + + /* + * NB: The corresponding put happens automatically on file close + * from sync_fence_release() via the fops callback. + */ + fence_get(&req->fence); + + /* + * The sync framework adds a callback to the fence. The fence + * framework calls 'enable_signalling' when a callback is added. + * Thus this flag should have been set by now. If not then + * 'enable_signalling' must be called explicitly because exporting + * a fence to user land means it can be waited on asynchronously and + * thus must be signalled asynchronously. 
+ */ + WARN_ON(!req->signal_requested); +} + +bool i915_safe_to_ignore_fence(struct intel_engine_cs *ring, struct sync_fence *sync_fence) +{ + struct fence *dma_fence; + struct drm_i915_gem_request *req; + int i; + + if (sync_fence_is_signaled(sync_fence)) + return true; + + for (i = 0; i < sync_fence->num_fences; i++) { + dma_fence = sync_fence->cbs[i].sync_pt; + + /* No need to worry about dead points: */ + if (fence_is_signaled(dma_fence)) + continue; + + /* Can't ignore other people's points: */ + if (dma_fence->ops != &i915_gem_request_fops) + return false; + + req = container_of(dma_fence, typeof(*req), fence); + + /* Can't ignore points on other rings: */ + if (req->ring != ring) + return false; + + /* Same ring means guaranteed to be in order so ignore it. */ + } + + return true; +} + int i915_gem_request_alloc(struct intel_engine_cs *ring, struct intel_context *ctx, struct drm_i915_gem_request **req_out) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index bfc4c17..5f629f8 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -26,6 +26,7 @@ * */ +#include <linux/syscalls.h> #include <drm/drmP.h> #include <drm/i915_drm.h> #include "i915_drv.h" @@ -33,6 +34,7 @@ #include "intel_drv.h" #include <linux/dma_remapping.h> #include <linux/uaccess.h> +#include <../drivers/android/sync.h> #define __EXEC_OBJECT_HAS_PIN (1<<31) #define __EXEC_OBJECT_HAS_FENCE (1<<30) @@ -1322,6 +1324,38 @@ eb_get_batch(struct eb_vmas *eb) return vma->obj; } +static int i915_early_fence_wait(struct intel_engine_cs *ring, int fence_fd) +{ + struct sync_fence *fence; + int ret = 0; + + if (fence_fd < 0) { + DRM_DEBUG("Invalid wait fence fd %d on ring %d\n", fence_fd, + (int) ring->id); + return 1; + } + + fence = sync_fence_fdget(fence_fd); + if (fence == NULL) { + DRM_DEBUG("Invalid wait fence %d on ring %d\n", fence_fd, + (int) ring->id); + return 1; + } + + if 
(!sync_fence_is_signaled(fence)) { + /* + * Wait forever for the fence to be signalled. This is safe + * because the mutex lock has not yet been acquired and + * the wait is interruptible. + */ + if (!i915_safe_to_ignore_fence(ring, fence)) + ret = sync_fence_wait(fence, -1); + } + + sync_fence_put(fence); + return ret; +} + static int i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct drm_file *file, @@ -1341,6 +1375,17 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, u32 dispatch_flags; int ret; bool need_relocs; + int fd_fence_complete = -1; + int fd_fence_wait = lower_32_bits(args->rsvd2); + struct sync_fence *sync_fence; + + /* + * Make sure a broken fence handle is not returned no matter + * how early an error might be hit. Note that we have already + * saved rsvd2 for later use since it is also an input parameter! + */ + if (args->flags & I915_EXEC_CREATE_FENCE) + args->rsvd2 = (__u64) -1; if (!i915_gem_check_execbuffer(args)) return -EINVAL; @@ -1424,6 +1469,17 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, dispatch_flags |= I915_DISPATCH_RS; } + /* + * Without a GPU scheduler, any fence waits must be done up front. + */ + if (args->flags & I915_EXEC_WAIT_FENCE) { + ret = i915_early_fence_wait(ring, fd_fence_wait); + if (ret < 0) + return ret; + + args->flags &= ~I915_EXEC_WAIT_FENCE; + } + intel_runtime_pm_get(dev_priv); ret = i915_mutex_lock_interruptible(dev); @@ -1571,8 +1627,41 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, params->batch_obj = batch_obj; params->ctx = ctx; + if (args->flags & I915_EXEC_CREATE_FENCE) { + /* + * Caller has requested a sync fence. + * User interrupts will be enabled to make sure that + * the timeline is signalled on completion. 
+ */ + ret = i915_create_sync_fence(params->request, &sync_fence, + &fd_fence_complete); + if (ret) { + DRM_ERROR("Fence creation failed for ring %d, ctx %p\n", + ring->id, ctx); + goto err_batch_unpin; + } + } + ret = dev_priv->gt.execbuf_submit(params, args, &eb->vmas); + if (fd_fence_complete != -1) { + if (ret) { + sync_fence_put(sync_fence); + put_unused_fd(fd_fence_complete); + } else { + /* + * Install the fence into the pre-allocated file + * descriptor to the fence object so that user land + * can wait on it... + */ + i915_install_sync_fence_fd(params->request, + sync_fence, fd_fence_complete); + + /* Return the fence through the rsvd2 field */ + args->rsvd2 = (__u64) fd_fence_complete; + } + } + err_batch_unpin: /* * FIXME: We crucially rely upon the active tracking for the (ppgtt) @@ -1602,6 +1691,7 @@ pre_mutex_err: /* intel_gpu_busy should also get a ref, so it will free when the device * is really idle. */ intel_runtime_pm_put(dev_priv); + return ret; } @@ -1707,11 +1797,6 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data, return -EINVAL; } - if (args->rsvd2 != 0) { - DRM_DEBUG("dirty rvsd2 field\n"); - return -EINVAL; - } - exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY); if (exec2_list == NULL) diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 67cebe6..86f7921 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -250,7 +250,7 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_HWS_ADDR DRM_IOW(DRM_COMMAND_BASE + DRM_I915_HWS_ADDR, struct drm_i915_gem_init) #define DRM_IOCTL_I915_GEM_INIT DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init) #define DRM_IOCTL_I915_GEM_EXECBUFFER DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer) -#define DRM_IOCTL_I915_GEM_EXECBUFFER2 DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2) +#define 
DRM_IOCTL_I915_GEM_EXECBUFFER2 DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2) #define DRM_IOCTL_I915_GEM_PIN DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin) #define DRM_IOCTL_I915_GEM_UNPIN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin) #define DRM_IOCTL_I915_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy) @@ -695,7 +695,7 @@ struct drm_i915_gem_exec_object2 { __u64 flags; __u64 rsvd1; - __u64 rsvd2; + __u64 rsvd2; /* Used for fence fd */ }; struct drm_i915_gem_execbuffer2 { @@ -776,7 +776,17 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_RESOURCE_STREAMER (1<<15) -#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER<<1) +/** Caller supplies a sync fence fd in the rsvd2 field. + * Wait for it to be signalled before starting the work + */ +#define I915_EXEC_WAIT_FENCE (1<<16) + +/** Caller wants a sync fence fd for this execbuffer. + * It will be returned in rsvd2 + */ +#define I915_EXEC_CREATE_FENCE (1<<17) + +#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_CREATE_FENCE<<1) #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \