| Message ID | 1432917856-12261-26-git-send-email-John.C.Harrison@Intel.com (mailing list archive) |
|---|---|
| State | New, archived |
| Headers | show |
On 29/05/2015 17:43, John.C.Harrison@Intel.com wrote: > From: John Harrison <John.C.Harrison@Intel.com> > > The plan is to pass requests around as the basic submission tracking structure > rather than rings and contexts. This patch updates the i915_gem_object_sync() > code path. > > v2: Much more complex patch to share a single request between the sync and the > page flip. The _sync() function now supports lazy allocation of the request > structure. That is, if one is passed in then that will be used. If one is not, > then a request will be allocated and passed back out. Note that the _sync() code > does not necessarily require a request. Thus one will only be created until > certain situations. The reason the lazy allocation must be done within the > _sync() code itself is because the decision to need one or not is not really > something that code above can second guess (except in the case where one is > definitely not required because no ring is passed in). > > The call chains above _sync() now support passing a request through which most > callers passing in NULL and assuming that no request will be required (because > they also pass in NULL for the ring and therefore can't be generating any ring > code). > > The exeception is intel_crtc_page_flip() which now supports having a request 1. "The exeception" -> "The exception" > returned from _sync(). If one is, then that request is shared by the page flip > (if the page flip is of a type to need a request). If _sync() does not generate > a request but the page flip does need one, then the page flip path will create > its own request. > > v3: Updated comment description to be clearer about 'to_req' parameter (Tomas > Elf review request). Rebased onto newer tree that significantly changed the > synchronisation code. 
> > For: VIZ-5115 > Signed-off-by: John Harrison <John.C.Harrison@Intel.com> > --- > drivers/gpu/drm/i915/i915_drv.h | 4 ++- > drivers/gpu/drm/i915/i915_gem.c | 48 +++++++++++++++++++++------- > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +- > drivers/gpu/drm/i915/intel_display.c | 17 +++++++--- > drivers/gpu/drm/i915/intel_drv.h | 3 +- > drivers/gpu/drm/i915/intel_fbdev.c | 2 +- > drivers/gpu/drm/i915/intel_lrc.c | 2 +- > drivers/gpu/drm/i915/intel_overlay.c | 2 +- > 8 files changed, 58 insertions(+), 22 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 64a10fa..f69e9cb 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -2778,7 +2778,8 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) > > int __must_check i915_mutex_lock_interruptible(struct drm_device *dev); > int i915_gem_object_sync(struct drm_i915_gem_object *obj, > - struct intel_engine_cs *to); > + struct intel_engine_cs *to, > + struct drm_i915_gem_request **to_req); > void i915_vma_move_to_active(struct i915_vma *vma, > struct intel_engine_cs *ring); > int i915_gem_dumb_create(struct drm_file *file_priv, > @@ -2889,6 +2890,7 @@ int __must_check > i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, > u32 alignment, > struct intel_engine_cs *pipelined, > + struct drm_i915_gem_request **pipelined_request, > const struct i915_ggtt_view *view); > void i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj, > const struct i915_ggtt_view *view); > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index b7d66aa..db90043 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -3098,25 +3098,26 @@ out: > static int > __i915_gem_object_sync(struct drm_i915_gem_object *obj, > struct intel_engine_cs *to, > - struct drm_i915_gem_request *req) > + struct drm_i915_gem_request *from_req, > + 
struct drm_i915_gem_request **to_req) > { > struct intel_engine_cs *from; > int ret; > > - from = i915_gem_request_get_ring(req); > + from = i915_gem_request_get_ring(from_req); > if (to == from) > return 0; > > - if (i915_gem_request_completed(req, true)) > + if (i915_gem_request_completed(from_req, true)) > return 0; > > - ret = i915_gem_check_olr(req); > + ret = i915_gem_check_olr(from_req); > if (ret) > return ret; > > if (!i915_semaphore_is_enabled(obj->base.dev)) { > struct drm_i915_private *i915 = to_i915(obj->base.dev); > - ret = __i915_wait_request(req, > + ret = __i915_wait_request(from_req, > atomic_read(&i915->gpu_error.reset_counter), > i915->mm.interruptible, > NULL, > @@ -3124,15 +3125,25 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj, > if (ret) > return ret; > > - i915_gem_object_retire_request(obj, req); > + i915_gem_object_retire_request(obj, from_req); > } else { > int idx = intel_ring_sync_index(from, to); > - u32 seqno = i915_gem_request_get_seqno(req); > + u32 seqno = i915_gem_request_get_seqno(from_req); > > + WARN_ON(!to_req); > + > + /* Optimization: Avoid semaphore sync when we are sure we already > + * waited for an object with higher seqno */ 2. How about using the standard multi-line comment format? /* (empty line) * (first line) * (second line) */ > if (seqno <= from->semaphore.sync_seqno[idx]) > return 0; > > - trace_i915_gem_ring_sync_to(from, to, req); > + if (*to_req == NULL) { > + ret = i915_gem_request_alloc(to, to->default_context, to_req); > + if (ret) > + return ret; > + } > + > + trace_i915_gem_ring_sync_to(from, to, from_req); > ret = to->semaphore.sync_to(to, from, seqno); > if (ret) > return ret; > @@ -3153,6 +3164,9 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj, > * > * @obj: object which may be in use on another ring. > * @to: ring we wish to use the object on. May be NULL. > + * @to_req: request we wish to use the object for. See below. 
> + * This will be allocated and returned if a request is > + * required but not passed in. > * > * This code is meant to abstract object synchronization with the GPU. > * Calling with NULL implies synchronizing the object with the CPU > @@ -3168,11 +3182,22 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj, > * - If we are a write request (pending_write_domain is set), the new > * request must wait for outstanding read requests to complete. > * > + * For CPU synchronisation (NULL to) no request is required. For syncing with > + * rings to_req must be non-NULL. However, a request does not have to be > + * pre-allocated. If *to_req is null and sync commands will be emitted then a > + * request will be allocated automatically and returned through *to_req. Note > + * that it is not guaranteed that commands will be emitted (because the > + * might already be idle). Hence there is no need to create a request that > + * might never have any work submitted. Note further that if a request is > + * returned in *to_req, it is the responsibility of the caller to submit > + * that request (after potentially adding more work to it). > + * 3. "(because the might already be idle)" : The what? The engine? 4. "NULL" and "null" mixed. Please be consistent. Overall, the explanation is better than in the last patch version With those minor changes: Reviewed-by: Tomas Elf <tomas.elf@intel.com> Thanks, Tomas > * Returns 0 if successful, else propagates up the lower layer error. 
> */ > int > i915_gem_object_sync(struct drm_i915_gem_object *obj, > - struct intel_engine_cs *to) > + struct intel_engine_cs *to, > + struct drm_i915_gem_request **to_req) > { > const bool readonly = obj->base.pending_write_domain == 0; > struct drm_i915_gem_request *req[I915_NUM_RINGS]; > @@ -3194,7 +3219,7 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj, > req[n++] = obj->last_read_req[i]; > } > for (i = 0; i < n; i++) { > - ret = __i915_gem_object_sync(obj, to, req[i]); > + ret = __i915_gem_object_sync(obj, to, req[i], to_req); > if (ret) > return ret; > } > @@ -4144,12 +4169,13 @@ int > i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, > u32 alignment, > struct intel_engine_cs *pipelined, > + struct drm_i915_gem_request **pipelined_request, > const struct i915_ggtt_view *view) > { > u32 old_read_domains, old_write_domain; > int ret; > > - ret = i915_gem_object_sync(obj, pipelined); > + ret = i915_gem_object_sync(obj, pipelined, pipelined_request); > if (ret) > return ret; > > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > index 50b1ced..bea92ad 100644 > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > @@ -899,7 +899,7 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, > struct drm_i915_gem_object *obj = vma->obj; > > if (obj->active & other_rings) { > - ret = i915_gem_object_sync(obj, req->ring); > + ret = i915_gem_object_sync(obj, req->ring, &req); > if (ret) > return ret; > } > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c > index 657a333..6528ada 100644 > --- a/drivers/gpu/drm/i915/intel_display.c > +++ b/drivers/gpu/drm/i915/intel_display.c > @@ -2338,7 +2338,8 @@ int > intel_pin_and_fence_fb_obj(struct drm_plane *plane, > struct drm_framebuffer *fb, > const struct drm_plane_state *plane_state, > - struct intel_engine_cs *pipelined) > + struct intel_engine_cs 
*pipelined, > + struct drm_i915_gem_request **pipelined_request) > { > struct drm_device *dev = fb->dev; > struct drm_i915_private *dev_priv = dev->dev_private; > @@ -2403,7 +2404,7 @@ intel_pin_and_fence_fb_obj(struct drm_plane *plane, > > dev_priv->mm.interruptible = false; > ret = i915_gem_object_pin_to_display_plane(obj, alignment, pipelined, > - &view); > + pipelined_request, &view); > if (ret) > goto err_interruptible; > > @@ -11119,6 +11120,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, > struct intel_unpin_work *work; > struct intel_engine_cs *ring; > bool mmio_flip; > + struct drm_i915_gem_request *request = NULL; > int ret; > > /* > @@ -11225,7 +11227,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, > */ > ret = intel_pin_and_fence_fb_obj(crtc->primary, fb, > crtc->primary->state, > - mmio_flip ? i915_gem_request_get_ring(obj->last_write_req) : ring); > + mmio_flip ? i915_gem_request_get_ring(obj->last_write_req) : ring, &request); > if (ret) > goto cleanup_pending; > > @@ -11256,6 +11258,9 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, > intel_ring_get_request(ring)); > } > > + if (request) > + i915_add_request_no_flush(request->ring); > + > work->flip_queued_vblank = drm_crtc_vblank_count(crtc); > work->enable_stall_check = true; > > @@ -11273,6 +11278,8 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, > cleanup_unpin: > intel_unpin_fb_obj(fb, crtc->primary->state); > cleanup_pending: > + if (request) > + i915_gem_request_cancel(request); > atomic_dec(&intel_crtc->unpin_work_count); > mutex_unlock(&dev->struct_mutex); > cleanup: > @@ -13171,7 +13178,7 @@ intel_prepare_plane_fb(struct drm_plane *plane, > if (ret) > DRM_DEBUG_KMS("failed to attach phys object\n"); > } else { > - ret = intel_pin_and_fence_fb_obj(plane, fb, new_state, NULL); > + ret = intel_pin_and_fence_fb_obj(plane, fb, new_state, NULL, NULL); > } > > if (ret == 0) > @@ -15218,7 +15225,7 @@ void intel_modeset_gem_init(struct drm_device *dev) > 
ret = intel_pin_and_fence_fb_obj(c->primary, > c->primary->fb, > c->primary->state, > - NULL); > + NULL, NULL); > mutex_unlock(&dev->struct_mutex); > if (ret) { > DRM_ERROR("failed to pin boot fb on pipe %d\n", > diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h > index 02d8317..73650ae 100644 > --- a/drivers/gpu/drm/i915/intel_drv.h > +++ b/drivers/gpu/drm/i915/intel_drv.h > @@ -1034,7 +1034,8 @@ void intel_release_load_detect_pipe(struct drm_connector *connector, > int intel_pin_and_fence_fb_obj(struct drm_plane *plane, > struct drm_framebuffer *fb, > const struct drm_plane_state *plane_state, > - struct intel_engine_cs *pipelined); > + struct intel_engine_cs *pipelined, > + struct drm_i915_gem_request **pipelined_request); > struct drm_framebuffer * > __intel_framebuffer_create(struct drm_device *dev, > struct drm_mode_fb_cmd2 *mode_cmd, > diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c > index 4e7e7da..dd9f3b2 100644 > --- a/drivers/gpu/drm/i915/intel_fbdev.c > +++ b/drivers/gpu/drm/i915/intel_fbdev.c > @@ -151,7 +151,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper, > } > > /* Flush everything out, we'll be doing GTT only from now on */ > - ret = intel_pin_and_fence_fb_obj(NULL, fb, NULL, NULL); > + ret = intel_pin_and_fence_fb_obj(NULL, fb, NULL, NULL, NULL); > if (ret) { > DRM_ERROR("failed to pin obj: %d\n", ret); > goto out_fb; > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index 6d005b1..f8e8fdb 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -638,7 +638,7 @@ static int execlists_move_to_gpu(struct drm_i915_gem_request *req, > struct drm_i915_gem_object *obj = vma->obj; > > if (obj->active & other_rings) { > - ret = i915_gem_object_sync(obj, req->ring); > + ret = i915_gem_object_sync(obj, req->ring, &req); > if (ret) > return ret; > } > diff --git a/drivers/gpu/drm/i915/intel_overlay.c 
b/drivers/gpu/drm/i915/intel_overlay.c > index e7534b9..0f8187a 100644 > --- a/drivers/gpu/drm/i915/intel_overlay.c > +++ b/drivers/gpu/drm/i915/intel_overlay.c > @@ -724,7 +724,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, > if (ret != 0) > return ret; > > - ret = i915_gem_object_pin_to_display_plane(new_bo, 0, NULL, > + ret = i915_gem_object_pin_to_display_plane(new_bo, 0, NULL, NULL, > &i915_ggtt_view_normal); > if (ret != 0) > return ret; >
On 02/06/2015 19:26, Tomas Elf wrote: > On 29/05/2015 17:43, John.C.Harrison@Intel.com wrote: >> From: John Harrison <John.C.Harrison@Intel.com> >> >> The plan is to pass requests around as the basic submission tracking >> structure >> rather than rings and contexts. This patch updates the >> i915_gem_object_sync() >> code path. >> >> v2: Much more complex patch to share a single request between the >> sync and the >> page flip. The _sync() function now supports lazy allocation of the >> request >> structure. That is, if one is passed in then that will be used. If >> one is not, >> then a request will be allocated and passed back out. Note that the >> _sync() code >> does not necessarily require a request. Thus one will only be created >> until >> certain situations. The reason the lazy allocation must be done >> within the >> _sync() code itself is because the decision to need one or not is not >> really >> something that code above can second guess (except in the case where >> one is >> definitely not required because no ring is passed in). >> >> The call chains above _sync() now support passing a request through >> which most >> callers passing in NULL and assuming that no request will be required >> (because >> they also pass in NULL for the ring and therefore can't be generating >> any ring >> code). >> >> The exeception is intel_crtc_page_flip() which now supports having a >> request > > 1. "The exeception" -> "The exception" > >> returned from _sync(). If one is, then that request is shared by the >> page flip >> (if the page flip is of a type to need a request). If _sync() does >> not generate >> a request but the page flip does need one, then the page flip path >> will create >> its own request. >> >> v3: Updated comment description to be clearer about 'to_req' >> parameter (Tomas >> Elf review request). Rebased onto newer tree that significantly >> changed the >> synchronisation code. 
>> >> For: VIZ-5115 >> Signed-off-by: John Harrison <John.C.Harrison@Intel.com> >> --- >> drivers/gpu/drm/i915/i915_drv.h | 4 ++- >> drivers/gpu/drm/i915/i915_gem.c | 48 >> +++++++++++++++++++++------- >> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +- >> drivers/gpu/drm/i915/intel_display.c | 17 +++++++--- >> drivers/gpu/drm/i915/intel_drv.h | 3 +- >> drivers/gpu/drm/i915/intel_fbdev.c | 2 +- >> drivers/gpu/drm/i915/intel_lrc.c | 2 +- >> drivers/gpu/drm/i915/intel_overlay.c | 2 +- >> 8 files changed, 58 insertions(+), 22 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_drv.h >> b/drivers/gpu/drm/i915/i915_drv.h >> index 64a10fa..f69e9cb 100644 >> --- a/drivers/gpu/drm/i915/i915_drv.h >> +++ b/drivers/gpu/drm/i915/i915_drv.h >> @@ -2778,7 +2778,8 @@ static inline void >> i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) >> >> int __must_check i915_mutex_lock_interruptible(struct drm_device >> *dev); >> int i915_gem_object_sync(struct drm_i915_gem_object *obj, >> - struct intel_engine_cs *to); >> + struct intel_engine_cs *to, >> + struct drm_i915_gem_request **to_req); >> void i915_vma_move_to_active(struct i915_vma *vma, >> struct intel_engine_cs *ring); >> int i915_gem_dumb_create(struct drm_file *file_priv, >> @@ -2889,6 +2890,7 @@ int __must_check >> i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, >> u32 alignment, >> struct intel_engine_cs *pipelined, >> + struct drm_i915_gem_request **pipelined_request, >> const struct i915_ggtt_view *view); >> void i915_gem_object_unpin_from_display_plane(struct >> drm_i915_gem_object *obj, >> const struct i915_ggtt_view *view); >> diff --git a/drivers/gpu/drm/i915/i915_gem.c >> b/drivers/gpu/drm/i915/i915_gem.c >> index b7d66aa..db90043 100644 >> --- a/drivers/gpu/drm/i915/i915_gem.c >> +++ b/drivers/gpu/drm/i915/i915_gem.c >> @@ -3098,25 +3098,26 @@ out: >> static int >> __i915_gem_object_sync(struct drm_i915_gem_object *obj, >> struct intel_engine_cs *to, >> - struct 
drm_i915_gem_request *req) >> + struct drm_i915_gem_request *from_req, >> + struct drm_i915_gem_request **to_req) >> { >> struct intel_engine_cs *from; >> int ret; >> >> - from = i915_gem_request_get_ring(req); >> + from = i915_gem_request_get_ring(from_req); >> if (to == from) >> return 0; >> >> - if (i915_gem_request_completed(req, true)) >> + if (i915_gem_request_completed(from_req, true)) >> return 0; >> >> - ret = i915_gem_check_olr(req); >> + ret = i915_gem_check_olr(from_req); >> if (ret) >> return ret; >> >> if (!i915_semaphore_is_enabled(obj->base.dev)) { >> struct drm_i915_private *i915 = to_i915(obj->base.dev); >> - ret = __i915_wait_request(req, >> + ret = __i915_wait_request(from_req, >> atomic_read(&i915->gpu_error.reset_counter), >> i915->mm.interruptible, >> NULL, >> @@ -3124,15 +3125,25 @@ __i915_gem_object_sync(struct >> drm_i915_gem_object *obj, >> if (ret) >> return ret; >> >> - i915_gem_object_retire_request(obj, req); >> + i915_gem_object_retire_request(obj, from_req); >> } else { >> int idx = intel_ring_sync_index(from, to); >> - u32 seqno = i915_gem_request_get_seqno(req); >> + u32 seqno = i915_gem_request_get_seqno(from_req); >> >> + WARN_ON(!to_req); >> + >> + /* Optimization: Avoid semaphore sync when we are sure we >> already >> + * waited for an object with higher seqno */ > > 2. How about using the standard multi-line comment format? > Not my comment. It looks like Chris removed it in his re-write of the sync code and I accidentally put it back in when resolving the merge conflicts. I'll drop it again. 
> /* (empty line) > * (first line) > * (second line) > */ > >> if (seqno <= from->semaphore.sync_seqno[idx]) >> return 0; >> >> - trace_i915_gem_ring_sync_to(from, to, req); >> + if (*to_req == NULL) { >> + ret = i915_gem_request_alloc(to, to->default_context, >> to_req); >> + if (ret) >> + return ret; >> + } >> + >> + trace_i915_gem_ring_sync_to(from, to, from_req); >> ret = to->semaphore.sync_to(to, from, seqno); >> if (ret) >> return ret; >> @@ -3153,6 +3164,9 @@ __i915_gem_object_sync(struct >> drm_i915_gem_object *obj, >> * >> * @obj: object which may be in use on another ring. >> * @to: ring we wish to use the object on. May be NULL. >> + * @to_req: request we wish to use the object for. See below. >> + * This will be allocated and returned if a request is >> + * required but not passed in. >> * >> * This code is meant to abstract object synchronization with the GPU. >> * Calling with NULL implies synchronizing the object with the CPU >> @@ -3168,11 +3182,22 @@ __i915_gem_object_sync(struct >> drm_i915_gem_object *obj, >> * - If we are a write request (pending_write_domain is set), the new >> * request must wait for outstanding read requests to complete. >> * >> + * For CPU synchronisation (NULL to) no request is required. For >> syncing with >> + * rings to_req must be non-NULL. However, a request does not have >> to be >> + * pre-allocated. If *to_req is null and sync commands will be >> emitted then a >> + * request will be allocated automatically and returned through >> *to_req. Note >> + * that it is not guaranteed that commands will be emitted (because the >> + * might already be idle). Hence there is no need to create a >> request that >> + * might never have any work submitted. Note further that if a >> request is >> + * returned in *to_req, it is the responsibility of the caller to >> submit >> + * that request (after potentially adding more work to it). >> + * > > 3. "(because the might already be idle)" : The what? The engine? > 4. 
"NULL" and "null" mixed. Please be consistent. > > Overall, the explanation is better than in the last patch version > > With those minor changes: > > Reviewed-by: Tomas Elf <tomas.elf@intel.com> > > Thanks, > Tomas > >> * Returns 0 if successful, else propagates up the lower layer error. >> */ >> int >> i915_gem_object_sync(struct drm_i915_gem_object *obj, >> - struct intel_engine_cs *to) >> + struct intel_engine_cs *to, >> + struct drm_i915_gem_request **to_req) >> { >> const bool readonly = obj->base.pending_write_domain == 0; >> struct drm_i915_gem_request *req[I915_NUM_RINGS]; >> @@ -3194,7 +3219,7 @@ i915_gem_object_sync(struct drm_i915_gem_object >> *obj, >> req[n++] = obj->last_read_req[i]; >> } >> for (i = 0; i < n; i++) { >> - ret = __i915_gem_object_sync(obj, to, req[i]); >> + ret = __i915_gem_object_sync(obj, to, req[i], to_req); >> if (ret) >> return ret; >> } >> @@ -4144,12 +4169,13 @@ int >> i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, >> u32 alignment, >> struct intel_engine_cs *pipelined, >> + struct drm_i915_gem_request **pipelined_request, >> const struct i915_ggtt_view *view) >> { >> u32 old_read_domains, old_write_domain; >> int ret; >> >> - ret = i915_gem_object_sync(obj, pipelined); >> + ret = i915_gem_object_sync(obj, pipelined, pipelined_request); >> if (ret) >> return ret; >> >> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c >> b/drivers/gpu/drm/i915/i915_gem_execbuffer.c >> index 50b1ced..bea92ad 100644 >> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c >> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c >> @@ -899,7 +899,7 @@ i915_gem_execbuffer_move_to_gpu(struct >> drm_i915_gem_request *req, >> struct drm_i915_gem_object *obj = vma->obj; >> >> if (obj->active & other_rings) { >> - ret = i915_gem_object_sync(obj, req->ring); >> + ret = i915_gem_object_sync(obj, req->ring, &req); >> if (ret) >> return ret; >> } >> diff --git a/drivers/gpu/drm/i915/intel_display.c >> 
b/drivers/gpu/drm/i915/intel_display.c >> index 657a333..6528ada 100644 >> --- a/drivers/gpu/drm/i915/intel_display.c >> +++ b/drivers/gpu/drm/i915/intel_display.c >> @@ -2338,7 +2338,8 @@ int >> intel_pin_and_fence_fb_obj(struct drm_plane *plane, >> struct drm_framebuffer *fb, >> const struct drm_plane_state *plane_state, >> - struct intel_engine_cs *pipelined) >> + struct intel_engine_cs *pipelined, >> + struct drm_i915_gem_request **pipelined_request) >> { >> struct drm_device *dev = fb->dev; >> struct drm_i915_private *dev_priv = dev->dev_private; >> @@ -2403,7 +2404,7 @@ intel_pin_and_fence_fb_obj(struct drm_plane >> *plane, >> >> dev_priv->mm.interruptible = false; >> ret = i915_gem_object_pin_to_display_plane(obj, alignment, >> pipelined, >> - &view); >> + pipelined_request, &view); >> if (ret) >> goto err_interruptible; >> >> @@ -11119,6 +11120,7 @@ static int intel_crtc_page_flip(struct >> drm_crtc *crtc, >> struct intel_unpin_work *work; >> struct intel_engine_cs *ring; >> bool mmio_flip; >> + struct drm_i915_gem_request *request = NULL; >> int ret; >> >> /* >> @@ -11225,7 +11227,7 @@ static int intel_crtc_page_flip(struct >> drm_crtc *crtc, >> */ >> ret = intel_pin_and_fence_fb_obj(crtc->primary, fb, >> crtc->primary->state, >> - mmio_flip ? >> i915_gem_request_get_ring(obj->last_write_req) : ring); >> + mmio_flip ? 
>> i915_gem_request_get_ring(obj->last_write_req) : ring, &request); >> if (ret) >> goto cleanup_pending; >> >> @@ -11256,6 +11258,9 @@ static int intel_crtc_page_flip(struct >> drm_crtc *crtc, >> intel_ring_get_request(ring)); >> } >> >> + if (request) >> + i915_add_request_no_flush(request->ring); >> + >> work->flip_queued_vblank = drm_crtc_vblank_count(crtc); >> work->enable_stall_check = true; >> >> @@ -11273,6 +11278,8 @@ static int intel_crtc_page_flip(struct >> drm_crtc *crtc, >> cleanup_unpin: >> intel_unpin_fb_obj(fb, crtc->primary->state); >> cleanup_pending: >> + if (request) >> + i915_gem_request_cancel(request); >> atomic_dec(&intel_crtc->unpin_work_count); >> mutex_unlock(&dev->struct_mutex); >> cleanup: >> @@ -13171,7 +13178,7 @@ intel_prepare_plane_fb(struct drm_plane *plane, >> if (ret) >> DRM_DEBUG_KMS("failed to attach phys object\n"); >> } else { >> - ret = intel_pin_and_fence_fb_obj(plane, fb, new_state, NULL); >> + ret = intel_pin_and_fence_fb_obj(plane, fb, new_state, NULL, >> NULL); >> } >> >> if (ret == 0) >> @@ -15218,7 +15225,7 @@ void intel_modeset_gem_init(struct drm_device >> *dev) >> ret = intel_pin_and_fence_fb_obj(c->primary, >> c->primary->fb, >> c->primary->state, >> - NULL); >> + NULL, NULL); >> mutex_unlock(&dev->struct_mutex); >> if (ret) { >> DRM_ERROR("failed to pin boot fb on pipe %d\n", >> diff --git a/drivers/gpu/drm/i915/intel_drv.h >> b/drivers/gpu/drm/i915/intel_drv.h >> index 02d8317..73650ae 100644 >> --- a/drivers/gpu/drm/i915/intel_drv.h >> +++ b/drivers/gpu/drm/i915/intel_drv.h >> @@ -1034,7 +1034,8 @@ void intel_release_load_detect_pipe(struct >> drm_connector *connector, >> int intel_pin_and_fence_fb_obj(struct drm_plane *plane, >> struct drm_framebuffer *fb, >> const struct drm_plane_state *plane_state, >> - struct intel_engine_cs *pipelined); >> + struct intel_engine_cs *pipelined, >> + struct drm_i915_gem_request **pipelined_request); >> struct drm_framebuffer * >> __intel_framebuffer_create(struct drm_device 
*dev, >> struct drm_mode_fb_cmd2 *mode_cmd, >> diff --git a/drivers/gpu/drm/i915/intel_fbdev.c >> b/drivers/gpu/drm/i915/intel_fbdev.c >> index 4e7e7da..dd9f3b2 100644 >> --- a/drivers/gpu/drm/i915/intel_fbdev.c >> +++ b/drivers/gpu/drm/i915/intel_fbdev.c >> @@ -151,7 +151,7 @@ static int intelfb_alloc(struct drm_fb_helper >> *helper, >> } >> >> /* Flush everything out, we'll be doing GTT only from now on */ >> - ret = intel_pin_and_fence_fb_obj(NULL, fb, NULL, NULL); >> + ret = intel_pin_and_fence_fb_obj(NULL, fb, NULL, NULL, NULL); >> if (ret) { >> DRM_ERROR("failed to pin obj: %d\n", ret); >> goto out_fb; >> diff --git a/drivers/gpu/drm/i915/intel_lrc.c >> b/drivers/gpu/drm/i915/intel_lrc.c >> index 6d005b1..f8e8fdb 100644 >> --- a/drivers/gpu/drm/i915/intel_lrc.c >> +++ b/drivers/gpu/drm/i915/intel_lrc.c >> @@ -638,7 +638,7 @@ static int execlists_move_to_gpu(struct >> drm_i915_gem_request *req, >> struct drm_i915_gem_object *obj = vma->obj; >> >> if (obj->active & other_rings) { >> - ret = i915_gem_object_sync(obj, req->ring); >> + ret = i915_gem_object_sync(obj, req->ring, &req); >> if (ret) >> return ret; >> } >> diff --git a/drivers/gpu/drm/i915/intel_overlay.c >> b/drivers/gpu/drm/i915/intel_overlay.c >> index e7534b9..0f8187a 100644 >> --- a/drivers/gpu/drm/i915/intel_overlay.c >> +++ b/drivers/gpu/drm/i915/intel_overlay.c >> @@ -724,7 +724,7 @@ static int intel_overlay_do_put_image(struct >> intel_overlay *overlay, >> if (ret != 0) >> return ret; >> >> - ret = i915_gem_object_pin_to_display_plane(new_bo, 0, NULL, >> + ret = i915_gem_object_pin_to_display_plane(new_bo, 0, NULL, NULL, >> &i915_ggtt_view_normal); >> if (ret != 0) >> return ret; >> >
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 64a10fa..f69e9cb 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2778,7 +2778,8 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) int __must_check i915_mutex_lock_interruptible(struct drm_device *dev); int i915_gem_object_sync(struct drm_i915_gem_object *obj, - struct intel_engine_cs *to); + struct intel_engine_cs *to, + struct drm_i915_gem_request **to_req); void i915_vma_move_to_active(struct i915_vma *vma, struct intel_engine_cs *ring); int i915_gem_dumb_create(struct drm_file *file_priv, @@ -2889,6 +2890,7 @@ int __must_check i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, u32 alignment, struct intel_engine_cs *pipelined, + struct drm_i915_gem_request **pipelined_request, const struct i915_ggtt_view *view); void i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b7d66aa..db90043 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3098,25 +3098,26 @@ out: static int __i915_gem_object_sync(struct drm_i915_gem_object *obj, struct intel_engine_cs *to, - struct drm_i915_gem_request *req) + struct drm_i915_gem_request *from_req, + struct drm_i915_gem_request **to_req) { struct intel_engine_cs *from; int ret; - from = i915_gem_request_get_ring(req); + from = i915_gem_request_get_ring(from_req); if (to == from) return 0; - if (i915_gem_request_completed(req, true)) + if (i915_gem_request_completed(from_req, true)) return 0; - ret = i915_gem_check_olr(req); + ret = i915_gem_check_olr(from_req); if (ret) return ret; if (!i915_semaphore_is_enabled(obj->base.dev)) { struct drm_i915_private *i915 = to_i915(obj->base.dev); - ret = __i915_wait_request(req, + ret = __i915_wait_request(from_req, 
atomic_read(&i915->gpu_error.reset_counter), i915->mm.interruptible, NULL, @@ -3124,15 +3125,25 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj, if (ret) return ret; - i915_gem_object_retire_request(obj, req); + i915_gem_object_retire_request(obj, from_req); } else { int idx = intel_ring_sync_index(from, to); - u32 seqno = i915_gem_request_get_seqno(req); + u32 seqno = i915_gem_request_get_seqno(from_req); + WARN_ON(!to_req); + + /* Optimization: Avoid semaphore sync when we are sure we already + * waited for an object with higher seqno */ if (seqno <= from->semaphore.sync_seqno[idx]) return 0; - trace_i915_gem_ring_sync_to(from, to, req); + if (*to_req == NULL) { + ret = i915_gem_request_alloc(to, to->default_context, to_req); + if (ret) + return ret; + } + + trace_i915_gem_ring_sync_to(from, to, from_req); ret = to->semaphore.sync_to(to, from, seqno); if (ret) return ret; @@ -3153,6 +3164,9 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj, * * @obj: object which may be in use on another ring. * @to: ring we wish to use the object on. May be NULL. + * @to_req: request we wish to use the object for. See below. + * This will be allocated and returned if a request is + * required but not passed in. * * This code is meant to abstract object synchronization with the GPU. * Calling with NULL implies synchronizing the object with the CPU @@ -3168,11 +3182,22 @@ __i915_gem_object_sync(struct drm_i915_gem_object *obj, * - If we are a write request (pending_write_domain is set), the new * request must wait for outstanding read requests to complete. * + * For CPU synchronisation (NULL to) no request is required. For syncing with + * rings to_req must be non-NULL. However, a request does not have to be + * pre-allocated. If *to_req is null and sync commands will be emitted then a + * request will be allocated automatically and returned through *to_req. Note + * that it is not guaranteed that commands will be emitted (because the + * might already be idle). 
Hence there is no need to create a request that + * might never have any work submitted. Note further that if a request is + * returned in *to_req, it is the responsibility of the caller to submit + * that request (after potentially adding more work to it). + * * Returns 0 if successful, else propagates up the lower layer error. */ int i915_gem_object_sync(struct drm_i915_gem_object *obj, - struct intel_engine_cs *to) + struct intel_engine_cs *to, + struct drm_i915_gem_request **to_req) { const bool readonly = obj->base.pending_write_domain == 0; struct drm_i915_gem_request *req[I915_NUM_RINGS]; @@ -3194,7 +3219,7 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj, req[n++] = obj->last_read_req[i]; } for (i = 0; i < n; i++) { - ret = __i915_gem_object_sync(obj, to, req[i]); + ret = __i915_gem_object_sync(obj, to, req[i], to_req); if (ret) return ret; } @@ -4144,12 +4169,13 @@ int i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, u32 alignment, struct intel_engine_cs *pipelined, + struct drm_i915_gem_request **pipelined_request, const struct i915_ggtt_view *view) { u32 old_read_domains, old_write_domain; int ret; - ret = i915_gem_object_sync(obj, pipelined); + ret = i915_gem_object_sync(obj, pipelined, pipelined_request); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 50b1ced..bea92ad 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -899,7 +899,7 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, struct drm_i915_gem_object *obj = vma->obj; if (obj->active & other_rings) { - ret = i915_gem_object_sync(obj, req->ring); + ret = i915_gem_object_sync(obj, req->ring, &req); if (ret) return ret; } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 657a333..6528ada 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ 
b/drivers/gpu/drm/i915/intel_display.c @@ -2338,7 +2338,8 @@ int intel_pin_and_fence_fb_obj(struct drm_plane *plane, struct drm_framebuffer *fb, const struct drm_plane_state *plane_state, - struct intel_engine_cs *pipelined) + struct intel_engine_cs *pipelined, + struct drm_i915_gem_request **pipelined_request) { struct drm_device *dev = fb->dev; struct drm_i915_private *dev_priv = dev->dev_private; @@ -2403,7 +2404,7 @@ intel_pin_and_fence_fb_obj(struct drm_plane *plane, dev_priv->mm.interruptible = false; ret = i915_gem_object_pin_to_display_plane(obj, alignment, pipelined, - &view); + pipelined_request, &view); if (ret) goto err_interruptible; @@ -11119,6 +11120,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, struct intel_unpin_work *work; struct intel_engine_cs *ring; bool mmio_flip; + struct drm_i915_gem_request *request = NULL; int ret; /* @@ -11225,7 +11227,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, */ ret = intel_pin_and_fence_fb_obj(crtc->primary, fb, crtc->primary->state, - mmio_flip ? i915_gem_request_get_ring(obj->last_write_req) : ring); + mmio_flip ? 
i915_gem_request_get_ring(obj->last_write_req) : ring, &request); if (ret) goto cleanup_pending; @@ -11256,6 +11258,9 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, intel_ring_get_request(ring)); } + if (request) + i915_add_request_no_flush(request->ring); + work->flip_queued_vblank = drm_crtc_vblank_count(crtc); work->enable_stall_check = true; @@ -11273,6 +11278,8 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, cleanup_unpin: intel_unpin_fb_obj(fb, crtc->primary->state); cleanup_pending: + if (request) + i915_gem_request_cancel(request); atomic_dec(&intel_crtc->unpin_work_count); mutex_unlock(&dev->struct_mutex); cleanup: @@ -13171,7 +13178,7 @@ intel_prepare_plane_fb(struct drm_plane *plane, if (ret) DRM_DEBUG_KMS("failed to attach phys object\n"); } else { - ret = intel_pin_and_fence_fb_obj(plane, fb, new_state, NULL); + ret = intel_pin_and_fence_fb_obj(plane, fb, new_state, NULL, NULL); } if (ret == 0) @@ -15218,7 +15225,7 @@ void intel_modeset_gem_init(struct drm_device *dev) ret = intel_pin_and_fence_fb_obj(c->primary, c->primary->fb, c->primary->state, - NULL); + NULL, NULL); mutex_unlock(&dev->struct_mutex); if (ret) { DRM_ERROR("failed to pin boot fb on pipe %d\n", diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 02d8317..73650ae 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1034,7 +1034,8 @@ void intel_release_load_detect_pipe(struct drm_connector *connector, int intel_pin_and_fence_fb_obj(struct drm_plane *plane, struct drm_framebuffer *fb, const struct drm_plane_state *plane_state, - struct intel_engine_cs *pipelined); + struct intel_engine_cs *pipelined, + struct drm_i915_gem_request **pipelined_request); struct drm_framebuffer * __intel_framebuffer_create(struct drm_device *dev, struct drm_mode_fb_cmd2 *mode_cmd, diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 4e7e7da..dd9f3b2 100644 --- 
a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -151,7 +151,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper, } /* Flush everything out, we'll be doing GTT only from now on */ - ret = intel_pin_and_fence_fb_obj(NULL, fb, NULL, NULL); + ret = intel_pin_and_fence_fb_obj(NULL, fb, NULL, NULL, NULL); if (ret) { DRM_ERROR("failed to pin obj: %d\n", ret); goto out_fb; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 6d005b1..f8e8fdb 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -638,7 +638,7 @@ static int execlists_move_to_gpu(struct drm_i915_gem_request *req, struct drm_i915_gem_object *obj = vma->obj; if (obj->active & other_rings) { - ret = i915_gem_object_sync(obj, req->ring); + ret = i915_gem_object_sync(obj, req->ring, &req); if (ret) return ret; } diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index e7534b9..0f8187a 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -724,7 +724,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, if (ret != 0) return ret; - ret = i915_gem_object_pin_to_display_plane(new_bo, 0, NULL, + ret = i915_gem_object_pin_to_display_plane(new_bo, 0, NULL, NULL, &i915_ggtt_view_normal); if (ret != 0) return ret;