Message ID | 1426768264-16996-29-git-send-email-John.C.Harrison@Intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 19/03/2015 12:30, John.C.Harrison@Intel.com wrote: > From: John Harrison <John.C.Harrison@Intel.com> > > The plan is to pass requests around as the basic submission tracking structure > rather than rings and contexts. This patch updates the i915_gem_object_sync() > code path. > > v2: Much more complex patch to share a single request between the sync and the > page flip. The _sync() function now supports lazy allocation of the request > structure. That is, if one is passed in then that will be used. If one is not, > then a request will be allocated and passed back out. Note that the _sync() code > does not necessarily require a request. Thus one will only be created in > certain situations. The reason the lazy allocation must be done within the > _sync() code itself is because the decision to need one or not is not really > something that code above can second guess (except in the case where one is > definitely not required because no ring is passed in). > > The call chains above _sync() now support passing a request through, with most > callers passing in NULL and assuming that no request will be required (because > they also pass in NULL for the ring and therefore can't be generating any ring > code). > > The exception is intel_crtc_page_flip() which now supports having a request > returned from _sync(). If one is, then that request is shared by the page flip > (if the page flip is of a type to need a request). If _sync() does not generate > a request but the page flip does need one, then the page flip path will create > it's own request. 
Nitpick: "it's own request" -> "its own request" > > For: VIZ-5115 > Signed-off-by: John Harrison <John.C.Harrison@Intel.com> > --- > drivers/gpu/drm/i915/i915_drv.h | 6 ++++-- > drivers/gpu/drm/i915/i915_gem.c | 19 ++++++++++++++++--- > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +- > drivers/gpu/drm/i915/intel_display.c | 17 ++++++++++++----- > drivers/gpu/drm/i915/intel_drv.h | 3 ++- > drivers/gpu/drm/i915/intel_fbdev.c | 2 +- > drivers/gpu/drm/i915/intel_lrc.c | 2 +- > drivers/gpu/drm/i915/intel_overlay.c | 2 +- > 8 files changed, 38 insertions(+), 15 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index ca070d3..55d82a7 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -2665,7 +2665,8 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) > > int __must_check i915_mutex_lock_interruptible(struct drm_device *dev); > int i915_gem_object_sync(struct drm_i915_gem_object *obj, > - struct intel_engine_cs *to); > + struct intel_engine_cs *to, > + struct drm_i915_gem_request **to_req); > void i915_vma_move_to_active(struct i915_vma *vma, > struct intel_engine_cs *ring); > int i915_gem_dumb_create(struct drm_file *file_priv, > @@ -2773,7 +2774,8 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write); > int __must_check > i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, > u32 alignment, > - struct intel_engine_cs *pipelined); > + struct intel_engine_cs *pipelined, > + struct drm_i915_gem_request **pipelined_request); > void i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj); > int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, > int align); > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 0a25461..9114071 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -2957,6 +2957,9 @@ out: > * > * 
@obj: object which may be in use on another ring. > * @to: ring we wish to use the object on. May be NULL. > + * @to_req: request we wish to use the object for. May be NULL. to_req may only be NULL if to is NULL. And to may only be NULL if to_req is NULL. > + * This will be allocated and returned if a request is > + * required but not passed in. "This will be allocated and returned if a request is required but not passed in" sounds like to_req is NULL. However, what we mean here is that *to_req is NULL. And that can coincide with to not being NULL. So together with the relationship between to_req and to described in my remark above this adds another level of complexity. Could we perhaps formulate the full extent of the NULLability of these parameters in a consistent way instead of two arbitrary sentences? Like something along the lines of: - to can be NULL if to_req is NULL - to_req can be NULL if to is NULL - If *to_req is NULL it may be allocated and returned. Thanks, Tomas > * > * This code is meant to abstract object synchronization with the GPU. 
> * Calling with NULL implies synchronizing the object with the CPU > @@ -2966,7 +2969,8 @@ out: > */ > int > i915_gem_object_sync(struct drm_i915_gem_object *obj, > - struct intel_engine_cs *to) > + struct intel_engine_cs *to, > + struct drm_i915_gem_request **to_req) > { > struct intel_engine_cs *from; > u32 seqno; > @@ -2980,6 +2984,8 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj, > if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev)) > return i915_gem_object_wait_rendering(obj, false); > > + WARN_ON(!to_req); > + > idx = intel_ring_sync_index(from, to); > > seqno = i915_gem_request_get_seqno(obj->last_read_req); > @@ -2988,6 +2994,12 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj, > if (seqno <= from->semaphore.sync_seqno[idx]) > return 0; > > + if (*to_req == NULL) { > + ret = i915_gem_request_alloc(to, to->default_context, to_req); > + if (ret) > + return ret; > + } > + > ret = i915_gem_check_olr(obj->last_read_req); > if (ret) > return ret; > @@ -3953,14 +3965,15 @@ static bool is_pin_display(struct drm_i915_gem_object *obj) > int > i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, > u32 alignment, > - struct intel_engine_cs *pipelined) > + struct intel_engine_cs *pipelined, > + struct drm_i915_gem_request **pipelined_request) > { > u32 old_read_domains, old_write_domain; > bool was_pin_display; > int ret; > > if (pipelined != i915_gem_request_get_ring(obj->last_read_req)) { > - ret = i915_gem_object_sync(obj, pipelined); > + ret = i915_gem_object_sync(obj, pipelined, pipelined_request); > if (ret) > return ret; > } > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > index eca88a6..1978f9c 100644 > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > @@ -902,7 +902,7 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, > > list_for_each_entry(vma, vmas, exec_list) { > struct drm_i915_gem_object 
*obj = vma->obj; > - ret = i915_gem_object_sync(obj, req->ring); > + ret = i915_gem_object_sync(obj, req->ring, &req); > if (ret) > return ret; > > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c > index f1c0295..c71c523 100644 > --- a/drivers/gpu/drm/i915/intel_display.c > +++ b/drivers/gpu/drm/i915/intel_display.c > @@ -2245,7 +2245,8 @@ intel_fb_align_height(struct drm_device *dev, int height, > int > intel_pin_and_fence_fb_obj(struct drm_plane *plane, > struct drm_framebuffer *fb, > - struct intel_engine_cs *pipelined) > + struct intel_engine_cs *pipelined, > + struct drm_i915_gem_request **pipelined_request) > { > struct drm_device *dev = fb->dev; > struct drm_i915_private *dev_priv = dev->dev_private; > @@ -2304,7 +2305,7 @@ intel_pin_and_fence_fb_obj(struct drm_plane *plane, > intel_runtime_pm_get(dev_priv); > > dev_priv->mm.interruptible = false; > - ret = i915_gem_object_pin_to_display_plane(obj, alignment, pipelined); > + ret = i915_gem_object_pin_to_display_plane(obj, alignment, pipelined, pipelined_request); > if (ret) > goto err_interruptible; > > @@ -9882,6 +9883,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, > enum pipe pipe = intel_crtc->pipe; > struct intel_unpin_work *work; > struct intel_engine_cs *ring; > + struct drm_i915_gem_request *request = NULL; > int ret; > > /* > @@ -9979,7 +9981,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, > ring = &dev_priv->ring[RCS]; > } > > - ret = intel_pin_and_fence_fb_obj(crtc->primary, fb, ring); > + ret = intel_pin_and_fence_fb_obj(crtc->primary, fb, ring, &request); > if (ret) > goto cleanup_pending; > > @@ -10004,6 +10006,9 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, > intel_ring_get_request(ring)); > } > > + if (request) > + i915_add_request_no_flush(request->ring); > + > work->flip_queued_vblank = drm_crtc_vblank_count(crtc); > work->enable_stall_check = true; > > @@ -10021,6 +10026,8 @@ static int intel_crtc_page_flip(struct 
drm_crtc *crtc, > cleanup_unpin: > intel_unpin_fb_obj(obj); > cleanup_pending: > + if (request) > + i915_gem_request_cancel(request); > atomic_dec(&intel_crtc->unpin_work_count); > mutex_unlock(&dev->struct_mutex); > cleanup: > @@ -11975,7 +11982,7 @@ intel_prepare_plane_fb(struct drm_plane *plane, > if (ret) > DRM_DEBUG_KMS("failed to attach phys object\n"); > } else { > - ret = intel_pin_and_fence_fb_obj(plane, fb, NULL); > + ret = intel_pin_and_fence_fb_obj(plane, fb, NULL, NULL); > } > > if (ret == 0) > @@ -13900,7 +13907,7 @@ void intel_modeset_gem_init(struct drm_device *dev) > > if (intel_pin_and_fence_fb_obj(c->primary, > c->primary->fb, > - NULL)) { > + NULL, NULL)) { > DRM_ERROR("failed to pin boot fb on pipe %d\n", > to_intel_crtc(c)->pipe); > drm_framebuffer_unreference(c->primary->fb); > diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h > index 8bb18e5..6ad2823 100644 > --- a/drivers/gpu/drm/i915/intel_drv.h > +++ b/drivers/gpu/drm/i915/intel_drv.h > @@ -961,7 +961,8 @@ void intel_release_load_detect_pipe(struct drm_connector *connector, > struct intel_load_detect_pipe *old); > int intel_pin_and_fence_fb_obj(struct drm_plane *plane, > struct drm_framebuffer *fb, > - struct intel_engine_cs *pipelined); > + struct intel_engine_cs *pipelined, > + struct drm_i915_gem_request **pipelined_request); > struct drm_framebuffer * > __intel_framebuffer_create(struct drm_device *dev, > struct drm_mode_fb_cmd2 *mode_cmd, > diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c > index 757c0d2..4e7e7da 100644 > --- a/drivers/gpu/drm/i915/intel_fbdev.c > +++ b/drivers/gpu/drm/i915/intel_fbdev.c > @@ -151,7 +151,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper, > } > > /* Flush everything out, we'll be doing GTT only from now on */ > - ret = intel_pin_and_fence_fb_obj(NULL, fb, NULL); > + ret = intel_pin_and_fence_fb_obj(NULL, fb, NULL, NULL); > if (ret) { > DRM_ERROR("failed to pin obj: %d\n", 
ret); > goto out_fb; > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index 0480ad4..93faf11 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -590,7 +590,7 @@ static int execlists_move_to_gpu(struct drm_i915_gem_request *req, > list_for_each_entry(vma, vmas, exec_list) { > struct drm_i915_gem_object *obj = vma->obj; > > - ret = i915_gem_object_sync(obj, req->ring); > + ret = i915_gem_object_sync(obj, req->ring, &req); > if (ret) > return ret; > > diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c > index 6af2581..ce7797c 100644 > --- a/drivers/gpu/drm/i915/intel_overlay.c > +++ b/drivers/gpu/drm/i915/intel_overlay.c > @@ -720,7 +720,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, > if (ret != 0) > return ret; > > - ret = i915_gem_object_pin_to_display_plane(new_bo, 0, NULL); > + ret = i915_gem_object_pin_to_display_plane(new_bo, 0, NULL, NULL); > if (ret != 0) > return ret; > >
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ca070d3..55d82a7 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2665,7 +2665,8 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) int __must_check i915_mutex_lock_interruptible(struct drm_device *dev); int i915_gem_object_sync(struct drm_i915_gem_object *obj, - struct intel_engine_cs *to); + struct intel_engine_cs *to, + struct drm_i915_gem_request **to_req); void i915_vma_move_to_active(struct i915_vma *vma, struct intel_engine_cs *ring); int i915_gem_dumb_create(struct drm_file *file_priv, @@ -2773,7 +2774,8 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write); int __must_check i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, u32 alignment, - struct intel_engine_cs *pipelined); + struct intel_engine_cs *pipelined, + struct drm_i915_gem_request **pipelined_request); void i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj); int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 0a25461..9114071 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2957,6 +2957,9 @@ out: * * @obj: object which may be in use on another ring. * @to: ring we wish to use the object on. May be NULL. + * @to_req: request we wish to use the object for. May be NULL. + * This will be allocated and returned if a request is + * required but not passed in. * * This code is meant to abstract object synchronization with the GPU. 
* Calling with NULL implies synchronizing the object with the CPU @@ -2966,7 +2969,8 @@ out: */ int i915_gem_object_sync(struct drm_i915_gem_object *obj, - struct intel_engine_cs *to) + struct intel_engine_cs *to, + struct drm_i915_gem_request **to_req) { struct intel_engine_cs *from; u32 seqno; @@ -2980,6 +2984,8 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj, if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev)) return i915_gem_object_wait_rendering(obj, false); + WARN_ON(!to_req); + idx = intel_ring_sync_index(from, to); seqno = i915_gem_request_get_seqno(obj->last_read_req); @@ -2988,6 +2994,12 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj, if (seqno <= from->semaphore.sync_seqno[idx]) return 0; + if (*to_req == NULL) { + ret = i915_gem_request_alloc(to, to->default_context, to_req); + if (ret) + return ret; + } + ret = i915_gem_check_olr(obj->last_read_req); if (ret) return ret; @@ -3953,14 +3965,15 @@ static bool is_pin_display(struct drm_i915_gem_object *obj) int i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, u32 alignment, - struct intel_engine_cs *pipelined) + struct intel_engine_cs *pipelined, + struct drm_i915_gem_request **pipelined_request) { u32 old_read_domains, old_write_domain; bool was_pin_display; int ret; if (pipelined != i915_gem_request_get_ring(obj->last_read_req)) { - ret = i915_gem_object_sync(obj, pipelined); + ret = i915_gem_object_sync(obj, pipelined, pipelined_request); if (ret) return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index eca88a6..1978f9c 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -902,7 +902,7 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, list_for_each_entry(vma, vmas, exec_list) { struct drm_i915_gem_object *obj = vma->obj; - ret = i915_gem_object_sync(obj, req->ring); + ret = i915_gem_object_sync(obj, req->ring, &req); if 
(ret) return ret; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index f1c0295..c71c523 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2245,7 +2245,8 @@ intel_fb_align_height(struct drm_device *dev, int height, int intel_pin_and_fence_fb_obj(struct drm_plane *plane, struct drm_framebuffer *fb, - struct intel_engine_cs *pipelined) + struct intel_engine_cs *pipelined, + struct drm_i915_gem_request **pipelined_request) { struct drm_device *dev = fb->dev; struct drm_i915_private *dev_priv = dev->dev_private; @@ -2304,7 +2305,7 @@ intel_pin_and_fence_fb_obj(struct drm_plane *plane, intel_runtime_pm_get(dev_priv); dev_priv->mm.interruptible = false; - ret = i915_gem_object_pin_to_display_plane(obj, alignment, pipelined); + ret = i915_gem_object_pin_to_display_plane(obj, alignment, pipelined, pipelined_request); if (ret) goto err_interruptible; @@ -9882,6 +9883,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, enum pipe pipe = intel_crtc->pipe; struct intel_unpin_work *work; struct intel_engine_cs *ring; + struct drm_i915_gem_request *request = NULL; int ret; /* @@ -9979,7 +9981,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, ring = &dev_priv->ring[RCS]; } - ret = intel_pin_and_fence_fb_obj(crtc->primary, fb, ring); + ret = intel_pin_and_fence_fb_obj(crtc->primary, fb, ring, &request); if (ret) goto cleanup_pending; @@ -10004,6 +10006,9 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, intel_ring_get_request(ring)); } + if (request) + i915_add_request_no_flush(request->ring); + work->flip_queued_vblank = drm_crtc_vblank_count(crtc); work->enable_stall_check = true; @@ -10021,6 +10026,8 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, cleanup_unpin: intel_unpin_fb_obj(obj); cleanup_pending: + if (request) + i915_gem_request_cancel(request); atomic_dec(&intel_crtc->unpin_work_count); mutex_unlock(&dev->struct_mutex); cleanup: @@ -11975,7 
+11982,7 @@ intel_prepare_plane_fb(struct drm_plane *plane, if (ret) DRM_DEBUG_KMS("failed to attach phys object\n"); } else { - ret = intel_pin_and_fence_fb_obj(plane, fb, NULL); + ret = intel_pin_and_fence_fb_obj(plane, fb, NULL, NULL); } if (ret == 0) @@ -13900,7 +13907,7 @@ void intel_modeset_gem_init(struct drm_device *dev) if (intel_pin_and_fence_fb_obj(c->primary, c->primary->fb, - NULL)) { + NULL, NULL)) { DRM_ERROR("failed to pin boot fb on pipe %d\n", to_intel_crtc(c)->pipe); drm_framebuffer_unreference(c->primary->fb); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 8bb18e5..6ad2823 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -961,7 +961,8 @@ void intel_release_load_detect_pipe(struct drm_connector *connector, struct intel_load_detect_pipe *old); int intel_pin_and_fence_fb_obj(struct drm_plane *plane, struct drm_framebuffer *fb, - struct intel_engine_cs *pipelined); + struct intel_engine_cs *pipelined, + struct drm_i915_gem_request **pipelined_request); struct drm_framebuffer * __intel_framebuffer_create(struct drm_device *dev, struct drm_mode_fb_cmd2 *mode_cmd, diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 757c0d2..4e7e7da 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -151,7 +151,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper, } /* Flush everything out, we'll be doing GTT only from now on */ - ret = intel_pin_and_fence_fb_obj(NULL, fb, NULL); + ret = intel_pin_and_fence_fb_obj(NULL, fb, NULL, NULL); if (ret) { DRM_ERROR("failed to pin obj: %d\n", ret); goto out_fb; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 0480ad4..93faf11 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -590,7 +590,7 @@ static int execlists_move_to_gpu(struct drm_i915_gem_request *req, list_for_each_entry(vma, vmas, 
exec_list) { struct drm_i915_gem_object *obj = vma->obj; - ret = i915_gem_object_sync(obj, req->ring); + ret = i915_gem_object_sync(obj, req->ring, &req); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 6af2581..ce7797c 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -720,7 +720,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, if (ret != 0) return ret; - ret = i915_gem_object_pin_to_display_plane(new_bo, 0, NULL); + ret = i915_gem_object_pin_to_display_plane(new_bo, 0, NULL, NULL); if (ret != 0) return ret;