Message ID | 1452503961-14837-73-git-send-email-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 11/01/16 09:17, Chris Wilson wrote: > In the next patch, request tracking is made more generic and for that we > need a new expanded struct and to separate out the logic changes from > the mechanical churn, we split out the structure renaming into this > patch. > > v2: Writer's block. Add some spiel about why we track requests. > v3: Now i915_gem_active. > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > --- > drivers/gpu/drm/i915/i915_debugfs.c | 10 +++--- > drivers/gpu/drm/i915/i915_drv.h | 9 +++-- > drivers/gpu/drm/i915/i915_gem.c | 56 +++++++++++++++--------------- > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 +-- > drivers/gpu/drm/i915/i915_gem_fence.c | 6 ++-- > drivers/gpu/drm/i915/i915_gem_request.h | 38 ++++++++++++++++++++ > drivers/gpu/drm/i915/i915_gem_tiling.c | 2 +- > drivers/gpu/drm/i915/i915_gpu_error.c | 6 ++-- > drivers/gpu/drm/i915/intel_display.c | 10 +++--- > 9 files changed, 89 insertions(+), 52 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c > index 8de944ed3369..65cb1d6a5d64 100644 > --- a/drivers/gpu/drm/i915/i915_debugfs.c > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > @@ -146,10 +146,10 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) > obj->base.write_domain); > for_each_ring(ring, dev_priv, i) > seq_printf(m, "%x ", > - i915_gem_request_get_seqno(obj->last_read_req[i])); > + i915_gem_request_get_seqno(obj->last_read[i].request)); > seq_printf(m, "] %x %x%s%s%s", > - i915_gem_request_get_seqno(obj->last_write_req), > - i915_gem_request_get_seqno(obj->last_fenced_req), > + i915_gem_request_get_seqno(obj->last_write.request), > + i915_gem_request_get_seqno(obj->last_fence.request), > i915_cache_level_str(to_i915(obj->base.dev), obj->cache_level), > obj->dirty ? " dirty" : "", > obj->madv == I915_MADV_DONTNEED ? 
" purgeable" : ""); > @@ -184,8 +184,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) > *t = '\0'; > seq_printf(m, " (%s mappable)", s); > } > - if (obj->last_write_req != NULL) > - seq_printf(m, " (%s)", obj->last_write_req->engine->name); > + if (obj->last_write.request != NULL) > + seq_printf(m, " (%s)", obj->last_write.request->engine->name); > if (obj->frontbuffer_bits) > seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits); > } > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index cae448e238ca..c577f86d94f8 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -2110,11 +2110,10 @@ struct drm_i915_gem_object { > * requests on one ring where the write request is older than the > * read request. This allows for the CPU to read from an active > * buffer by only waiting for the write to complete. > - * */ > - struct drm_i915_gem_request *last_read_req[I915_NUM_RINGS]; > - struct drm_i915_gem_request *last_write_req; > - /** Breadcrumb of last fenced GPU access to the buffer. */ > - struct drm_i915_gem_request *last_fenced_req; > + */ > + struct i915_gem_active last_read[I915_NUM_RINGS]; > + struct i915_gem_active last_write; > + struct i915_gem_active last_fence; > > /** Current tiling stride for the object, if it's tiled. 
*/ > uint32_t stride; > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index b0230e7151ce..77c253ddf060 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -1117,23 +1117,23 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, > return 0; > > if (readonly) { > - if (obj->last_write_req != NULL) { > - ret = i915_wait_request(obj->last_write_req); > + if (obj->last_write.request != NULL) { > + ret = i915_wait_request(obj->last_write.request); > if (ret) > return ret; > > - i = obj->last_write_req->engine->id; > - if (obj->last_read_req[i] == obj->last_write_req) > + i = obj->last_write.request->engine->id; > + if (obj->last_read[i].request == obj->last_write.request) > i915_gem_object_retire__read(obj, i); > else > i915_gem_object_retire__write(obj); > } > } else { > for (i = 0; i < I915_NUM_RINGS; i++) { > - if (obj->last_read_req[i] == NULL) > + if (obj->last_read[i].request == NULL) > continue; > > - ret = i915_wait_request(obj->last_read_req[i]); > + ret = i915_wait_request(obj->last_read[i].request); > if (ret) > return ret; > > @@ -1151,9 +1151,9 @@ i915_gem_object_retire_request(struct drm_i915_gem_object *obj, > { > int ring = req->engine->id; > > - if (obj->last_read_req[ring] == req) > + if (obj->last_read[ring].request == req) > i915_gem_object_retire__read(obj, ring); > - else if (obj->last_write_req == req) > + else if (obj->last_write.request == req) > i915_gem_object_retire__write(obj); > > i915_gem_request_retire_upto(req); > @@ -1181,7 +1181,7 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, > if (readonly) { > struct drm_i915_gem_request *req; > > - req = obj->last_write_req; > + req = obj->last_write.request; > if (req == NULL) > return 0; > > @@ -1190,7 +1190,7 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, > for (i = 0; i < I915_NUM_RINGS; i++) { > struct drm_i915_gem_request *req; > > - req = 
obj->last_read_req[i]; > + req = obj->last_read[i].request; > if (req == NULL) > continue; > > @@ -2070,7 +2070,7 @@ void i915_vma_move_to_active(struct i915_vma *vma, > obj->active |= intel_engine_flag(engine); > > list_move_tail(&obj->ring_list[engine->id], &engine->active_list); > - i915_gem_request_assign(&obj->last_read_req[engine->id], req); > + i915_gem_request_mark_active(req, &obj->last_read[engine->id]); > > list_move_tail(&vma->mm_list, &vma->vm->active_list); > } > @@ -2078,10 +2078,10 @@ void i915_vma_move_to_active(struct i915_vma *vma, > static void > i915_gem_object_retire__write(struct drm_i915_gem_object *obj) > { > - GEM_BUG_ON(obj->last_write_req == NULL); > - GEM_BUG_ON(!(obj->active & intel_engine_flag(obj->last_write_req->engine))); > + GEM_BUG_ON(obj->last_write.request == NULL); > + GEM_BUG_ON(!(obj->active & intel_engine_flag(obj->last_write.request->engine))); > > - i915_gem_request_assign(&obj->last_write_req, NULL); > + i915_gem_request_assign(&obj->last_write.request, NULL); > intel_fb_obj_flush(obj, true, ORIGIN_CS); > } > > @@ -2090,13 +2090,13 @@ i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring) > { > struct i915_vma *vma; > > - GEM_BUG_ON(obj->last_read_req[ring] == NULL); > + GEM_BUG_ON(obj->last_read[ring].request == NULL); > GEM_BUG_ON(!(obj->active & (1 << ring))); > > list_del_init(&obj->ring_list[ring]); > - i915_gem_request_assign(&obj->last_read_req[ring], NULL); > + i915_gem_request_assign(&obj->last_read[ring].request, NULL); > > - if (obj->last_write_req && obj->last_write_req->engine->id == ring) > + if (obj->last_write.request && obj->last_write.request->engine->id == ring) > i915_gem_object_retire__write(obj); > > obj->active &= ~(1 << ring); > @@ -2115,7 +2115,7 @@ i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring) > list_move_tail(&vma->mm_list, &vma->vm->inactive_list); > } > > - i915_gem_request_assign(&obj->last_fenced_req, NULL); > + 
i915_gem_request_assign(&obj->last_fence.request, NULL); > drm_gem_object_unreference(&obj->base); > } > > @@ -2336,7 +2336,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring) > struct drm_i915_gem_object, > ring_list[ring->id]); > > - if (!list_empty(&obj->last_read_req[ring->id]->list)) > + if (!list_empty(&obj->last_read[ring->id].request->list)) > break; > > i915_gem_object_retire__read(obj, ring->id); > @@ -2445,7 +2445,7 @@ i915_gem_object_flush_active(struct drm_i915_gem_object *obj) > for (i = 0; i < I915_NUM_RINGS; i++) { > struct drm_i915_gem_request *req; > > - req = obj->last_read_req[i]; > + req = obj->last_read[i].request; > if (req == NULL) > continue; > > @@ -2525,10 +2525,10 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) > drm_gem_object_unreference(&obj->base); > > for (i = 0; i < I915_NUM_RINGS; i++) { > - if (obj->last_read_req[i] == NULL) > + if (obj->last_read[i].request == NULL) > continue; > > - req[n++] = i915_gem_request_get(obj->last_read_req[i]); > + req[n++] = i915_gem_request_get(obj->last_read[i].request); > } > > mutex_unlock(&dev->struct_mutex); > @@ -2619,12 +2619,12 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj, > > n = 0; > if (readonly) { > - if (obj->last_write_req) > - req[n++] = obj->last_write_req; > + if (obj->last_write.request) > + req[n++] = obj->last_write.request; > } else { > for (i = 0; i < I915_NUM_RINGS; i++) > - if (obj->last_read_req[i]) > - req[n++] = obj->last_read_req[i]; > + if (obj->last_read[i].request) > + req[n++] = obj->last_read[i].request; > } > for (i = 0; i < n; i++) { > ret = __i915_gem_object_sync(obj, to, req[i]); > @@ -3695,8 +3695,8 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, > > BUILD_BUG_ON(I915_NUM_RINGS > 16); > args->busy = obj->active << 16; > - if (obj->last_write_req) > - args->busy |= obj->last_write_req->engine->id; > + if (obj->last_write.request) > + args->busy |= obj->last_write.request->engine->id; > > unref: 
> drm_gem_object_unreference(&obj->base); > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > index 6dee27224ddb..56d6b5dbb121 100644 > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > @@ -1125,7 +1125,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, > > i915_vma_move_to_active(vma, req); > if (obj->base.write_domain) { > - i915_gem_request_assign(&obj->last_write_req, req); > + i915_gem_request_mark_active(req, &obj->last_write); > > intel_fb_obj_invalidate(obj, ORIGIN_CS); > > @@ -1133,7 +1133,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, > obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; > } > if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { > - i915_gem_request_assign(&obj->last_fenced_req, req); > + i915_gem_request_mark_active(req, &obj->last_fence); > if (entry->flags & __EXEC_OBJECT_HAS_FENCE) { > struct drm_i915_private *dev_priv = req->i915; > list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list, > diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c > index 598198543dcd..ab29c237ffa9 100644 > --- a/drivers/gpu/drm/i915/i915_gem_fence.c > +++ b/drivers/gpu/drm/i915/i915_gem_fence.c > @@ -261,12 +261,12 @@ static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) > static int > i915_gem_object_wait_fence(struct drm_i915_gem_object *obj) > { > - if (obj->last_fenced_req) { > - int ret = i915_wait_request(obj->last_fenced_req); > + if (obj->last_fence.request) { > + int ret = i915_wait_request(obj->last_fence.request); > if (ret) > return ret; > > - i915_gem_request_assign(&obj->last_fenced_req, NULL); > + i915_gem_request_assign(&obj->last_fence.request, NULL); > } > > return 0; > diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h > index 2da9e0b5dfc7..0a21986c332b 100644 > --- 
a/drivers/gpu/drm/i915/i915_gem_request.h > +++ b/drivers/gpu/drm/i915/i915_gem_request.h > @@ -208,4 +208,42 @@ static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req) > req->fence.seqno); > } > > +/* We treat requests as fences. This is not to be confused with our > + * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync. > + * We use the fences to synchronize access from the CPU with activity on the > + * GPU, for example, we should not rewrite an object's PTE whilst the GPU > + * is reading them. We also track fences at a higher level to provide > + * implicit synchronisation around GEM objects, e.g. set-domain will wait > + * for outstanding GPU rendering before marking the object ready for CPU > + * access, or a pageflip will wait until the GPU is complete before showing > + * the frame on the scanout. > + * > + * In order to use a fence, the object must track the fence it needs to > + * serialise with. For example, GEM objects want to track both read and > + * write access so that we can perform concurrent read operations between > + * the CPU and GPU engines, as well as waiting for all rendering to > + * complete, or waiting for the last GPU user of a "fence register". The > + * object then embeds a @i915_gem_active to track the most recent (in > + * retirement order) request relevant for the desired mode of access. > + * The @i915_gem_active is updated with i915_gem_request_mark_active() to > + * track the most recent fence request, typically this is done as part of > + * i915_vma_move_to_active(). > + * > + * When the @i915_gem_active completes (is retired), it will > + * signal its completion to the owner through a callback as well as mark > + * itself as idle (i915_gem_active.request == NULL). The owner > + * can then perform any action, such as delayed freeing of an active > + * resource including itself. 
> + */ > +struct i915_gem_active { > + struct drm_i915_gem_request *request; > +}; > + > +static inline void > +i915_gem_request_mark_active(struct drm_i915_gem_request *request, > + struct i915_gem_active *active) > +{ > + i915_gem_request_assign(&active->request, request); > +} This function name bothers me since I think the name is misleading and unintuitive. It is not marking a request as active but associating it with the second data structure. Maybe i915_gem_request_move_to_active to keep the mental association with the well established vma_move_to_active ? Maybe struct i915_gem_active could also be better called i915_gem_active_list ? It is not ideal because of the future little reversal of who is in whose list, so maybe there is something even better. But I think an intuitive name is really important for code clarity and maintainability. Regards, Tvrtko
On Mon, Jan 11, 2016 at 05:32:27PM +0000, Tvrtko Ursulin wrote: > >+struct i915_gem_active { > >+ struct drm_i915_gem_request *request; > >+}; > >+ > >+static inline void > >+i915_gem_request_mark_active(struct drm_i915_gem_request *request, > >+ struct i915_gem_active *active) > >+{ > >+ i915_gem_request_assign(&active->request, request); > >+} > > This function name bothers me since I think the name is misleading > and unintuitive. It is not marking a request as active but > associating it with the second data structure. > > Maybe i915_gem_request_move_to_active to keep the mental association > with the well established vma_move_to_active ? That's backwards imo, since it is the i915_gem_active that gets added to the request. (Or at least will be.) > Maybe struct i915_gem_active could also be better called > i915_gem_active_list ? It's not a list but a node. I started with drm_i915_gem_request_node, but that's too unwieldy and I felt even more confusing. > It is not ideal because of the future little reversal of who is in > who's list, so maybe there is something even better. But I think an > intuitive name is really important for code clarity and > maintainability. In userspace, I have the request (which is actually a userspace fence itself) containing a list of fences (that are identical to i915_gem_active, they track which request contains the reference and a callback for signalling) and those fences have a direct correspondence to, ARB_sync_objects, for example. But we already have plenty of conflict regarding the term fence, so that's no go. i915_gem_active, for me, made the association with the active-reference tracking that is ingrained into the objects and beyond. I quite like the connection with GPU activity i915_gem_retire_notifier? Hmm, I still like how i915_gem_active.request != NULL is quite self-descriptive. -Chris
On 11/01/16 22:49, Chris Wilson wrote: > On Mon, Jan 11, 2016 at 05:32:27PM +0000, Tvrtko Ursulin wrote: >>> +struct i915_gem_active { >>> + struct drm_i915_gem_request *request; >>> +}; >>> + >>> +static inline void >>> +i915_gem_request_mark_active(struct drm_i915_gem_request *request, >>> + struct i915_gem_active *active) >>> +{ >>> + i915_gem_request_assign(&active->request, request); >>> +} >> >> This function name bothers me since I think the name is misleading >> and unintuitive. It is not marking a request as active but >> associating it with the second data structure. >> >> Maybe i915_gem_request_move_to_active to keep the mental association >> with the well established vma_move_to_active ? > > That's backwards imo, since it is the i915_gem_active that gets added to > the request. (Or at least will be.) > >> Maybe struct i915_gem_active could also be better called >> i915_gem_active_list ? > > It's not a list but a node. I started with drm_i915_gem_request_node, > but that's too unwieldy and I felt even more confusing. > >> It is not ideal because of the future little reversal of who is in >> who's list, so maybe there is something even better. But I think an >> intuitive name is really important for code clarity and >> maintainability. > > In userspace, I have the request (which is actually a userspace fence > itself) containing a list of fences (that are identical to i915_gem_active, > they track which request contains the reference and a callback for > signalling) and those fences have a direct correspondence to, > ARB_sync_objects, for example. But we already have plenty of conflict > regarding the term fence, so that's no go. > > i915_gem_active, for me, made the association with the active-reference > tracking that is ingrained into the objects and beyond. I quite like the > connection with GPU activity > > i915_gem_retire_notifier? Hmm, I still like how > i915_gem_active.request != NULL is quite self-descriptive. Yes the last bit is neat. 
Perhaps then leave the structure name as is and just rename the function to i915_gem_request_assign_active? I think that describes better what is actually happening. Regards, Tvrtko
On Tue, Jan 12, 2016 at 10:04:20AM +0000, Tvrtko Ursulin wrote: > Perhaps then leave the structure name as is and just rename the > function to i915_gem_request_assign_active? I think that describes > better what is actually happening. i915_gem_request_update_active()? request_assign_active() says to me that it is the request we are acting on and it can have only one active entity. "update" could go either way. i915_gem_active_add_to_request() is the full version I guess, or just i915_gem_active_set(). i915_gem_request_mark_active() -> i915_gem_active_set() -Chris
On 12/01/16 11:01, Chris Wilson wrote: > On Tue, Jan 12, 2016 at 10:04:20AM +0000, Tvrtko Ursulin wrote: >> Perhaps then leave the structure name as is and just rename the >> function to i915_gem_request_assign_active? I think that describes >> better what is actually happening. > > i915_gem_request_update_active()? > > request_assign_active() says to me that it is the request we are acting > on and it can have only one active entity. "update" could go either way. > > i915_gem_active_add_to_request() is the full version I guess, or just > i915_gem_active_set(). i915_gem_active_add_to_request sounds good, but need to reorder the parameters then I think. Regards, Tvrtko
On 12/01/16 11:01, Chris Wilson wrote: > On Tue, Jan 12, 2016 at 10:04:20AM +0000, Tvrtko Ursulin wrote: >> Perhaps then leave the structure name as is and just rename the >> function to i915_gem_request_assign_active? I think that describes >> better what is actually happening. > > i915_gem_request_update_active()? > > request_assign_active() says to me that it is the request we are acting > on and it can have only one active entity. "update" could go either way. > > i915_gem_active_add_to_request() is the full version I guess, or just > i915_gem_active_set(). > > i915_gem_request_mark_active() -> i915_gem_active_set() Sorry, or the short version might be good enough and better in the code since shorter. Just I think parameters need to be re-ordered. Regards, Tvrtko
On Tue, Jan 12, 2016 at 01:44:13PM +0000, Tvrtko Ursulin wrote: > > On 12/01/16 11:01, Chris Wilson wrote: > >On Tue, Jan 12, 2016 at 10:04:20AM +0000, Tvrtko Ursulin wrote: > >>Perhaps then leave the structure name as is and just rename the > >>function to i915_gem_request_assign_active? I think that describes > >>better what is actually happening. > > > >i915_gem_request_update_active()? > > > >request_assign_active() says to me that it is the request we are acting > >on and it can have only one active entity. "update" could go either way. > > > >i915_gem_active_add_to_request() is the full version I guess, or just > >i915_gem_active_set(). > > > >i915_gem_request_mark_active() -> i915_gem_active_set() > > Sorry, or the short version might be good enough and better in the > code since shorter. Just I think parameters need to be re-ordered. Yes, i915_gem_active_set() would operate on the i915_gem_active and take i915_gem_request as its parameter. -Chris
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 8de944ed3369..65cb1d6a5d64 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -146,10 +146,10 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) obj->base.write_domain); for_each_ring(ring, dev_priv, i) seq_printf(m, "%x ", - i915_gem_request_get_seqno(obj->last_read_req[i])); + i915_gem_request_get_seqno(obj->last_read[i].request)); seq_printf(m, "] %x %x%s%s%s", - i915_gem_request_get_seqno(obj->last_write_req), - i915_gem_request_get_seqno(obj->last_fenced_req), + i915_gem_request_get_seqno(obj->last_write.request), + i915_gem_request_get_seqno(obj->last_fence.request), i915_cache_level_str(to_i915(obj->base.dev), obj->cache_level), obj->dirty ? " dirty" : "", obj->madv == I915_MADV_DONTNEED ? " purgeable" : ""); @@ -184,8 +184,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) *t = '\0'; seq_printf(m, " (%s mappable)", s); } - if (obj->last_write_req != NULL) - seq_printf(m, " (%s)", obj->last_write_req->engine->name); + if (obj->last_write.request != NULL) + seq_printf(m, " (%s)", obj->last_write.request->engine->name); if (obj->frontbuffer_bits) seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index cae448e238ca..c577f86d94f8 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2110,11 +2110,10 @@ struct drm_i915_gem_object { * requests on one ring where the write request is older than the * read request. This allows for the CPU to read from an active * buffer by only waiting for the write to complete. - * */ - struct drm_i915_gem_request *last_read_req[I915_NUM_RINGS]; - struct drm_i915_gem_request *last_write_req; - /** Breadcrumb of last fenced GPU access to the buffer. 
*/ - struct drm_i915_gem_request *last_fenced_req; + */ + struct i915_gem_active last_read[I915_NUM_RINGS]; + struct i915_gem_active last_write; + struct i915_gem_active last_fence; /** Current tiling stride for the object, if it's tiled. */ uint32_t stride; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b0230e7151ce..77c253ddf060 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1117,23 +1117,23 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, return 0; if (readonly) { - if (obj->last_write_req != NULL) { - ret = i915_wait_request(obj->last_write_req); + if (obj->last_write.request != NULL) { + ret = i915_wait_request(obj->last_write.request); if (ret) return ret; - i = obj->last_write_req->engine->id; - if (obj->last_read_req[i] == obj->last_write_req) + i = obj->last_write.request->engine->id; + if (obj->last_read[i].request == obj->last_write.request) i915_gem_object_retire__read(obj, i); else i915_gem_object_retire__write(obj); } } else { for (i = 0; i < I915_NUM_RINGS; i++) { - if (obj->last_read_req[i] == NULL) + if (obj->last_read[i].request == NULL) continue; - ret = i915_wait_request(obj->last_read_req[i]); + ret = i915_wait_request(obj->last_read[i].request); if (ret) return ret; @@ -1151,9 +1151,9 @@ i915_gem_object_retire_request(struct drm_i915_gem_object *obj, { int ring = req->engine->id; - if (obj->last_read_req[ring] == req) + if (obj->last_read[ring].request == req) i915_gem_object_retire__read(obj, ring); - else if (obj->last_write_req == req) + else if (obj->last_write.request == req) i915_gem_object_retire__write(obj); i915_gem_request_retire_upto(req); @@ -1181,7 +1181,7 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, if (readonly) { struct drm_i915_gem_request *req; - req = obj->last_write_req; + req = obj->last_write.request; if (req == NULL) return 0; @@ -1190,7 +1190,7 @@ 
i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj, for (i = 0; i < I915_NUM_RINGS; i++) { struct drm_i915_gem_request *req; - req = obj->last_read_req[i]; + req = obj->last_read[i].request; if (req == NULL) continue; @@ -2070,7 +2070,7 @@ void i915_vma_move_to_active(struct i915_vma *vma, obj->active |= intel_engine_flag(engine); list_move_tail(&obj->ring_list[engine->id], &engine->active_list); - i915_gem_request_assign(&obj->last_read_req[engine->id], req); + i915_gem_request_mark_active(req, &obj->last_read[engine->id]); list_move_tail(&vma->mm_list, &vma->vm->active_list); } @@ -2078,10 +2078,10 @@ void i915_vma_move_to_active(struct i915_vma *vma, static void i915_gem_object_retire__write(struct drm_i915_gem_object *obj) { - GEM_BUG_ON(obj->last_write_req == NULL); - GEM_BUG_ON(!(obj->active & intel_engine_flag(obj->last_write_req->engine))); + GEM_BUG_ON(obj->last_write.request == NULL); + GEM_BUG_ON(!(obj->active & intel_engine_flag(obj->last_write.request->engine))); - i915_gem_request_assign(&obj->last_write_req, NULL); + i915_gem_request_assign(&obj->last_write.request, NULL); intel_fb_obj_flush(obj, true, ORIGIN_CS); } @@ -2090,13 +2090,13 @@ i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring) { struct i915_vma *vma; - GEM_BUG_ON(obj->last_read_req[ring] == NULL); + GEM_BUG_ON(obj->last_read[ring].request == NULL); GEM_BUG_ON(!(obj->active & (1 << ring))); list_del_init(&obj->ring_list[ring]); - i915_gem_request_assign(&obj->last_read_req[ring], NULL); + i915_gem_request_assign(&obj->last_read[ring].request, NULL); - if (obj->last_write_req && obj->last_write_req->engine->id == ring) + if (obj->last_write.request && obj->last_write.request->engine->id == ring) i915_gem_object_retire__write(obj); obj->active &= ~(1 << ring); @@ -2115,7 +2115,7 @@ i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring) list_move_tail(&vma->mm_list, &vma->vm->inactive_list); } - 
i915_gem_request_assign(&obj->last_fenced_req, NULL); + i915_gem_request_assign(&obj->last_fence.request, NULL); drm_gem_object_unreference(&obj->base); } @@ -2336,7 +2336,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring) struct drm_i915_gem_object, ring_list[ring->id]); - if (!list_empty(&obj->last_read_req[ring->id]->list)) + if (!list_empty(&obj->last_read[ring->id].request->list)) break; i915_gem_object_retire__read(obj, ring->id); @@ -2445,7 +2445,7 @@ i915_gem_object_flush_active(struct drm_i915_gem_object *obj) for (i = 0; i < I915_NUM_RINGS; i++) { struct drm_i915_gem_request *req; - req = obj->last_read_req[i]; + req = obj->last_read[i].request; if (req == NULL) continue; @@ -2525,10 +2525,10 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) drm_gem_object_unreference(&obj->base); for (i = 0; i < I915_NUM_RINGS; i++) { - if (obj->last_read_req[i] == NULL) + if (obj->last_read[i].request == NULL) continue; - req[n++] = i915_gem_request_get(obj->last_read_req[i]); + req[n++] = i915_gem_request_get(obj->last_read[i].request); } mutex_unlock(&dev->struct_mutex); @@ -2619,12 +2619,12 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj, n = 0; if (readonly) { - if (obj->last_write_req) - req[n++] = obj->last_write_req; + if (obj->last_write.request) + req[n++] = obj->last_write.request; } else { for (i = 0; i < I915_NUM_RINGS; i++) - if (obj->last_read_req[i]) - req[n++] = obj->last_read_req[i]; + if (obj->last_read[i].request) + req[n++] = obj->last_read[i].request; } for (i = 0; i < n; i++) { ret = __i915_gem_object_sync(obj, to, req[i]); @@ -3695,8 +3695,8 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, BUILD_BUG_ON(I915_NUM_RINGS > 16); args->busy = obj->active << 16; - if (obj->last_write_req) - args->busy |= obj->last_write_req->engine->id; + if (obj->last_write.request) + args->busy |= obj->last_write.request->engine->id; unref: drm_gem_object_unreference(&obj->base); diff --git 
a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 6dee27224ddb..56d6b5dbb121 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1125,7 +1125,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, i915_vma_move_to_active(vma, req); if (obj->base.write_domain) { - i915_gem_request_assign(&obj->last_write_req, req); + i915_gem_request_mark_active(req, &obj->last_write); intel_fb_obj_invalidate(obj, ORIGIN_CS); @@ -1133,7 +1133,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; } if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { - i915_gem_request_assign(&obj->last_fenced_req, req); + i915_gem_request_mark_active(req, &obj->last_fence); if (entry->flags & __EXEC_OBJECT_HAS_FENCE) { struct drm_i915_private *dev_priv = req->i915; list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list, diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c index 598198543dcd..ab29c237ffa9 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence.c +++ b/drivers/gpu/drm/i915/i915_gem_fence.c @@ -261,12 +261,12 @@ static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) static int i915_gem_object_wait_fence(struct drm_i915_gem_object *obj) { - if (obj->last_fenced_req) { - int ret = i915_wait_request(obj->last_fenced_req); + if (obj->last_fence.request) { + int ret = i915_wait_request(obj->last_fence.request); if (ret) return ret; - i915_gem_request_assign(&obj->last_fenced_req, NULL); + i915_gem_request_assign(&obj->last_fence.request, NULL); } return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 2da9e0b5dfc7..0a21986c332b 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -208,4 +208,42 @@ static inline bool i915_gem_request_completed(struct 
drm_i915_gem_request *req) req->fence.seqno); } +/* We treat requests as fences. This is not to be confused with our + * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync. + * We use the fences to synchronize access from the CPU with activity on the + * GPU, for example, we should not rewrite an object's PTE whilst the GPU + * is reading them. We also track fences at a higher level to provide + * implicit synchronisation around GEM objects, e.g. set-domain will wait + * for outstanding GPU rendering before marking the object ready for CPU + * access, or a pageflip will wait until the GPU is complete before showing + * the frame on the scanout. + * + * In order to use a fence, the object must track the fence it needs to + * serialise with. For example, GEM objects want to track both read and + * write access so that we can perform concurrent read operations between + * the CPU and GPU engines, as well as waiting for all rendering to + * complete, or waiting for the last GPU user of a "fence register". The + * object then embeds a @i915_gem_active to track the most recent (in + * retirement order) request relevant for the desired mode of access. + * The @i915_gem_active is updated with i915_gem_request_mark_active() to + * track the most recent fence request, typically this is done as part of + * i915_vma_move_to_active(). + * + * When the @i915_gem_active completes (is retired), it will + * signal its completion to the owner through a callback as well as mark + * itself as idle (i915_gem_active.request == NULL). The owner + * can then perform any action, such as delayed freeing of an active + * resource including itself. 
+ */ +struct i915_gem_active { + struct drm_i915_gem_request *request; +}; + +static inline void +i915_gem_request_mark_active(struct drm_i915_gem_request *request, + struct i915_gem_active *active) +{ + i915_gem_request_assign(&active->request, request); +} + #endif /* I915_GEM_REQUEST_H */ diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 7410f6c962e7..c7588135a82d 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -242,7 +242,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, } obj->fence_dirty = - obj->last_fenced_req || + obj->last_fence.request || obj->fence_reg != I915_FENCE_REG_NONE; obj->tiling_mode = args->tiling_mode; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 2785f2d1f073..5027636e3624 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -708,8 +708,8 @@ static void capture_bo(struct drm_i915_error_buffer *err, err->size = obj->base.size; err->name = obj->base.name; for (i = 0; i < I915_NUM_RINGS; i++) - err->rseqno[i] = i915_gem_request_get_seqno(obj->last_read_req[i]); - err->wseqno = i915_gem_request_get_seqno(obj->last_write_req); + err->rseqno[i] = i915_gem_request_get_seqno(obj->last_read[i].request); + err->wseqno = i915_gem_request_get_seqno(obj->last_write.request); err->gtt_offset = vma->node.start; err->read_domains = obj->base.read_domains; err->write_domain = obj->base.write_domain; @@ -721,7 +721,7 @@ static void capture_bo(struct drm_i915_error_buffer *err, err->dirty = obj->dirty; err->purgeable = obj->madv != I915_MADV_WILLNEED; err->userptr = obj->userptr.mm != NULL; - err->ring = obj->last_write_req ? obj->last_write_req->engine->id : -1; + err->ring = obj->last_write.request ? 
obj->last_write.request->engine->id : -1; err->cache_level = obj->cache_level; } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ec52fff7e0b0..eef858d5376f 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11310,7 +11310,7 @@ static bool use_mmio_flip(struct intel_engine_cs *ring, false)) return true; else - return ring != i915_gem_request_get_engine(obj->last_write_req); + return ring != i915_gem_request_get_engine(obj->last_write.request); } static void skl_do_mmio_flip(struct intel_crtc *intel_crtc, @@ -11455,7 +11455,7 @@ static int intel_queue_mmio_flip(struct drm_device *dev, return -ENOMEM; mmio_flip->i915 = to_i915(dev); - mmio_flip->req = i915_gem_request_get(obj->last_write_req); + mmio_flip->req = i915_gem_request_get(obj->last_write.request); mmio_flip->crtc = to_intel_crtc(crtc); mmio_flip->rotation = crtc->primary->state->rotation; @@ -11654,7 +11654,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, } else if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) { ring = &dev_priv->ring[BCS]; } else if (INTEL_INFO(dev)->gen >= 7) { - ring = i915_gem_request_get_engine(obj->last_write_req); + ring = i915_gem_request_get_engine(obj->last_write.request); if (ring == NULL || ring->id != RCS) ring = &dev_priv->ring[BCS]; } else { @@ -11695,7 +11695,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, goto cleanup_unpin; i915_gem_request_assign(&work->flip_queued_req, - obj->last_write_req); + obj->last_write.request); } else { ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, request, page_flip_flags); @@ -13895,7 +13895,7 @@ intel_prepare_plane_fb(struct drm_plane *plane, to_intel_plane_state(new_state); i915_gem_request_assign(&plane_state->wait_req, - obj->last_write_req); + obj->last_write.request); } i915_gem_track_fb(old_obj, obj, intel_plane->frontbuffer_bit);
In the next patch, request tracking is made more generic and for that we need a new expanded struct and to separate out the logic changes from the mechanical churn, we split out the structure renaming into this patch. v2: Writer's block. Add some spiel about why we track requests. v3: Now i915_gem_active. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> --- drivers/gpu/drm/i915/i915_debugfs.c | 10 +++--- drivers/gpu/drm/i915/i915_drv.h | 9 +++-- drivers/gpu/drm/i915/i915_gem.c | 56 +++++++++++++++--------------- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 +-- drivers/gpu/drm/i915/i915_gem_fence.c | 6 ++-- drivers/gpu/drm/i915/i915_gem_request.h | 38 ++++++++++++++++++++ drivers/gpu/drm/i915/i915_gem_tiling.c | 2 +- drivers/gpu/drm/i915/i915_gpu_error.c | 6 ++-- drivers/gpu/drm/i915/intel_display.c | 10 +++--- 9 files changed, 89 insertions(+), 52 deletions(-)