Message ID | 20180629075348.27358-9-chris@chris-wilson.co.uk (mailing list archive)
---|---
State | New, archived
The kernel recently gained an augmented rbtree with the purpose of
caching the leftmost element of the rbtree, a frequent optimisation to
avoid calls to rb_first(), one also employed by the execlists->queue.
Switch from our open-coded cache to the library.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/intel_engine_cs.c      |  7 ++---
 drivers/gpu/drm/i915/intel_guc_submission.c | 12 +++-----
 drivers/gpu/drm/i915/intel_lrc.c            | 32 +++++++--------------
 drivers/gpu/drm/i915/intel_ringbuffer.h     |  7 +----
 4 files changed, 19 insertions(+), 39 deletions(-)
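For readers unfamiliar with the library, here is a minimal editor's sketch (not part of the patch) of the rb_root_cached insertion idiom that lookup_priolist() adopts in the diff below. The `struct prio_node` type and `prio_insert()` helper are hypothetical stand-ins for i915_priolist and lookup_priolist():

```c
/*
 * Editor's sketch, not from the patch: insertion into a cached rbtree.
 * "struct prio_node" and "prio_insert" are hypothetical stand-ins for
 * i915_priolist and lookup_priolist().
 */
#include <linux/rbtree.h>

struct prio_node {
        struct rb_node node;
        int priority;
};

/* The tree itself is declared as: struct rb_root_cached queue = RB_ROOT_CACHED; */

static void prio_insert(struct rb_root_cached *root, struct prio_node *p)
{
        struct rb_node **link = &root->rb_root.rb_node;
        struct rb_node *parent = NULL;
        bool first = true; /* does @p become the new leftmost node? */

        while (*link) {
                struct prio_node *it;

                parent = *link;
                it = rb_entry(parent, struct prio_node, node);
                if (p->priority > it->priority) {
                        /* most positive priority sorts leftmost */
                        link = &parent->rb_left;
                } else {
                        link = &parent->rb_right;
                        first = false; /* we descended right at least once */
                }
        }

        rb_link_node(&p->node, parent, link);
        /* the leftmost hint lets the library refresh its cache in O(1) */
        rb_insert_color_cached(&p->node, root, first);
}
```

The only differences from a plain rb_root are the extra indirection when walking (`root->rb_root.rb_node`) and the `first` hint at insertion; in exchange, rb_first_cached() becomes a simple pointer read.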
```diff
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 457003311b74..14d5b673fc27 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -464,8 +464,7 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine)
         GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
 
         execlists->queue_priority = INT_MIN;
-        execlists->queue = RB_ROOT;
-        execlists->first = NULL;
+        execlists->queue = RB_ROOT_CACHED;
 }
 
 /**
@@ -1000,7 +999,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
         }
 
         /* ELSP is empty, but there are ready requests? E.g. after reset */
-        if (READ_ONCE(engine->execlists.first))
+        if (!RB_EMPTY_ROOT(&engine->execlists.queue.rb_root))
                 return false;
 
         /* Ring stopped? */
@@ -1615,7 +1614,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
         last = NULL;
         count = 0;
         drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority);
-        for (rb = execlists->first; rb; rb = rb_next(rb)) {
+        for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) {
                 struct i915_priolist *p =
                         rb_entry(rb, typeof(*p), node);
 
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 05449f636d94..9a2c6856a71e 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -694,9 +694,6 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
 
         lockdep_assert_held(&engine->timeline.lock);
 
-        rb = execlists->first;
-        GEM_BUG_ON(rb_first(&execlists->queue) != rb);
-
         if (port_isset(port)) {
                 if (intel_engine_has_preemption(engine)) {
                         struct guc_preempt_work *preempt_work =
@@ -718,7 +715,7 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
         }
         GEM_BUG_ON(port_isset(port));
 
-        while (rb) {
+        while ((rb = rb_first_cached(&execlists->queue))) {
                 struct i915_priolist *p = to_priolist(rb);
                 struct i915_request *rq, *rn;
 
@@ -743,15 +740,13 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
                         submit = true;
                 }
 
-                rb = rb_next(rb);
-                rb_erase(&p->node, &execlists->queue);
+                rb_erase_cached(&p->node, &execlists->queue);
                 INIT_LIST_HEAD(&p->requests);
                 if (p->priority != I915_PRIORITY_NORMAL)
                         kmem_cache_free(engine->i915->priorities, p);
         }
 done:
         execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN;
-        execlists->first = rb;
         if (submit)
                 port_assign(port, last);
         if (last)
@@ -760,7 +755,8 @@ static bool __guc_dequeue(struct intel_engine_cs *engine)
         /* We must always keep the beast fed if we have work piled up */
         GEM_BUG_ON(port_isset(execlists->port) &&
                    !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));
-        GEM_BUG_ON(execlists->first && !port_isset(execlists->port));
+        GEM_BUG_ON(rb_first_cached(&execlists->queue) &&
+                   !port_isset(execlists->port));
 
         return submit;
 }
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index a6bc50d7195e..422753c8641f 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -273,7 +273,7 @@ lookup_priolist(struct intel_engine_cs *engine, int prio)
 find_priolist:
         /* most positive priority is scheduled first, equal priorities fifo */
         rb = NULL;
-        parent = &execlists->queue.rb_node;
+        parent = &execlists->queue.rb_root.rb_node;
         while (*parent) {
                 rb = *parent;
                 p = to_priolist(rb);
@@ -311,10 +311,7 @@ lookup_priolist(struct intel_engine_cs *engine, int prio)
         p->priority = prio;
         INIT_LIST_HEAD(&p->requests);
         rb_link_node(&p->node, rb, parent);
-        rb_insert_color(&p->node, &execlists->queue);
-
-        if (first)
-                execlists->first = &p->node;
+        rb_insert_color_cached(&p->node, &execlists->queue, first);
 
         return p;
 }
@@ -598,9 +595,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
          * and context switches) submission.
          */
 
-        rb = execlists->first;
-        GEM_BUG_ON(rb_first(&execlists->queue) != rb);
-
         if (last) {
                 /*
                  * Don't resubmit or switch until all outstanding
@@ -662,7 +656,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                 last->tail = last->wa_tail;
         }
 
-        while (rb) {
+        while ((rb = rb_first_cached(&execlists->queue))) {
                 struct i915_priolist *p = to_priolist(rb);
                 struct i915_request *rq, *rn;
 
@@ -721,8 +715,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                         submit = true;
                 }
 
-                rb = rb_next(rb);
-                rb_erase(&p->node, &execlists->queue);
+                rb_erase_cached(&p->node, &execlists->queue);
                 INIT_LIST_HEAD(&p->requests);
                 if (p->priority != I915_PRIORITY_NORMAL)
                         kmem_cache_free(engine->i915->priorities, p);
@@ -748,14 +741,14 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
         execlists->queue_priority =
                 port != execlists->port ? rq_prio(last) : INT_MIN;
 
-        execlists->first = rb;
         if (submit) {
                 port_assign(port, last);
                 execlists_submit_ports(engine);
         }
 
         /* We must always keep the beast fed if we have work piled up */
-        GEM_BUG_ON(execlists->first && !port_isset(execlists->port));
+        GEM_BUG_ON(rb_first_cached(&execlists->queue) &&
+                   !port_isset(execlists->port));
 
         /* Re-evaluate the executing context setup after each preemptive kick */
         if (last)
@@ -915,8 +908,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
         }
 
         /* Flush the queued requests to the timeline list (for retiring). */
-        rb = execlists->first;
-        while (rb) {
+        while ((rb = rb_first_cached(&execlists->queue))) {
                 struct i915_priolist *p = to_priolist(rb);
 
                 list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
@@ -926,8 +918,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
                         __i915_request_submit(rq);
                 }
 
-                rb = rb_next(rb);
-                rb_erase(&p->node, &execlists->queue);
+                rb_erase_cached(&p->node, &execlists->queue);
                 INIT_LIST_HEAD(&p->requests);
                 if (p->priority != I915_PRIORITY_NORMAL)
                         kmem_cache_free(engine->i915->priorities, p);
@@ -936,8 +927,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
         /* Remaining _unready_ requests will be nop'ed when submitted */
 
         execlists->queue_priority = INT_MIN;
-        execlists->queue = RB_ROOT;
-        execlists->first = NULL;
+        execlists->queue = RB_ROOT_CACHED;
         GEM_BUG_ON(port_isset(execlists->port));
 
         spin_unlock_irqrestore(&engine->timeline.lock, flags);
@@ -1183,7 +1173,7 @@ static void execlists_submit_request(struct i915_request *request)
 
         queue_request(engine, &request->sched, rq_prio(request));
 
-        GEM_BUG_ON(!engine->execlists.first);
+        GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
         GEM_BUG_ON(list_empty(&request->sched.link));
 
         submit_queue(engine, rq_prio(request));
@@ -2036,7 +2026,7 @@ static void execlists_reset_finish(struct intel_engine_cs *engine)
         struct intel_engine_execlists * const execlists = &engine->execlists;
 
         /* After a GPU reset, we may have requests to replay */
-        if (execlists->first)
+        if (!RB_EMPTY_ROOT(&execlists->queue.rb_root))
                 tasklet_schedule(&execlists->tasklet);
 
         /*
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index a1aff360d0ce..923875500828 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -292,12 +292,7 @@ struct intel_engine_execlists {
         /**
          * @queue: queue of requests, in priority lists
          */
-        struct rb_root queue;
-
-        /**
-         * @first: leftmost level in priority @queue
-         */
-        struct rb_node *first;
+        struct rb_root_cached queue;
 
         /**
          * @csb_read: control register for Context Switch buffer
```
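Each of the dequeue and cancel paths in the diff collapses to the same consume-from-the-left loop. A standalone sketch of that idiom (editor's illustration, reusing the hypothetical `prio_node` from the sketch above):

```c
/*
 * Editor's sketch of the drain loop used by __guc_dequeue(),
 * execlists_dequeue() and execlists_cancel_requests() above.
 * rb_first_cached() is a read of the cached leftmost pointer, and
 * rb_erase_cached() keeps that cache current, so neither an rb_first()
 * walk nor the old open-coded ->first bookkeeping is required.
 */
static void prio_drain(struct rb_root_cached *root)
{
        struct rb_node *rb;

        while ((rb = rb_first_cached(root))) {
                struct prio_node *p = rb_entry(rb, struct prio_node, node);

                rb_erase_cached(&p->node, root);
                /* ... submit/retire the requests on @p, then free it ... */
        }
}
```

When the loop exits, RB_EMPTY_ROOT(&root->rb_root) holds; that is the emptiness test the patch now uses in place of checking execlists->first.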
On 29/06/2018 08:53, Chris Wilson wrote:
> The kernel recently gained an augmented rbtree with the purpose of
> caching the leftmost element of the rbtree, a frequent optimisation to
> avoid calls to rb_first(), one also employed by the execlists->queue.
> Switch from our open-coded cache to the library.
[snip]

All checks out AFAICT. Nice that we can now simplify!

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko