Message ID | 20220111163019.13694-1-matthew.brost@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | drm/i915: Flip guc_id allocation partition | expand |
On 11/01/2022 16:30, Matthew Brost wrote: > Move the multi-lrc guc_id from the lower allocation partition (0 to > number of multi-lrc guc_ids) to upper allocation partition (number of > single-lrc to max guc_ids). Just a reminder that best practice for commit messages is to include the "why" as well. Regards, Tvrtko > Signed-off-by: Matthew Brost <matthew.brost@intel.com> > --- > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 57 ++++++++++++++----- > 1 file changed, 42 insertions(+), 15 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > index 9989d121127df..1bacc9621cea8 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > @@ -147,6 +147,8 @@ guc_create_parallel(struct intel_engine_cs **engines, > */ > #define NUMBER_MULTI_LRC_GUC_ID(guc) \ > ((guc)->submission_state.num_guc_ids / 16) > +#define NUMBER_SINGLE_LRC_GUC_ID(guc) \ > + ((guc)->submission_state.num_guc_ids - NUMBER_MULTI_LRC_GUC_ID(guc)) > > /* > * Below is a set of functions which control the GuC scheduling state which > @@ -1776,11 +1778,6 @@ int intel_guc_submission_init(struct intel_guc *guc) > INIT_WORK(&guc->submission_state.destroyed_worker, > destroyed_worker_func); > > - guc->submission_state.guc_ids_bitmap = > - bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); > - if (!guc->submission_state.guc_ids_bitmap) > - return -ENOMEM; > - > spin_lock_init(&guc->timestamp.lock); > INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping); > guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ; > @@ -1796,7 +1793,8 @@ void intel_guc_submission_fini(struct intel_guc *guc) > guc_flush_destroyed_contexts(guc); > guc_lrc_desc_pool_destroy(guc); > i915_sched_engine_put(guc->sched_engine); > - bitmap_free(guc->submission_state.guc_ids_bitmap); > + if (guc->submission_state.guc_ids_bitmap) > + 
bitmap_free(guc->submission_state.guc_ids_bitmap); > } > > static inline void queue_request(struct i915_sched_engine *sched_engine, > @@ -1863,6 +1861,33 @@ static void guc_submit_request(struct i915_request *rq) > spin_unlock_irqrestore(&sched_engine->lock, flags); > } > > +static int new_mlrc_guc_id(struct intel_guc *guc, struct intel_context *ce) > +{ > + int ret; > + > + GEM_BUG_ON(!intel_context_is_parent(ce)); > + GEM_BUG_ON(!guc->submission_state.guc_ids_bitmap); > + > + ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap, > + NUMBER_MULTI_LRC_GUC_ID(guc), > + order_base_2(ce->parallel.number_children > + + 1)); > + if (likely(!(ret < 0))) > + ret += NUMBER_SINGLE_LRC_GUC_ID(guc); > + > + return ret; > +} > + > +static int new_slrc_guc_id(struct intel_guc *guc, struct intel_context *ce) > +{ > + GEM_BUG_ON(intel_context_is_parent(ce)); > + > + return ida_simple_get(&guc->submission_state.guc_ids, > + 0, NUMBER_SINGLE_LRC_GUC_ID(guc), > + GFP_KERNEL | __GFP_RETRY_MAYFAIL | > + __GFP_NOWARN); > +} > + > static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) > { > int ret; > @@ -1870,16 +1895,10 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) > GEM_BUG_ON(intel_context_is_child(ce)); > > if (intel_context_is_parent(ce)) > - ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap, > - NUMBER_MULTI_LRC_GUC_ID(guc), > - order_base_2(ce->parallel.number_children > - + 1)); > + ret = new_mlrc_guc_id(guc, ce); > else > - ret = ida_simple_get(&guc->submission_state.guc_ids, > - NUMBER_MULTI_LRC_GUC_ID(guc), > - guc->submission_state.num_guc_ids, > - GFP_KERNEL | __GFP_RETRY_MAYFAIL | > - __GFP_NOWARN); > + ret = new_slrc_guc_id(guc, ce); > + > if (unlikely(ret < 0)) > return ret; > > @@ -1989,6 +2008,14 @@ static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce) > > GEM_BUG_ON(atomic_read(&ce->guc_id.ref)); > > + if (unlikely(intel_context_is_parent(ce) && > + 
!guc->submission_state.guc_ids_bitmap)) { > + guc->submission_state.guc_ids_bitmap = > + bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); > + if (!guc->submission_state.guc_ids_bitmap) > + return -ENOMEM; > + } > + > try_again: > spin_lock_irqsave(&guc->submission_state.lock, flags); > >
Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> wrote on śro [2022-sty-12 08:54:19 +0000]: > > On 11/01/2022 16:30, Matthew Brost wrote: > > Move the multi-lrc guc_id from the lower allocation partition (0 to > > number of multi-lrc guc_ids) to upper allocation partition (number of > > single-lrc to max guc_ids). > > Just a reminder that best practice for commit messages is to include the > "why" as well. > > Regards, > > Tvrtko > In my opinion this patch is good step forward. Lazy allocation of the bitmap for MLRC and moving the MLRC pool to the end will allow easier development contexts for SR-IOV. Introduction of two new helpers (new_mlrc_guc_id and new_slrc_guc_id) cleans up the code. I agree with Tvrtko's comment that you should expand your commit message. One thing I personally don't like is this NUMBER_SINGLE_LRC_GUC_ID definition (same for MLRC) In my opinion it should be inline function and this value 1/16 defined as constant - Piotr > > Signed-off-by: Matthew Brost <matthew.brost@intel.com> > > --- > > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 57 ++++++++++++++----- > > 1 file changed, 42 insertions(+), 15 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > index 9989d121127df..1bacc9621cea8 100644 > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > @@ -147,6 +147,8 @@ guc_create_parallel(struct intel_engine_cs **engines, > > */ > > #define NUMBER_MULTI_LRC_GUC_ID(guc) \ > > ((guc)->submission_state.num_guc_ids / 16) > > +#define NUMBER_SINGLE_LRC_GUC_ID(guc) \ > > + ((guc)->submission_state.num_guc_ids - NUMBER_MULTI_LRC_GUC_ID(guc)) > > /* > > * Below is a set of functions which control the GuC scheduling state which > > @@ -1776,11 +1778,6 @@ int intel_guc_submission_init(struct intel_guc *guc) > > INIT_WORK(&guc->submission_state.destroyed_worker, > > destroyed_worker_func); > > - 
guc->submission_state.guc_ids_bitmap = > > - bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); > > - if (!guc->submission_state.guc_ids_bitmap) > > - return -ENOMEM; > > - > > spin_lock_init(&guc->timestamp.lock); > > INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping); > > guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ; > > @@ -1796,7 +1793,8 @@ void intel_guc_submission_fini(struct intel_guc *guc) > > guc_flush_destroyed_contexts(guc); > > guc_lrc_desc_pool_destroy(guc); > > i915_sched_engine_put(guc->sched_engine); > > - bitmap_free(guc->submission_state.guc_ids_bitmap); > > + if (guc->submission_state.guc_ids_bitmap) > > + bitmap_free(guc->submission_state.guc_ids_bitmap); > > } > > static inline void queue_request(struct i915_sched_engine *sched_engine, > > @@ -1863,6 +1861,33 @@ static void guc_submit_request(struct i915_request *rq) > > spin_unlock_irqrestore(&sched_engine->lock, flags); > > } > > +static int new_mlrc_guc_id(struct intel_guc *guc, struct intel_context *ce) > > +{ > > + int ret; > > + > > + GEM_BUG_ON(!intel_context_is_parent(ce)); > > + GEM_BUG_ON(!guc->submission_state.guc_ids_bitmap); > > + > > + ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap, > > + NUMBER_MULTI_LRC_GUC_ID(guc), > > + order_base_2(ce->parallel.number_children > > + + 1)); > > + if (likely(!(ret < 0))) > > + ret += NUMBER_SINGLE_LRC_GUC_ID(guc); > > + > > + return ret; > > +} > > + > > +static int new_slrc_guc_id(struct intel_guc *guc, struct intel_context *ce) > > +{ > > + GEM_BUG_ON(intel_context_is_parent(ce)); > > + > > + return ida_simple_get(&guc->submission_state.guc_ids, > > + 0, NUMBER_SINGLE_LRC_GUC_ID(guc), > > + GFP_KERNEL | __GFP_RETRY_MAYFAIL | > > + __GFP_NOWARN); > > +} > > + > > static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) > > { > > int ret; > > @@ -1870,16 +1895,10 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) > > 
GEM_BUG_ON(intel_context_is_child(ce)); > > if (intel_context_is_parent(ce)) > > - ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap, > > - NUMBER_MULTI_LRC_GUC_ID(guc), > > - order_base_2(ce->parallel.number_children > > - + 1)); > > + ret = new_mlrc_guc_id(guc, ce); > > else > > - ret = ida_simple_get(&guc->submission_state.guc_ids, > > - NUMBER_MULTI_LRC_GUC_ID(guc), > > - guc->submission_state.num_guc_ids, > > - GFP_KERNEL | __GFP_RETRY_MAYFAIL | > > - __GFP_NOWARN); > > + ret = new_slrc_guc_id(guc, ce); > > + > > if (unlikely(ret < 0)) > > return ret; > > @@ -1989,6 +2008,14 @@ static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce) > > GEM_BUG_ON(atomic_read(&ce->guc_id.ref)); > > + if (unlikely(intel_context_is_parent(ce) && > > + !guc->submission_state.guc_ids_bitmap)) { > > + guc->submission_state.guc_ids_bitmap = > > + bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); > > + if (!guc->submission_state.guc_ids_bitmap) > > + return -ENOMEM; > > + } > > + > > try_again: > > spin_lock_irqsave(&guc->submission_state.lock, flags); > > --
On Wed, Jan 12, 2022 at 06:09:06PM +0100, Piotr Piórkowski wrote: > Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> wrote on śro [2022-sty-12 08:54:19 +0000]: > > > > On 11/01/2022 16:30, Matthew Brost wrote: > > > Move the multi-lrc guc_id from the lower allocation partition (0 to > > > number of multi-lrc guc_ids) to upper allocation partition (number of > > > single-lrc to max guc_ids). > > > > Just a reminder that best practice for commit messages is to include the > > "why" as well. > > > > Regards, > > > > Tvrtko > > > > In my opinion this patch is good step forward. > Lazy allocation of the bitmap for MLRC and moving the MLRC pool to the > end will allow easier development contexts for SR-IOV. > Introduction of two new helpers (new_mlrc_guc_id and new_slrc_guc_id) cleans up the code. > > I agree with Tvrtko's comment that you should expand your commit > message. > Agree. Didn't know if I could talk about SR-IOV publicly but clearly can so add an explanation in the next rev. > One thing I personally don't like is this NUMBER_SINGLE_LRC_GUC_ID definition (same for MLRC) > In my opinion it should be inline function and this value 1/16 defined as constant Agree. I'll move these to functions in next rev. 
Matt > > - Piotr > > > Signed-off-by: Matthew Brost <matthew.brost@intel.com> > > > --- > > > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 57 ++++++++++++++----- > > > 1 file changed, 42 insertions(+), 15 deletions(-) > > > > > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > > index 9989d121127df..1bacc9621cea8 100644 > > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > > @@ -147,6 +147,8 @@ guc_create_parallel(struct intel_engine_cs **engines, > > > */ > > > #define NUMBER_MULTI_LRC_GUC_ID(guc) \ > > > ((guc)->submission_state.num_guc_ids / 16) > > > +#define NUMBER_SINGLE_LRC_GUC_ID(guc) \ > > > + ((guc)->submission_state.num_guc_ids - NUMBER_MULTI_LRC_GUC_ID(guc)) > > > /* > > > * Below is a set of functions which control the GuC scheduling state which > > > @@ -1776,11 +1778,6 @@ int intel_guc_submission_init(struct intel_guc *guc) > > > INIT_WORK(&guc->submission_state.destroyed_worker, > > > destroyed_worker_func); > > > - guc->submission_state.guc_ids_bitmap = > > > - bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); > > > - if (!guc->submission_state.guc_ids_bitmap) > > > - return -ENOMEM; > > > - > > > spin_lock_init(&guc->timestamp.lock); > > > INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping); > > > guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ; > > > @@ -1796,7 +1793,8 @@ void intel_guc_submission_fini(struct intel_guc *guc) > > > guc_flush_destroyed_contexts(guc); > > > guc_lrc_desc_pool_destroy(guc); > > > i915_sched_engine_put(guc->sched_engine); > > > - bitmap_free(guc->submission_state.guc_ids_bitmap); > > > + if (guc->submission_state.guc_ids_bitmap) > > > + bitmap_free(guc->submission_state.guc_ids_bitmap); > > > } > > > static inline void queue_request(struct i915_sched_engine *sched_engine, > > > @@ -1863,6 +1861,33 @@ static void 
guc_submit_request(struct i915_request *rq) > > > spin_unlock_irqrestore(&sched_engine->lock, flags); > > > } > > > +static int new_mlrc_guc_id(struct intel_guc *guc, struct intel_context *ce) > > > +{ > > > + int ret; > > > + > > > + GEM_BUG_ON(!intel_context_is_parent(ce)); > > > + GEM_BUG_ON(!guc->submission_state.guc_ids_bitmap); > > > + > > > + ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap, > > > + NUMBER_MULTI_LRC_GUC_ID(guc), > > > + order_base_2(ce->parallel.number_children > > > + + 1)); > > > + if (likely(!(ret < 0))) > > > + ret += NUMBER_SINGLE_LRC_GUC_ID(guc); > > > + > > > + return ret; > > > +} > > > + > > > +static int new_slrc_guc_id(struct intel_guc *guc, struct intel_context *ce) > > > +{ > > > + GEM_BUG_ON(intel_context_is_parent(ce)); > > > + > > > + return ida_simple_get(&guc->submission_state.guc_ids, > > > + 0, NUMBER_SINGLE_LRC_GUC_ID(guc), > > > + GFP_KERNEL | __GFP_RETRY_MAYFAIL | > > > + __GFP_NOWARN); > > > +} > > > + > > > static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) > > > { > > > int ret; > > > @@ -1870,16 +1895,10 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) > > > GEM_BUG_ON(intel_context_is_child(ce)); > > > if (intel_context_is_parent(ce)) > > > - ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap, > > > - NUMBER_MULTI_LRC_GUC_ID(guc), > > > - order_base_2(ce->parallel.number_children > > > - + 1)); > > > + ret = new_mlrc_guc_id(guc, ce); > > > else > > > - ret = ida_simple_get(&guc->submission_state.guc_ids, > > > - NUMBER_MULTI_LRC_GUC_ID(guc), > > > - guc->submission_state.num_guc_ids, > > > - GFP_KERNEL | __GFP_RETRY_MAYFAIL | > > > - __GFP_NOWARN); > > > + ret = new_slrc_guc_id(guc, ce); > > > + > > > if (unlikely(ret < 0)) > > > return ret; > > > @@ -1989,6 +2008,14 @@ static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce) > > > GEM_BUG_ON(atomic_read(&ce->guc_id.ref)); > > > + if (unlikely(intel_context_is_parent(ce) 
&& > > > + !guc->submission_state.guc_ids_bitmap)) { > > > + guc->submission_state.guc_ids_bitmap = > > > + bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); > > > + if (!guc->submission_state.guc_ids_bitmap) > > > + return -ENOMEM; > > > + } > > > + > > > try_again: > > > spin_lock_irqsave(&guc->submission_state.lock, flags); > > > > > --
On 11.01.2022 17:30, Matthew Brost wrote: > Move the multi-lrc guc_id from the lower allocation partition (0 to > number of multi-lrc guc_ids) to upper allocation partition (number of > single-lrc to max guc_ids). > > Signed-off-by: Matthew Brost <matthew.brost@intel.com> > --- > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 57 ++++++++++++++----- > 1 file changed, 42 insertions(+), 15 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > index 9989d121127df..1bacc9621cea8 100644 > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > @@ -147,6 +147,8 @@ guc_create_parallel(struct intel_engine_cs **engines, > */ > #define NUMBER_MULTI_LRC_GUC_ID(guc) \ > ((guc)->submission_state.num_guc_ids / 16) > +#define NUMBER_SINGLE_LRC_GUC_ID(guc) \ > + ((guc)->submission_state.num_guc_ids - NUMBER_MULTI_LRC_GUC_ID(guc)) above two will likely look better if converted into inline functions, or even better if we explicitly store slrc/mlrc upper/lower id limits under guc submission state > > /* > * Below is a set of functions which control the GuC scheduling state which > @@ -1776,11 +1778,6 @@ int intel_guc_submission_init(struct intel_guc *guc) > INIT_WORK(&guc->submission_state.destroyed_worker, > destroyed_worker_func); > > - guc->submission_state.guc_ids_bitmap = > - bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); > - if (!guc->submission_state.guc_ids_bitmap) > - return -ENOMEM; > - > spin_lock_init(&guc->timestamp.lock); > INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping); > guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ; > @@ -1796,7 +1793,8 @@ void intel_guc_submission_fini(struct intel_guc *guc) > guc_flush_destroyed_contexts(guc); > guc_lrc_desc_pool_destroy(guc); > i915_sched_engine_put(guc->sched_engine); > - bitmap_free(guc->submission_state.guc_ids_bitmap); > + if 
(guc->submission_state.guc_ids_bitmap) > + bitmap_free(guc->submission_state.guc_ids_bitmap); it should be fine to pass NULL to bitmap_free, no? > } > > static inline void queue_request(struct i915_sched_engine *sched_engine, > @@ -1863,6 +1861,33 @@ static void guc_submit_request(struct i915_request *rq) > spin_unlock_irqrestore(&sched_engine->lock, flags); > } > > +static int new_mlrc_guc_id(struct intel_guc *guc, struct intel_context *ce) > +{ > + int ret; > + > + GEM_BUG_ON(!intel_context_is_parent(ce)); > + GEM_BUG_ON(!guc->submission_state.guc_ids_bitmap); > + > + ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap, > + NUMBER_MULTI_LRC_GUC_ID(guc), > + order_base_2(ce->parallel.number_children > + + 1)); btw, is there any requirement (GuC ABI ?) that allocated ids need to be allocated with power of 2 alignment ? I don't think that we must optimize that hard and in some cases waste extra ids (as we might be limited on some configs) > + if (likely(!(ret < 0))) > + ret += NUMBER_SINGLE_LRC_GUC_ID(guc); nit: more readable would be if (unlikely(ret < 0)) return ret; return ret + guc->submission_state.mlrc_base; > + > + return ret; > +} > + > +static int new_slrc_guc_id(struct intel_guc *guc, struct intel_context *ce) > +{ > + GEM_BUG_ON(intel_context_is_parent(ce)); do we really need ce here ? 
> + > + return ida_simple_get(&guc->submission_state.guc_ids, > + 0, NUMBER_SINGLE_LRC_GUC_ID(guc), if we change the logic of NUMBER_SINGLE/MULTI_LRC_GUC_ID macros from static split into more dynamic, then we could likely implement lazy increase of available slrc/mlrc id limits on demand, within available range, without deciding upfront of the hardcoded split 15 : 1 but this can be done next time ;) > + GFP_KERNEL | __GFP_RETRY_MAYFAIL | > + __GFP_NOWARN); > +} > + > static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) > { > int ret; > @@ -1870,16 +1895,10 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) > GEM_BUG_ON(intel_context_is_child(ce)); > > if (intel_context_is_parent(ce)) > - ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap, > - NUMBER_MULTI_LRC_GUC_ID(guc), > - order_base_2(ce->parallel.number_children > - + 1)); > + ret = new_mlrc_guc_id(guc, ce); > else > - ret = ida_simple_get(&guc->submission_state.guc_ids, > - NUMBER_MULTI_LRC_GUC_ID(guc), > - guc->submission_state.num_guc_ids, > - GFP_KERNEL | __GFP_RETRY_MAYFAIL | > - __GFP_NOWARN); > + ret = new_slrc_guc_id(guc, ce); > + with above helpers introduced, shouldn't we move code from new_guc_id() to assign_guc_id() ? > if (unlikely(ret < 0)) > return ret; > > @@ -1989,6 +2008,14 @@ static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce) > > GEM_BUG_ON(atomic_read(&ce->guc_id.ref)); > > + if (unlikely(intel_context_is_parent(ce) && > + !guc->submission_state.guc_ids_bitmap)) { > + guc->submission_state.guc_ids_bitmap = > + bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); > + if (!guc->submission_state.guc_ids_bitmap) > + return -ENOMEM; > + } maybe move this chunk to new_mlrc_guc_id() ? or we can't due to the spin_lock below ? but then how do you protect guc_ids_bitmap pointer itself ? -Michal > + > try_again: > spin_lock_irqsave(&guc->submission_state.lock, flags); >
On Thu, Jan 13, 2022 at 12:21:17AM +0100, Michal Wajdeczko wrote: > > > On 11.01.2022 17:30, Matthew Brost wrote: > > Move the multi-lrc guc_id from the lower allocation partition (0 to > > number of multi-lrc guc_ids) to upper allocation partition (number of > > single-lrc to max guc_ids). > > > > Signed-off-by: Matthew Brost <matthew.brost@intel.com> > > --- > > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 57 ++++++++++++++----- > > 1 file changed, 42 insertions(+), 15 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > index 9989d121127df..1bacc9621cea8 100644 > > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c > > @@ -147,6 +147,8 @@ guc_create_parallel(struct intel_engine_cs **engines, > > */ > > #define NUMBER_MULTI_LRC_GUC_ID(guc) \ > > ((guc)->submission_state.num_guc_ids / 16) > > +#define NUMBER_SINGLE_LRC_GUC_ID(guc) \ > > + ((guc)->submission_state.num_guc_ids - NUMBER_MULTI_LRC_GUC_ID(guc)) > > above two will likely look better if converted into inline functions, or > even better if we explicitly store slrc/mlrc upper/lower id limits under > guc submission state > Definitely inline functions, or I guess variables work too but that might be overkill. Let me play around with this and see how it looks. 
> > > > /* > > * Below is a set of functions which control the GuC scheduling state which > > @@ -1776,11 +1778,6 @@ int intel_guc_submission_init(struct intel_guc *guc) > > INIT_WORK(&guc->submission_state.destroyed_worker, > > destroyed_worker_func); > > > > - guc->submission_state.guc_ids_bitmap = > > - bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); > > - if (!guc->submission_state.guc_ids_bitmap) > > - return -ENOMEM; > > - > > spin_lock_init(&guc->timestamp.lock); > > INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping); > > guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ; > > @@ -1796,7 +1793,8 @@ void intel_guc_submission_fini(struct intel_guc *guc) > > guc_flush_destroyed_contexts(guc); > > guc_lrc_desc_pool_destroy(guc); > > i915_sched_engine_put(guc->sched_engine); > > - bitmap_free(guc->submission_state.guc_ids_bitmap); > > + if (guc->submission_state.guc_ids_bitmap) > > + bitmap_free(guc->submission_state.guc_ids_bitmap); > > it should be fine to pass NULL to bitmap_free, no? > Probably? I'll double check on this. > > } > > > > static inline void queue_request(struct i915_sched_engine *sched_engine, > > @@ -1863,6 +1861,33 @@ static void guc_submit_request(struct i915_request *rq) > > spin_unlock_irqrestore(&sched_engine->lock, flags); > > } > > > > +static int new_mlrc_guc_id(struct intel_guc *guc, struct intel_context *ce) > > +{ > > + int ret; > > + > > + GEM_BUG_ON(!intel_context_is_parent(ce)); > > + GEM_BUG_ON(!guc->submission_state.guc_ids_bitmap); > > + > > + ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap, > > + NUMBER_MULTI_LRC_GUC_ID(guc), > > + order_base_2(ce->parallel.number_children > > + + 1)); > > btw, is there any requirement (GuC ABI ?) that allocated ids need > to be allocated with power of 2 alignment ? 
I don't think that we > must optimize that hard and in some cases waste extra ids (as we might > be limited on some configs) > No pow2 requirement in GuC ABI, bitmaps only work on pow2 alignment and didn't optimize this. > > + if (likely(!(ret < 0))) > > + ret += NUMBER_SINGLE_LRC_GUC_ID(guc); > > nit: more readable would be > > if (unlikely(ret < 0)) > return ret; > > return ret + guc->submission_state.mlrc_base; > Sure. > > + > > + return ret; > > +} > > + > > +static int new_slrc_guc_id(struct intel_guc *guc, struct intel_context *ce) > > +{ > > + GEM_BUG_ON(intel_context_is_parent(ce)); > > do we really need ce here ? > Just for the GEM_BUG_ON... Can remove if it is a big deal. > > + > > + return ida_simple_get(&guc->submission_state.guc_ids, > > + 0, NUMBER_SINGLE_LRC_GUC_ID(guc), > > if we change the logic of NUMBER_SINGLE/MULTI_LRC_GUC_ID macros from > static split into more dynamic, then we could likely implement lazy > increase of available slrc/mlrc id limits on demand, within available > range, without deciding upfront of the hardcoded split 15 : 1 > > but this can be done next time ;) > Yea I guess. Doubt we need anything beyond a static split tho. 
> > + GFP_KERNEL | __GFP_RETRY_MAYFAIL | > > + __GFP_NOWARN); > > +} > > + > > static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) > > { > > int ret; > > @@ -1870,16 +1895,10 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) > > GEM_BUG_ON(intel_context_is_child(ce)); > > > > if (intel_context_is_parent(ce)) > > - ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap, > > - NUMBER_MULTI_LRC_GUC_ID(guc), > > - order_base_2(ce->parallel.number_children > > - + 1)); > > + ret = new_mlrc_guc_id(guc, ce); > > else > > - ret = ida_simple_get(&guc->submission_state.guc_ids, > > - NUMBER_MULTI_LRC_GUC_ID(guc), > > - guc->submission_state.num_guc_ids, > > - GFP_KERNEL | __GFP_RETRY_MAYFAIL | > > - __GFP_NOWARN); > > + ret = new_slrc_guc_id(guc, ce); > > + > > with above helpers introduced, shouldn't we move code from new_guc_id() > to assign_guc_id() ? > Why add inline to code to assign_guc_id? > > if (unlikely(ret < 0)) > > return ret; > > > > @@ -1989,6 +2008,14 @@ static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce) > > > > GEM_BUG_ON(atomic_read(&ce->guc_id.ref)); > > > > + if (unlikely(intel_context_is_parent(ce) && > > + !guc->submission_state.guc_ids_bitmap)) { > > + guc->submission_state.guc_ids_bitmap = > > + bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); > > + if (!guc->submission_state.guc_ids_bitmap) > > + return -ENOMEM; > > + } > > maybe move this chunk to new_mlrc_guc_id() ? > or we can't due to the spin_lock below ? > but then how do you protect guc_ids_bitmap pointer itself ? > Can't use GFP_KERNEL inside a spin lock... Matt > -Michal > > > + > > try_again: > > spin_lock_irqsave(&guc->submission_state.lock, flags); > >
On 13.01.2022 00:26, Matthew Brost wrote: > On Thu, Jan 13, 2022 at 12:21:17AM +0100, Michal Wajdeczko wrote: >> On 11.01.2022 17:30, Matthew Brost wrote: ... >>> @@ -1863,6 +1861,33 @@ static void guc_submit_request(struct i915_request *rq) >>> spin_unlock_irqrestore(&sched_engine->lock, flags); >>> } >>> >>> +static int new_mlrc_guc_id(struct intel_guc *guc, struct intel_context *ce) >>> +{ >>> + int ret; >>> + >>> + GEM_BUG_ON(!intel_context_is_parent(ce)); >>> + GEM_BUG_ON(!guc->submission_state.guc_ids_bitmap); >>> + >>> + ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap, >>> + NUMBER_MULTI_LRC_GUC_ID(guc), >>> + order_base_2(ce->parallel.number_children >>> + + 1)); >> >> btw, is there any requirement (GuC ABI ?) that allocated ids need >> to be allocated with power of 2 alignment ? I don't think that we >> must optimize that hard and in some cases waste extra ids (as we might >> be limited on some configs) >> > > No pow2 requirement in GuC ABI, bitmaps only work on pow2 alignment and > didn't optmize this. > there is a slower variant of "find" function: bitmap_find_next_zero_area - find a contiguous aligned zero area that does not have this limitation .. >>> @@ -1989,6 +2008,14 @@ static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce) >>> >>> GEM_BUG_ON(atomic_read(&ce->guc_id.ref)); >>> >>> + if (unlikely(intel_context_is_parent(ce) && >>> + !guc->submission_state.guc_ids_bitmap)) { >>> + guc->submission_state.guc_ids_bitmap = >>> + bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); >>> + if (!guc->submission_state.guc_ids_bitmap) >>> + return -ENOMEM; >>> + } >> >> maybe move this chunk to new_mlrc_guc_id() ? >> or we can't due to the spin_lock below ? >> but then how do you protect guc_ids_bitmap pointer itself ? >> > > Can't use GFP_KERNEL inside a spin lock... > ok, but what if there will be two or more parallel calls to pin_guc_id() with all being first parent context? each will see NULL guc_ids_bitmap.. 
or there is another layer of synchronization? -Michal
On Thu, Jan 13, 2022 at 03:18:14PM +0100, Michal Wajdeczko wrote: > > > On 13.01.2022 00:26, Matthew Brost wrote: > > On Thu, Jan 13, 2022 at 12:21:17AM +0100, Michal Wajdeczko wrote: > >> On 11.01.2022 17:30, Matthew Brost wrote: > > ... > > >>> @@ -1863,6 +1861,33 @@ static void guc_submit_request(struct i915_request *rq) > >>> spin_unlock_irqrestore(&sched_engine->lock, flags); > >>> } > >>> > >>> +static int new_mlrc_guc_id(struct intel_guc *guc, struct intel_context *ce) > >>> +{ > >>> + int ret; > >>> + > >>> + GEM_BUG_ON(!intel_context_is_parent(ce)); > >>> + GEM_BUG_ON(!guc->submission_state.guc_ids_bitmap); > >>> + > >>> + ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap, > >>> + NUMBER_MULTI_LRC_GUC_ID(guc), > >>> + order_base_2(ce->parallel.number_children > >>> + + 1)); > >> > >> btw, is there any requirement (GuC ABI ?) that allocated ids need > >> to be allocated with power of 2 alignment ? I don't think that we > >> must optimize that hard and in some cases waste extra ids (as we might > >> be limited on some configs) > >> > > > > No pow2 requirement in GuC ABI, bitmaps only work on pow2 alignment and > > didn't optmize this. > > > > there is a slower variant of "find" function: > > bitmap_find_next_zero_area - find a contiguous aligned zero area > > that does not have this limitation > Ah, wasn't aware of this. If this becomes an issue (running of multi-lrc ids) for customers I suppose this is the first thing we can do to try to address this. For now, I think we leave it as is. > .. 
> > > >>> @@ -1989,6 +2008,14 @@ static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce) > >>> > >>> GEM_BUG_ON(atomic_read(&ce->guc_id.ref)); > >>> > >>> + if (unlikely(intel_context_is_parent(ce) && > >>> + !guc->submission_state.guc_ids_bitmap)) { > >>> + guc->submission_state.guc_ids_bitmap = > >>> + bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); > >>> + if (!guc->submission_state.guc_ids_bitmap) > >>> + return -ENOMEM; > >>> + } > >> > >> maybe move this chunk to new_mlrc_guc_id() ? > >> or we can't due to the spin_lock below ? > >> but then how do you protect guc_ids_bitmap pointer itself ? > >> > > > > Can't use GFP_KERNEL inside a spin lock... > > > > ok, but what if there will be two or more parallel calls to pin_guc_id() > with all being first parent context? each will see NULL guc_ids_bitmap.. > or there is another layer of synchronization? > Good catch. Yes, it is technically possible for two multi-lrc contexts to try to allocate at the same time. I guess I should just do this at driver load time + allocate the maximum number of multi-lrc ids and possibly waste a bit of memory on a PF or VF. Matt > -Michal
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 9989d121127df..1bacc9621cea8 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -147,6 +147,8 @@ guc_create_parallel(struct intel_engine_cs **engines, */ #define NUMBER_MULTI_LRC_GUC_ID(guc) \ ((guc)->submission_state.num_guc_ids / 16) +#define NUMBER_SINGLE_LRC_GUC_ID(guc) \ + ((guc)->submission_state.num_guc_ids - NUMBER_MULTI_LRC_GUC_ID(guc)) /* * Below is a set of functions which control the GuC scheduling state which @@ -1776,11 +1778,6 @@ int intel_guc_submission_init(struct intel_guc *guc) INIT_WORK(&guc->submission_state.destroyed_worker, destroyed_worker_func); - guc->submission_state.guc_ids_bitmap = - bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); - if (!guc->submission_state.guc_ids_bitmap) - return -ENOMEM; - spin_lock_init(&guc->timestamp.lock); INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping); guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ; @@ -1796,7 +1793,8 @@ void intel_guc_submission_fini(struct intel_guc *guc) guc_flush_destroyed_contexts(guc); guc_lrc_desc_pool_destroy(guc); i915_sched_engine_put(guc->sched_engine); - bitmap_free(guc->submission_state.guc_ids_bitmap); + if (guc->submission_state.guc_ids_bitmap) + bitmap_free(guc->submission_state.guc_ids_bitmap); } static inline void queue_request(struct i915_sched_engine *sched_engine, @@ -1863,6 +1861,33 @@ static void guc_submit_request(struct i915_request *rq) spin_unlock_irqrestore(&sched_engine->lock, flags); } +static int new_mlrc_guc_id(struct intel_guc *guc, struct intel_context *ce) +{ + int ret; + + GEM_BUG_ON(!intel_context_is_parent(ce)); + GEM_BUG_ON(!guc->submission_state.guc_ids_bitmap); + + ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap, + NUMBER_MULTI_LRC_GUC_ID(guc), + order_base_2(ce->parallel.number_children + + 1)); 
+ if (likely(!(ret < 0))) + ret += NUMBER_SINGLE_LRC_GUC_ID(guc); + + return ret; +} + +static int new_slrc_guc_id(struct intel_guc *guc, struct intel_context *ce) +{ + GEM_BUG_ON(intel_context_is_parent(ce)); + + return ida_simple_get(&guc->submission_state.guc_ids, + 0, NUMBER_SINGLE_LRC_GUC_ID(guc), + GFP_KERNEL | __GFP_RETRY_MAYFAIL | + __GFP_NOWARN); +} + static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) { int ret; @@ -1870,16 +1895,10 @@ static int new_guc_id(struct intel_guc *guc, struct intel_context *ce) GEM_BUG_ON(intel_context_is_child(ce)); if (intel_context_is_parent(ce)) - ret = bitmap_find_free_region(guc->submission_state.guc_ids_bitmap, - NUMBER_MULTI_LRC_GUC_ID(guc), - order_base_2(ce->parallel.number_children - + 1)); + ret = new_mlrc_guc_id(guc, ce); else - ret = ida_simple_get(&guc->submission_state.guc_ids, - NUMBER_MULTI_LRC_GUC_ID(guc), - guc->submission_state.num_guc_ids, - GFP_KERNEL | __GFP_RETRY_MAYFAIL | - __GFP_NOWARN); + ret = new_slrc_guc_id(guc, ce); + if (unlikely(ret < 0)) return ret; @@ -1989,6 +2008,14 @@ static int pin_guc_id(struct intel_guc *guc, struct intel_context *ce) GEM_BUG_ON(atomic_read(&ce->guc_id.ref)); + if (unlikely(intel_context_is_parent(ce) && + !guc->submission_state.guc_ids_bitmap)) { + guc->submission_state.guc_ids_bitmap = + bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL); + if (!guc->submission_state.guc_ids_bitmap) + return -ENOMEM; + } + try_again: spin_lock_irqsave(&guc->submission_state.lock, flags);
Move the multi-lrc guc_id from the lower allocation partition (0 to number of multi-lrc guc_ids) to upper allocation partition (number of single-lrc to max guc_ids). Signed-off-by: Matthew Brost <matthew.brost@intel.com> --- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 57 ++++++++++++++----- 1 file changed, 42 insertions(+), 15 deletions(-)