Message ID | 20180829191602.73476-1-michal.wajdeczko@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | New GuC ABI | expand |
On 29/08/2018 20:16, Michal Wajdeczko wrote: > The new context descriptor format contains two assignable fields: > the SW Context ID (technically 11 bits, but practically limited to 2032 > entries due to some being reserved for future use by the GuC) and the > SW Counter (6 bits). > > We don't want to limit ourselves too much in the maximum number of > concurrent contexts we want to allow, so ideally we want to employ > every possible bit available. Unfortunately, a further limitation in > the interface with the GuC means the combination of SW Context ID + > SW Counter has to be unique within the same engine class (as we use > the SW Context ID to index in the GuC stage descriptor pool, and the > Engine Class + SW Counter to index in the 2-dimensional lrc array). > This essentially means we need to somehow encode the engine instance. > > Since the BSpec allows 6 bits for engine instance, we use the whole > SW counter for this task. If the limitation of 2032 maximum simultaneous > contexts is too restrictive, we can always squeeze things a bit more > (3 extras bits for hw_id, 3 bits for instance) and things will still > work (Gen11 does not instance more than 8 engines of any class). > > Another alternative would be to generate the hw_id per HW context > instead of per GEM context, but that has other problems (e.g. maximum > number of user-created contexts would be variable, no relationship > between a GuC principal descriptor and the proxy descriptor it uses, ...) > > Bspec: 12254 > > Signed-off-by: Oscar Mateo <oscar.mateo@intel.com> > Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com> > Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> > Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> > Cc: Michel Thierry <michel.thierry@intel.com> > --- > drivers/gpu/drm/i915/i915_drv.h | 15 +++++++++++---- > drivers/gpu/drm/i915/i915_gem_context.c | 5 ++++- > drivers/gpu/drm/i915/i915_gem_context.h | 2 ++ > drivers/gpu/drm/i915/i915_reg.h | 2 ++ > drivers/gpu/drm/i915/intel_lrc.c | 12 +++++++++--- > 5 files changed, 28 insertions(+), 8 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index e5b9d3c..34f5495 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1866,14 +1866,21 @@ struct drm_i915_private { > struct llist_head free_list; > struct work_struct free_work; > > - /* The hw wants to have a stable context identifier for the > + /* > + * The HW wants to have a stable context identifier for the > * lifetime of the context (for OA, PASID, faults, etc). > * This is limited in execlists to 21 bits. > + * In enhanced execlist (GEN11+) this is limited to 11 bits > + * (the SW Context ID field) but GuC limits it a bit further > + * (11 bits - 16) due to some entries being reserved for future > + * use (so the firmware only supports a GuC stage descriptor > + * pool of 2032 entries). > */ > struct ida hw_ida; > -#define MAX_CONTEXT_HW_ID (1<<21) /* exclusive */ > -#define MAX_GUC_CONTEXT_HW_ID (1 << 20) /* exclusive */ > -#define GEN11_MAX_CONTEXT_HW_ID (1<<11) /* exclusive */ > +#define MAX_CONTEXT_HW_ID (1 << 21) /* exclusive */ > +#define MAX_GUC_CONTEXT_HW_ID (1 << 20) /* exclusive */ > +#define GEN11_MAX_CONTEXT_HW_ID (1 << 11) /* exclusive */ > +#define GEN11_MAX_CONTEXT_HW_ID_WITH_GUC (GEN11_MAX_CONTEXT_HW_ID - 16) > } contexts; > > u32 fdi_rx_config; > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c > index f15a039..e3b500c 100644 > --- a/drivers/gpu/drm/i915/i915_gem_context.c > +++ b/drivers/gpu/drm/i915/i915_gem_context.c > @@ -209,7 +209,10 @@ static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out) > unsigned int max; > > if (INTEL_GEN(dev_priv) >= 11) { > - max = GEN11_MAX_CONTEXT_HW_ID; > + if (USES_GUC_SUBMISSION(dev_priv)) > + max = GEN11_MAX_CONTEXT_HW_ID_WITH_GUC; > + else > + max = GEN11_MAX_CONTEXT_HW_ID; > } else { > /* > * When using GuC in proxy submission, GuC consumes the > diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h > index 851dad6..4b87f5d 100644 > --- a/drivers/gpu/drm/i915/i915_gem_context.h > +++ b/drivers/gpu/drm/i915/i915_gem_context.h > @@ -154,6 +154,8 @@ struct i915_gem_context { > struct intel_ring *ring; > u32 *lrc_reg_state; > u64 lrc_desc; > + u32 sw_context_id; > + u32 sw_counter; > int pin_count; > > const struct intel_context_ops *ops; > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h > index f232178..ea65d7b 100644 > --- a/drivers/gpu/drm/i915/i915_reg.h > +++ b/drivers/gpu/drm/i915/i915_reg.h > @@ -3900,6 +3900,8 @@ enum { > #define GEN8_CTX_ID_WIDTH 21 > #define GEN11_SW_CTX_ID_SHIFT 37 > #define GEN11_SW_CTX_ID_WIDTH 11 > +#define GEN11_SW_COUNTER_SHIFT 55 > +#define GEN11_SW_COUNTER_WIDTH 6 > #define GEN11_ENGINE_CLASS_SHIFT 61 > #define GEN11_ENGINE_CLASS_WIDTH 3 > #define GEN11_ENGINE_INSTANCE_SHIFT 48 > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index f4b9972..3001a14 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -240,14 +240,15 @@ static inline bool need_preempt(const struct intel_engine_cs *engine, > * anything below. > */ > if (INTEL_GEN(ctx->i915) >= 11) { Hey Michal, There is a comment just above the if () about updating the i915_perf.c (oa_get_render_ctx_id) when descriptor is updated. Otherwise this will break some part of the observability feature. You can verify this with the IGT tests/perf --run-subtest=gen8-unprivileged-single-ctx-counters Thanks a lot, - Lionel > - GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH)); > - desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT; > + GEM_BUG_ON(ce->sw_context_id >= BIT(GEN11_SW_CTX_ID_WIDTH)); > + desc |= (u64)ce->sw_context_id << GEN11_SW_CTX_ID_SHIFT; > /* bits 37-47 */ > > desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT; > /* bits 48-53 */ > > - /* TODO: decide what to do with SW counter (bits 55-60) */ > + desc |= (u64)ce->sw_counter << GEN11_SW_COUNTER_SHIFT; > + /* bits 55-60 */ > > /* > * Although GuC will never see this upper part as it fills > @@ -2771,6 +2772,11 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, > ce->ring = ring; > ce->state = vma; > > + if (INTEL_GEN(ctx->i915) >= 11) { > + ce->sw_context_id = ctx->hw_id; > + ce->sw_counter = engine->instance; > + } > + > return 0; > > error_ring_free:
On 2018-08-30 02:08, Lionel Landwerlin wrote: > On 29/08/2018 20:16, Michal Wajdeczko wrote: >> The new context descriptor format contains two assignable fields: >> the SW Context ID (technically 11 bits, but practically limited to 2032 >> entries due to some being reserved for future use by the GuC) and the >> SW Counter (6 bits). >> >> We don't want to limit ourselves too much in the maximum number of >> concurrent contexts we want to allow, so ideally we want to employ >> every possible bit available. Unfortunately, a further limitation in >> the interface with the GuC means the combination of SW Context ID + >> SW Counter has to be unique within the same engine class (as we use >> the SW Context ID to index in the GuC stage descriptor pool, and the >> Engine Class + SW Counter to index in the 2-dimensional lrc array). >> This essentially means we need to somehow encode the engine instance. >> >> Since the BSpec allows 6 bits for engine instance, we use the whole >> SW counter for this task. If the limitation of 2032 maximum simultaneous >> contexts is too restrictive, we can always squeeze things a bit more >> (3 extras bits for hw_id, 3 bits for instance) and things will still >> work (Gen11 does not instance more than 8 engines of any class). >> >> Another alternative would be to generate the hw_id per HW context >> instead of per GEM context, but that has other problems (e.g. maximum >> number of user-created contexts would be variable, no relationship >> between a GuC principal descriptor and the proxy descriptor it uses, >> ...) >> >> Bspec: 12254 >> >> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com> >> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com> >> Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com> >> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> >> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> >> Cc: Michel Thierry <michel.thierry@intel.com> Tested-by: Tomasz Lis <tomasz.lis@intel.com> >> --- >> drivers/gpu/drm/i915/i915_drv.h | 15 +++++++++++---- >> drivers/gpu/drm/i915/i915_gem_context.c | 5 ++++- >> drivers/gpu/drm/i915/i915_gem_context.h | 2 ++ >> drivers/gpu/drm/i915/i915_reg.h | 2 ++ >> drivers/gpu/drm/i915/intel_lrc.c | 12 +++++++++--- >> 5 files changed, 28 insertions(+), 8 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_drv.h >> b/drivers/gpu/drm/i915/i915_drv.h >> index e5b9d3c..34f5495 100644 >> --- a/drivers/gpu/drm/i915/i915_drv.h >> +++ b/drivers/gpu/drm/i915/i915_drv.h >> @@ -1866,14 +1866,21 @@ struct drm_i915_private { >> struct llist_head free_list; >> struct work_struct free_work; >> - /* The hw wants to have a stable context identifier for the >> + /* >> + * The HW wants to have a stable context identifier for the >> * lifetime of the context (for OA, PASID, faults, etc). >> * This is limited in execlists to 21 bits. >> + * In enhanced execlist (GEN11+) this is limited to 11 bits >> + * (the SW Context ID field) but GuC limits it a bit further >> + * (11 bits - 16) due to some entries being reserved for future >> + * use (so the firmware only supports a GuC stage descriptor >> + * pool of 2032 entries). >> */ >> struct ida hw_ida; >> -#define MAX_CONTEXT_HW_ID (1<<21) /* exclusive */ >> -#define MAX_GUC_CONTEXT_HW_ID (1 << 20) /* exclusive */ >> -#define GEN11_MAX_CONTEXT_HW_ID (1<<11) /* exclusive */ >> +#define MAX_CONTEXT_HW_ID (1 << 21) /* exclusive */ >> +#define MAX_GUC_CONTEXT_HW_ID (1 << 20) /* exclusive */ >> +#define GEN11_MAX_CONTEXT_HW_ID (1 << 11) /* exclusive */ >> +#define GEN11_MAX_CONTEXT_HW_ID_WITH_GUC (GEN11_MAX_CONTEXT_HW_ID - 16) >> } contexts; >> u32 fdi_rx_config; >> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c >> b/drivers/gpu/drm/i915/i915_gem_context.c >> index f15a039..e3b500c 100644 >> --- a/drivers/gpu/drm/i915/i915_gem_context.c >> +++ b/drivers/gpu/drm/i915/i915_gem_context.c >> @@ -209,7 +209,10 @@ static int assign_hw_id(struct drm_i915_private >> *dev_priv, unsigned *out) >> unsigned int max; >> if (INTEL_GEN(dev_priv) >= 11) { >> - max = GEN11_MAX_CONTEXT_HW_ID; >> + if (USES_GUC_SUBMISSION(dev_priv)) >> + max = GEN11_MAX_CONTEXT_HW_ID_WITH_GUC; >> + else >> + max = GEN11_MAX_CONTEXT_HW_ID; >> } else { >> /* >> * When using GuC in proxy submission, GuC consumes the >> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h >> b/drivers/gpu/drm/i915/i915_gem_context.h >> index 851dad6..4b87f5d 100644 >> --- a/drivers/gpu/drm/i915/i915_gem_context.h >> +++ b/drivers/gpu/drm/i915/i915_gem_context.h >> @@ -154,6 +154,8 @@ struct i915_gem_context { >> struct intel_ring *ring; >> u32 *lrc_reg_state; >> u64 lrc_desc; >> + u32 sw_context_id; >> + u32 sw_counter; >> int pin_count; >> const struct intel_context_ops *ops; >> diff --git a/drivers/gpu/drm/i915/i915_reg.h >> b/drivers/gpu/drm/i915/i915_reg.h >> index f232178..ea65d7b 100644 >> --- a/drivers/gpu/drm/i915/i915_reg.h >> +++ b/drivers/gpu/drm/i915/i915_reg.h >> @@ -3900,6 +3900,8 @@ enum { >> #define GEN8_CTX_ID_WIDTH 21 >> #define GEN11_SW_CTX_ID_SHIFT 37 >> #define GEN11_SW_CTX_ID_WIDTH 11 >> +#define GEN11_SW_COUNTER_SHIFT 55 >> +#define GEN11_SW_COUNTER_WIDTH 6 >> #define GEN11_ENGINE_CLASS_SHIFT 61 >> #define GEN11_ENGINE_CLASS_WIDTH 3 >> #define GEN11_ENGINE_INSTANCE_SHIFT 48 >> diff --git a/drivers/gpu/drm/i915/intel_lrc.c >> b/drivers/gpu/drm/i915/intel_lrc.c >> index f4b9972..3001a14 100644 >> --- a/drivers/gpu/drm/i915/intel_lrc.c >> +++ b/drivers/gpu/drm/i915/intel_lrc.c >> @@ -240,14 +240,15 @@ static inline bool need_preempt(const struct >> intel_engine_cs *engine, >> * anything below. >> */ >> if (INTEL_GEN(ctx->i915) >= 11) { > > > Hey Michal, > > There is a comment just above the if () about updating the i915_perf.c > (oa_get_render_ctx_id) when descriptor is updated. > Otherwise this will break some part of the observability feature. > You can verify this with the IGT tests/perf > --run-subtest=gen8-unprivileged-single-ctx-counters > > Thanks a lot, > > - > Lionel Tested on KBL; works fine for both enable_guc=2 and enable_guc=3. ./tests/perf --run-subtest=gen8-unprivileged-single-ctx-counters IGT-Version: 1.22-g11db680 (x86_64) (Linux: 4.19.0-rc1tli+ x86_64) Subtest gen8-unprivileged-single-ctx-counters: SUCCESS (0,058s) -Tomasz >> - GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH)); >> - desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT; >> + GEM_BUG_ON(ce->sw_context_id >= BIT(GEN11_SW_CTX_ID_WIDTH)); >> + desc |= (u64)ce->sw_context_id << GEN11_SW_CTX_ID_SHIFT; >> /* bits 37-47 */ >> desc |= (u64)engine->instance << >> GEN11_ENGINE_INSTANCE_SHIFT; >> /* bits 48-53 */ >> - /* TODO: decide what to do with SW counter (bits 55-60) */ >> + desc |= (u64)ce->sw_counter << GEN11_SW_COUNTER_SHIFT; >> + /* bits 55-60 */ >> /* >> * Although GuC will never see this upper part as it fills >> @@ -2771,6 +2772,11 @@ static int >> execlists_context_deferred_alloc(struct i915_gem_context *ctx, >> ce->ring = ring; >> ce->state = vma; >> + if (INTEL_GEN(ctx->i915) >= 11) { >> + ce->sw_context_id = ctx->hw_id; >> + ce->sw_counter = engine->instance; >> + } >> + >> return 0; >> error_ring_free: > > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
On 2018-08-30 16:15, Lis, Tomasz wrote: > > > On 2018-08-30 02:08, Lionel Landwerlin wrote: >> On 29/08/2018 20:16, Michal Wajdeczko wrote: >>> The new context descriptor format contains two assignable fields: >>> the SW Context ID (technically 11 bits, but practically limited to 2032 >>> entries due to some being reserved for future use by the GuC) and the >>> SW Counter (6 bits). >>> >>> We don't want to limit ourselves too much in the maximum number of >>> concurrent contexts we want to allow, so ideally we want to employ >>> every possible bit available. Unfortunately, a further limitation in >>> the interface with the GuC means the combination of SW Context ID + >>> SW Counter has to be unique within the same engine class (as we use >>> the SW Context ID to index in the GuC stage descriptor pool, and the >>> Engine Class + SW Counter to index in the 2-dimensional lrc array). >>> This essentially means we need to somehow encode the engine instance. >>> >>> Since the BSpec allows 6 bits for engine instance, we use the whole >>> SW counter for this task. If the limitation of 2032 maximum >>> simultaneous >>> contexts is too restrictive, we can always squeeze things a bit more >>> (3 extras bits for hw_id, 3 bits for instance) and things will still >>> work (Gen11 does not instance more than 8 engines of any class). >>> >>> Another alternative would be to generate the hw_id per HW context >>> instead of per GEM context, but that has other problems (e.g. maximum >>> number of user-created contexts would be variable, no relationship >>> between a GuC principal descriptor and the proxy descriptor it uses, >>> ...) >>> >>> Bspec: 12254 >>> >>> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com> >>> Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com> >>> Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com> >>> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> >>> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> >>> Cc: Michel Thierry <michel.thierry@intel.com> > Tested-by: Tomasz Lis <tomasz.lis@intel.com> >>> --- >>> drivers/gpu/drm/i915/i915_drv.h | 15 +++++++++++---- >>> drivers/gpu/drm/i915/i915_gem_context.c | 5 ++++- >>> drivers/gpu/drm/i915/i915_gem_context.h | 2 ++ >>> drivers/gpu/drm/i915/i915_reg.h | 2 ++ >>> drivers/gpu/drm/i915/intel_lrc.c | 12 +++++++++--- >>> 5 files changed, 28 insertions(+), 8 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/i915/i915_drv.h >>> b/drivers/gpu/drm/i915/i915_drv.h >>> index e5b9d3c..34f5495 100644 >>> --- a/drivers/gpu/drm/i915/i915_drv.h >>> +++ b/drivers/gpu/drm/i915/i915_drv.h >>> @@ -1866,14 +1866,21 @@ struct drm_i915_private { >>> struct llist_head free_list; >>> struct work_struct free_work; >>> - /* The hw wants to have a stable context identifier for the >>> + /* >>> + * The HW wants to have a stable context identifier for the >>> * lifetime of the context (for OA, PASID, faults, etc). >>> * This is limited in execlists to 21 bits. >>> + * In enhanced execlist (GEN11+) this is limited to 11 bits >>> + * (the SW Context ID field) but GuC limits it a bit further >>> + * (11 bits - 16) due to some entries being reserved for >>> future >>> + * use (so the firmware only supports a GuC stage descriptor >>> + * pool of 2032 entries). >>> */ >>> struct ida hw_ida; >>> -#define MAX_CONTEXT_HW_ID (1<<21) /* exclusive */ >>> -#define MAX_GUC_CONTEXT_HW_ID (1 << 20) /* exclusive */ >>> -#define GEN11_MAX_CONTEXT_HW_ID (1<<11) /* exclusive */ >>> +#define MAX_CONTEXT_HW_ID (1 << 21) /* exclusive */ >>> +#define MAX_GUC_CONTEXT_HW_ID (1 << 20) /* exclusive */ >>> +#define GEN11_MAX_CONTEXT_HW_ID (1 << 11) /* exclusive */ >>> +#define GEN11_MAX_CONTEXT_HW_ID_WITH_GUC (GEN11_MAX_CONTEXT_HW_ID - >>> 16) >>> } contexts; >>> u32 fdi_rx_config; >>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c >>> b/drivers/gpu/drm/i915/i915_gem_context.c >>> index f15a039..e3b500c 100644 >>> --- a/drivers/gpu/drm/i915/i915_gem_context.c >>> +++ b/drivers/gpu/drm/i915/i915_gem_context.c >>> @@ -209,7 +209,10 @@ static int assign_hw_id(struct drm_i915_private >>> *dev_priv, unsigned *out) >>> unsigned int max; >>> if (INTEL_GEN(dev_priv) >= 11) { >>> - max = GEN11_MAX_CONTEXT_HW_ID; >>> + if (USES_GUC_SUBMISSION(dev_priv)) >>> + max = GEN11_MAX_CONTEXT_HW_ID_WITH_GUC; >>> + else >>> + max = GEN11_MAX_CONTEXT_HW_ID; >>> } else { >>> /* >>> * When using GuC in proxy submission, GuC consumes the >>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.h >>> b/drivers/gpu/drm/i915/i915_gem_context.h >>> index 851dad6..4b87f5d 100644 >>> --- a/drivers/gpu/drm/i915/i915_gem_context.h >>> +++ b/drivers/gpu/drm/i915/i915_gem_context.h >>> @@ -154,6 +154,8 @@ struct i915_gem_context { >>> struct intel_ring *ring; >>> u32 *lrc_reg_state; >>> u64 lrc_desc; >>> + u32 sw_context_id; >>> + u32 sw_counter; >>> int pin_count; >>> const struct intel_context_ops *ops; >>> diff --git a/drivers/gpu/drm/i915/i915_reg.h >>> b/drivers/gpu/drm/i915/i915_reg.h >>> index f232178..ea65d7b 100644 >>> --- a/drivers/gpu/drm/i915/i915_reg.h >>> +++ b/drivers/gpu/drm/i915/i915_reg.h >>> @@ -3900,6 +3900,8 @@ enum { >>> #define GEN8_CTX_ID_WIDTH 21 >>> #define GEN11_SW_CTX_ID_SHIFT 37 >>> #define GEN11_SW_CTX_ID_WIDTH 11 >>> +#define GEN11_SW_COUNTER_SHIFT 55 >>> +#define GEN11_SW_COUNTER_WIDTH 6 >>> #define GEN11_ENGINE_CLASS_SHIFT 61 >>> #define GEN11_ENGINE_CLASS_WIDTH 3 >>> #define GEN11_ENGINE_INSTANCE_SHIFT 48 >>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c >>> b/drivers/gpu/drm/i915/intel_lrc.c >>> index f4b9972..3001a14 100644 >>> --- a/drivers/gpu/drm/i915/intel_lrc.c >>> +++ b/drivers/gpu/drm/i915/intel_lrc.c >>> @@ -240,14 +240,15 @@ static inline bool need_preempt(const struct >>> intel_engine_cs *engine, >>> * anything below. >>> */ >>> if (INTEL_GEN(ctx->i915) >= 11) { >> >> >> Hey Michal, >> >> There is a comment just above the if () about updating the >> i915_perf.c (oa_get_render_ctx_id) when descriptor is updated. >> Otherwise this will break some part of the observability feature. >> You can verify this with the IGT tests/perf >> --run-subtest=gen8-unprivileged-single-ctx-counters >> >> Thanks a lot, >> >> - >> Lionel > Tested on KBL; works fine for both enable_guc=2 and enable_guc=3. > > ./tests/perf --run-subtest=gen8-unprivileged-single-ctx-counters > IGT-Version: 1.22-g11db680 (x86_64) (Linux: 4.19.0-rc1tli+ x86_64) > Subtest gen8-unprivileged-single-ctx-counters: SUCCESS (0,058s) > > -Tomasz Took a bit longer, but tested on ICL as well. Test passes for enable_guc=1 and 3. There is still an issue with enable_guc=2, but that's not related to observability feature (and out of scope of this review). -Tomasz >>> - GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH)); >>> - desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT; >>> + GEM_BUG_ON(ce->sw_context_id >= BIT(GEN11_SW_CTX_ID_WIDTH)); >>> + desc |= (u64)ce->sw_context_id << GEN11_SW_CTX_ID_SHIFT; >>> /* bits 37-47 */ >>> desc |= (u64)engine->instance << >>> GEN11_ENGINE_INSTANCE_SHIFT; >>> /* bits 48-53 */ >>> - /* TODO: decide what to do with SW counter (bits 55-60) */ >>> + desc |= (u64)ce->sw_counter << GEN11_SW_COUNTER_SHIFT; >>> + /* bits 55-60 */ >>> /* >>> * Although GuC will never see this upper part as it fills >>> @@ -2771,6 +2772,11 @@ static int >>> execlists_context_deferred_alloc(struct i915_gem_context *ctx, >>> ce->ring = ring; >>> ce->state = vma; >>> + if (INTEL_GEN(ctx->i915) >= 11) { >>> + ce->sw_context_id = ctx->hw_id; >>> + ce->sw_counter = engine->instance; >>> + } >>> + >>> return 0; >>> error_ring_free: >> >> >> _______________________________________________ >> Intel-gfx mailing list >> Intel-gfx@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/intel-gfx > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/intel-gfx
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e5b9d3c..34f5495 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1866,14 +1866,21 @@ struct drm_i915_private { struct llist_head free_list; struct work_struct free_work; - /* The hw wants to have a stable context identifier for the + /* + * The HW wants to have a stable context identifier for the * lifetime of the context (for OA, PASID, faults, etc). * This is limited in execlists to 21 bits. + * In enhanced execlist (GEN11+) this is limited to 11 bits + * (the SW Context ID field) but GuC limits it a bit further + * (11 bits - 16) due to some entries being reserved for future + * use (so the firmware only supports a GuC stage descriptor + * pool of 2032 entries). */ struct ida hw_ida; -#define MAX_CONTEXT_HW_ID (1<<21) /* exclusive */ -#define MAX_GUC_CONTEXT_HW_ID (1 << 20) /* exclusive */ -#define GEN11_MAX_CONTEXT_HW_ID (1<<11) /* exclusive */ +#define MAX_CONTEXT_HW_ID (1 << 21) /* exclusive */ +#define MAX_GUC_CONTEXT_HW_ID (1 << 20) /* exclusive */ +#define GEN11_MAX_CONTEXT_HW_ID (1 << 11) /* exclusive */ +#define GEN11_MAX_CONTEXT_HW_ID_WITH_GUC (GEN11_MAX_CONTEXT_HW_ID - 16) } contexts; u32 fdi_rx_config; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index f15a039..e3b500c 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -209,7 +209,10 @@ static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out) unsigned int max; if (INTEL_GEN(dev_priv) >= 11) { - max = GEN11_MAX_CONTEXT_HW_ID; + if (USES_GUC_SUBMISSION(dev_priv)) + max = GEN11_MAX_CONTEXT_HW_ID_WITH_GUC; + else + max = GEN11_MAX_CONTEXT_HW_ID; } else { /* * When using GuC in proxy submission, GuC consumes the diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 851dad6..4b87f5d 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -154,6 +154,8 @@ struct i915_gem_context { struct intel_ring *ring; u32 *lrc_reg_state; u64 lrc_desc; + u32 sw_context_id; + u32 sw_counter; int pin_count; const struct intel_context_ops *ops; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index f232178..ea65d7b 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3900,6 +3900,8 @@ enum { #define GEN8_CTX_ID_WIDTH 21 #define GEN11_SW_CTX_ID_SHIFT 37 #define GEN11_SW_CTX_ID_WIDTH 11 +#define GEN11_SW_COUNTER_SHIFT 55 +#define GEN11_SW_COUNTER_WIDTH 6 #define GEN11_ENGINE_CLASS_SHIFT 61 #define GEN11_ENGINE_CLASS_WIDTH 3 #define GEN11_ENGINE_INSTANCE_SHIFT 48 diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index f4b9972..3001a14 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -240,14 +240,15 @@ static inline bool need_preempt(const struct intel_engine_cs *engine, * anything below. */ if (INTEL_GEN(ctx->i915) >= 11) { - GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH)); - desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT; + GEM_BUG_ON(ce->sw_context_id >= BIT(GEN11_SW_CTX_ID_WIDTH)); + desc |= (u64)ce->sw_context_id << GEN11_SW_CTX_ID_SHIFT; /* bits 37-47 */ desc |= (u64)engine->instance << GEN11_ENGINE_INSTANCE_SHIFT; /* bits 48-53 */ - /* TODO: decide what to do with SW counter (bits 55-60) */ + desc |= (u64)ce->sw_counter << GEN11_SW_COUNTER_SHIFT; + /* bits 55-60 */ /* * Although GuC will never see this upper part as it fills @@ -2771,6 +2772,11 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, ce->ring = ring; ce->state = vma; + if (INTEL_GEN(ctx->i915) >= 11) { + ce->sw_context_id = ctx->hw_id; + ce->sw_counter = engine->instance; + } + return 0; error_ring_free: