Message ID | 1493209238-32650-1-git-send-email-joonas.lahtinen@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 26/04/2017 13:20, Joonas Lahtinen wrote: > Pre-calculate engine context size based on engine class and device > generation and store it in the engine instance. > > v2: > - Squash and get rid of hw_context_size (Chris) > > Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> > Cc: Paulo Zanoni <paulo.r.zanoni@intel.com> > Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> > Cc: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > Cc: Oscar Mateo <oscar.mateo@intel.com> > Cc: Zhenyu Wang <zhenyuw@linux.intel.com> > Cc: intel-gvt-dev@lists.freedesktop.org > --- > drivers/gpu/drm/i915/gvt/scheduler.c | 6 +-- > drivers/gpu/drm/i915/i915_drv.h | 1 - > drivers/gpu/drm/i915/i915_gem_context.c | 59 +++------------------- > drivers/gpu/drm/i915/i915_guc_submission.c | 3 +- > drivers/gpu/drm/i915/i915_reg.h | 10 ---- > drivers/gpu/drm/i915/intel_engine_cs.c | 78 ++++++++++++++++++++++++++++++ > drivers/gpu/drm/i915/intel_lrc.c | 54 +-------------------- > drivers/gpu/drm/i915/intel_lrc.h | 2 - > drivers/gpu/drm/i915/intel_ringbuffer.h | 7 +-- > 9 files changed, 93 insertions(+), 127 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c > index a77db23..ac538dc 100644 > --- a/drivers/gpu/drm/i915/gvt/scheduler.c > +++ b/drivers/gpu/drm/i915/gvt/scheduler.c > @@ -69,8 +69,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) > gvt_dbg_sched("ring id %d workload lrca %x", ring_id, > workload->ctx_desc.lrca); > > - context_page_num = intel_lr_context_size( > - gvt->dev_priv->engine[ring_id]); > + context_page_num = gvt->dev_priv->engine[ring_id]->context_size; > > context_page_num = context_page_num >> PAGE_SHIFT; > > @@ -333,8 +332,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload) > gvt_dbg_sched("ring id %d workload lrca %x\n", ring_id, > workload->ctx_desc.lrca); > > - context_page_num = intel_lr_context_size( > - gvt->dev_priv->engine[ring_id]); > + context_page_num = gvt->dev_priv->engine[ring_id]->context_size; > > context_page_num = context_page_num >> PAGE_SHIFT; > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 357b6c6..4b54b92 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -2359,7 +2359,6 @@ struct drm_i915_private { > */ > struct mutex av_mutex; > > - uint32_t hw_context_size; > struct list_head context_list; > > u32 fdi_rx_config; > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c > index 8bd0c49..b69a026 100644 > --- a/drivers/gpu/drm/i915/i915_gem_context.c > +++ b/drivers/gpu/drm/i915/i915_gem_context.c > @@ -92,33 +92,6 @@ > > #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1 > > -static int get_context_size(struct drm_i915_private *dev_priv) > -{ > - int ret; > - u32 reg; > - > - switch (INTEL_GEN(dev_priv)) { > - case 6: > - reg = I915_READ(CXT_SIZE); > - ret = GEN6_CXT_TOTAL_SIZE(reg) * 64; > - break; > - case 7: > - reg = I915_READ(GEN7_CXT_SIZE); > - if (IS_HASWELL(dev_priv)) > - ret = HSW_CXT_TOTAL_SIZE; > - else > - ret = GEN7_CXT_TOTAL_SIZE(reg) * 64; > - break; > - case 8: > - ret = GEN8_CXT_TOTAL_SIZE; > - break; > - default: > - BUG(); > - } > - > - return ret; > -} > - > void i915_gem_context_free(struct kref *ctx_ref) > { > struct i915_gem_context *ctx = container_of(ctx_ref, typeof(*ctx), ref); > @@ -266,11 +239,12 @@ __create_hw_context(struct drm_i915_private *dev_priv, > list_add_tail(&ctx->link, &dev_priv->context_list); > ctx->i915 = dev_priv; > > - if (dev_priv->hw_context_size) { > + if (dev_priv->engine[RCS]->context_size) { > struct drm_i915_gem_object *obj; > struct i915_vma *vma; > > - obj = alloc_context_obj(dev_priv, dev_priv->hw_context_size); > + obj = alloc_context_obj(dev_priv, > + dev_priv->engine[RCS]->context_size); > if (IS_ERR(obj)) { > ret = PTR_ERR(obj); > goto err_out; > @@ -443,21 +417,6 @@ int i915_gem_context_init(struct drm_i915_private *dev_priv) > BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX); > ida_init(&dev_priv->context_hw_ida); > > - if (i915.enable_execlists) { > - /* NB: intentionally left blank. We will allocate our own > - * backing objects as we need them, thank you very much */ > - dev_priv->hw_context_size = 0; > - } else if (HAS_HW_CONTEXTS(dev_priv)) { > - dev_priv->hw_context_size = > - round_up(get_context_size(dev_priv), > - I915_GTT_PAGE_SIZE); Is this rounding up lost when used from __create_hw_context? > - if (dev_priv->hw_context_size > (1<<20)) { > - DRM_DEBUG_DRIVER("Disabling HW Contexts; invalid size %d\n", > - dev_priv->hw_context_size); > - dev_priv->hw_context_size = 0; > - } > - } > - > ctx = i915_gem_create_context(dev_priv, NULL); > if (IS_ERR(ctx)) { > DRM_ERROR("Failed to create default global context (error %ld)\n", > @@ -478,7 +437,7 @@ int i915_gem_context_init(struct drm_i915_private *dev_priv) > > DRM_DEBUG_DRIVER("%s context support initialized\n", > i915.enable_execlists ? "LR" : > - dev_priv->hw_context_size ? "HW" : "fake"); > + dev_priv->engine[RCS]->context_size ? "HW" : "fake"); > return 0; > } > > @@ -941,11 +900,6 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) > return 0; > } > > -static bool contexts_enabled(struct drm_device *dev) > -{ > - return i915.enable_execlists || to_i915(dev)->hw_context_size; > -} > - > static bool client_is_banned(struct drm_i915_file_private *file_priv) > { > return file_priv->context_bans > I915_MAX_CLIENT_CONTEXT_BANS; > @@ -954,12 +908,13 @@ static bool client_is_banned(struct drm_i915_file_private *file_priv) > int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, > struct drm_file *file) > { > + struct drm_i915_private *dev_priv = to_i915(dev); > struct drm_i915_gem_context_create *args = data; > struct drm_i915_file_private *file_priv = file->driver_priv; > struct i915_gem_context *ctx; > int ret; > > - if (!contexts_enabled(dev)) > + if (!dev_priv->engine[RCS]->context_size) > return -ENODEV; > > if (args->pad != 0) > @@ -977,7 +932,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, > if (ret) > return ret; > > - ctx = i915_gem_create_context(to_i915(dev), file_priv); > + ctx = i915_gem_create_context(dev_priv, file_priv); > mutex_unlock(&dev->struct_mutex); > if (IS_ERR(ctx)) > return PTR_ERR(ctx); > diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c > index ab5140b..6c78637 100644 > --- a/drivers/gpu/drm/i915/i915_guc_submission.c > +++ b/drivers/gpu/drm/i915/i915_guc_submission.c > @@ -1051,8 +1051,7 @@ static int guc_ads_create(struct intel_guc *guc) > dev_priv->engine[RCS]->status_page.ggtt_offset; > > for_each_engine(engine, dev_priv, id) > - blob->ads.eng_state_size[engine->guc_id] = > - intel_lr_context_size(engine); > + blob->ads.eng_state_size[engine->guc_id] = engine->context_size; > > base = guc_ggtt_offset(vma); > blob->ads.scheduler_policies = base + ptr_offset(blob, policies); > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h > index 4c72ada..ee8170c 100644 > --- a/drivers/gpu/drm/i915/i915_reg.h > +++ b/drivers/gpu/drm/i915/i915_reg.h > @@ -3370,16 +3370,6 @@ enum skl_disp_power_wells { > #define GEN7_CXT_VFSTATE_SIZE(ctx_reg) (((ctx_reg) >> 0) & 0x3f) > #define GEN7_CXT_TOTAL_SIZE(ctx_reg) (GEN7_CXT_EXTENDED_SIZE(ctx_reg) + \ > GEN7_CXT_VFSTATE_SIZE(ctx_reg)) > -/* Haswell does have the CXT_SIZE register however it does not appear to be > - * valid. Now, docs explain in dwords what is in the context object. The full > - * size is 70720 bytes, however, the power context and execlist context will > - * never be saved (power context is stored elsewhere, and execlists don't work > - * on HSW) - so the final size, including the extra state required for the > - * Resource Streamer, is 66944 bytes, which rounds to 17 pages. > - */ > -#define HSW_CXT_TOTAL_SIZE (17 * PAGE_SIZE) > -/* Same as Haswell, but 72064 bytes now. */ > -#define GEN8_CXT_TOTAL_SIZE (18 * PAGE_SIZE) > > enum { > INTEL_ADVANCED_CONTEXT = 0, > diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c > index 82a274b..24bcd9a 100644 > --- a/drivers/gpu/drm/i915/intel_engine_cs.c > +++ b/drivers/gpu/drm/i915/intel_engine_cs.c > @@ -26,6 +26,22 @@ > #include "intel_ringbuffer.h" > #include "intel_lrc.h" > > +/* Haswell does have the CXT_SIZE register however it does not appear to be > + * valid. Now, docs explain in dwords what is in the context object. The full > + * size is 70720 bytes, however, the power context and execlist context will > + * never be saved (power context is stored elsewhere, and execlists don't work > + * on HSW) - so the final size, including the extra state required for the > + * Resource Streamer, is 66944 bytes, which rounds to 17 pages. > + */ > +#define HSW_CXT_TOTAL_SIZE (17 * PAGE_SIZE) > +/* Same as Haswell, but 72064 bytes now. */ > +#define GEN8_CXT_TOTAL_SIZE (18 * PAGE_SIZE) > + > +#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE) > +#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) > + > +#define GEN8_LR_CONTEXT_OTHER_SIZE ( 2 * PAGE_SIZE) > + > struct engine_class_info { > const char *name; > int (*init_legacy)(struct intel_engine_cs *engine); > @@ -107,6 +123,64 @@ static const struct engine_info intel_engines[] = { > }, > }; > > +/** > + * ___intel_engine_context_size() - return the size of the context for an engine > + * @dev_priv: i915 device private > + * @class: engine class > + * > + * Each engine class may require a different amount of space for a context > + * image. > + * > + * Return: size (in bytes) of an engine class specific context image > + * > + * Note: this size includes the HWSP, which is part of the context image > + * in LRC mode, but does not include the "shared data page" used with > + * GuC submission. The caller should account for this if using the GuC. > + */ > +static u32 > +__intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class) Very minor, but Chris has been trying to establish i915 instead of dev_priv in new code. > +{ > + u32 reg; Also very minor, I think we normally use val for register values and reg for the register itself. Quick grep suggests we are not 100% consistent already so feel free to ignore. :) > + > + switch (class) { > + case RENDER_CLASS: > + switch (INTEL_GEN(dev_priv)) { > + default: > + MISSING_CASE(INTEL_GEN(dev_priv)); > + case 9: > + return GEN9_LR_CONTEXT_RENDER_SIZE; > + case 8: > + return i915.enable_execlists ? > + GEN8_LR_CONTEXT_RENDER_SIZE : > + GEN8_CXT_TOTAL_SIZE; > + case 7: > + if (IS_HASWELL(dev_priv)) > + return HSW_CXT_TOTAL_SIZE; > + > + reg = I915_READ(GEN7_CXT_SIZE); > + return GEN7_CXT_TOTAL_SIZE(reg) * 64; > + case 6: > + reg = I915_READ(CXT_SIZE); > + return GEN6_CXT_TOTAL_SIZE(reg) * 64; > + case 5: > + case 4: > + case 3: > + case 2: > + /* For the special day when i810 gets merged. */ > + case 1: > + return 0; > + } > + break; > + default: > + MISSING_CASE(class); > + case VIDEO_DECODE_CLASS: > + case VIDEO_ENHANCEMENT_CLASS: > + case COPY_ENGINE_CLASS: > + WARN_ON(INTEL_GEN(dev_priv) < 8); > + return GEN8_LR_CONTEXT_OTHER_SIZE; > + } > +} > + > static int > intel_engine_setup(struct drm_i915_private *dev_priv, > enum intel_engine_id id) > @@ -134,6 +208,10 @@ intel_engine_setup(struct drm_i915_private *dev_priv, > engine->irq_shift = info->irq_shift; > engine->class = info->class; > engine->instance = info->instance; > + engine->context_size = __intel_engine_context_size(dev_priv, > + engine->class); > + if (WARN_ON(engine->context_size > BIT(20))) > + engine->context_size = 0; Don't know the history to tell whether upgrade of DRM_DEBUG_DRIVER to a WARN_ON is ok. > > /* Nothing to do here, execute in order of dependencies */ > engine->schedule = NULL; > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index 5ec064a..0909549 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -138,10 +138,6 @@ > #include "i915_drv.h" > #include "intel_mocs.h" > > -#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) > -#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE) > -#define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE) > - > #define RING_EXECLIST_QFULL (1 << 0x2) > #define RING_EXECLIST1_VALID (1 << 0x3) > #define RING_EXECLIST0_VALID (1 << 0x4) > @@ -1918,53 +1914,6 @@ populate_lr_context(struct i915_gem_context *ctx, > return 0; > } > > -/** > - * intel_lr_context_size() - return the size of the context for an engine > - * @engine: which engine to find the context size for > - * > - * Each engine may require a different amount of space for a context image, > - * so when allocating (or copying) an image, this function can be used to > - * find the right size for the specific engine. > - * > - * Return: size (in bytes) of an engine-specific context image > - * > - * Note: this size includes the HWSP, which is part of the context image > - * in LRC mode, but does not include the "shared data page" used with > - * GuC submission. The caller should account for this if using the GuC. > - */ > -uint32_t intel_lr_context_size(struct intel_engine_cs *engine) > -{ > - struct drm_i915_private *dev_priv = engine->i915; > - int ret; > - > - WARN_ON(INTEL_GEN(dev_priv) < 8); > - > - switch (engine->class) { > - case RENDER_CLASS: > - switch (INTEL_GEN(dev_priv)) { > - default: > - MISSING_CASE(INTEL_GEN(dev_priv)); > - case 9: > - ret = GEN9_LR_CONTEXT_RENDER_SIZE; > - break; > - case 8: > - ret = GEN8_LR_CONTEXT_RENDER_SIZE; > - break; > - } > - break; > - > - default: > - MISSING_CASE(engine->class); > - case VIDEO_DECODE_CLASS: > - case VIDEO_ENHANCEMENT_CLASS: > - case COPY_ENGINE_CLASS: > - ret = GEN8_LR_CONTEXT_OTHER_SIZE; > - break; > - } > - > - return ret; > -} > - > static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, > struct intel_engine_cs *engine) > { > @@ -1977,8 +1926,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, > > WARN_ON(ce->state); > > - context_size = round_up(intel_lr_context_size(engine), > - I915_GTT_PAGE_SIZE); > + context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE); > > /* One extra page as the sharing data between driver and GuC */ > context_size += PAGE_SIZE * LRC_PPHWSP_PN; > diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h > index e8015e7..52b3a1f 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.h > +++ b/drivers/gpu/drm/i915/intel_lrc.h > @@ -78,8 +78,6 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine); > struct drm_i915_private; > struct i915_gem_context; > > -uint32_t intel_lr_context_size(struct intel_engine_cs *engine); > - > void intel_lr_context_resume(struct drm_i915_private *dev_priv); > uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx, > struct intel_engine_cs *engine); > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h > index 96710b6..a64d18c 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h > @@ -196,13 +196,14 @@ struct intel_engine_cs { > enum intel_engine_id id; > unsigned int uabi_id; > unsigned int hw_id; > + unsigned int guc_id; > > u8 class; > u8 instance; > - > - unsigned int guc_id; > - u32 mmio_base; > + u32 context_size; > + u32 mmio_base; > unsigned int irq_shift; > + > struct intel_ring *buffer; > struct intel_timeline *timeline; > > Looks good to me in principle, just unsure about those two items form above. Acked-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Regards, Tvrtko
On 26/04/2017 14:16, Tvrtko Ursulin wrote: > On 26/04/2017 13:20, Joonas Lahtinen wrote: >> Pre-calculate engine context size based on engine class and device >> generation and store it in the engine instance. >> >> v2: >> - Squash and get rid of hw_context_size (Chris) >> >> Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> >> Cc: Paulo Zanoni <paulo.r.zanoni@intel.com> >> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> >> Cc: Chris Wilson <chris@chris-wilson.co.uk> >> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> >> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >> Cc: Oscar Mateo <oscar.mateo@intel.com> >> Cc: Zhenyu Wang <zhenyuw@linux.intel.com> >> Cc: intel-gvt-dev@lists.freedesktop.org >> --- >> drivers/gpu/drm/i915/gvt/scheduler.c | 6 +-- >> drivers/gpu/drm/i915/i915_drv.h | 1 - >> drivers/gpu/drm/i915/i915_gem_context.c | 59 +++------------------- >> drivers/gpu/drm/i915/i915_guc_submission.c | 3 +- >> drivers/gpu/drm/i915/i915_reg.h | 10 ---- >> drivers/gpu/drm/i915/intel_engine_cs.c | 78 >> ++++++++++++++++++++++++++++++ >> drivers/gpu/drm/i915/intel_lrc.c | 54 +-------------------- >> drivers/gpu/drm/i915/intel_lrc.h | 2 - >> drivers/gpu/drm/i915/intel_ringbuffer.h | 7 +-- >> 9 files changed, 93 insertions(+), 127 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c >> b/drivers/gpu/drm/i915/gvt/scheduler.c >> index a77db23..ac538dc 100644 >> --- a/drivers/gpu/drm/i915/gvt/scheduler.c >> +++ b/drivers/gpu/drm/i915/gvt/scheduler.c >> @@ -69,8 +69,7 @@ static int populate_shadow_context(struct >> intel_vgpu_workload *workload) >> gvt_dbg_sched("ring id %d workload lrca %x", ring_id, >> workload->ctx_desc.lrca); >> >> - context_page_num = intel_lr_context_size( >> - gvt->dev_priv->engine[ring_id]); >> + context_page_num = gvt->dev_priv->engine[ring_id]->context_size; >> >> context_page_num = context_page_num >> PAGE_SHIFT; >> >> @@ -333,8 +332,7 @@ static void update_guest_context(struct >> intel_vgpu_workload *workload) >> gvt_dbg_sched("ring id %d workload lrca %x\n", ring_id, >> workload->ctx_desc.lrca); >> >> - context_page_num = intel_lr_context_size( >> - gvt->dev_priv->engine[ring_id]); >> + context_page_num = gvt->dev_priv->engine[ring_id]->context_size; >> >> context_page_num = context_page_num >> PAGE_SHIFT; >> >> diff --git a/drivers/gpu/drm/i915/i915_drv.h >> b/drivers/gpu/drm/i915/i915_drv.h >> index 357b6c6..4b54b92 100644 >> --- a/drivers/gpu/drm/i915/i915_drv.h >> +++ b/drivers/gpu/drm/i915/i915_drv.h >> @@ -2359,7 +2359,6 @@ struct drm_i915_private { >> */ >> struct mutex av_mutex; >> >> - uint32_t hw_context_size; >> struct list_head context_list; >> >> u32 fdi_rx_config; >> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c >> b/drivers/gpu/drm/i915/i915_gem_context.c >> index 8bd0c49..b69a026 100644 >> --- a/drivers/gpu/drm/i915/i915_gem_context.c >> +++ b/drivers/gpu/drm/i915/i915_gem_context.c >> @@ -92,33 +92,6 @@ >> >> #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1 >> >> -static int get_context_size(struct drm_i915_private *dev_priv) >> -{ >> - int ret; >> - u32 reg; >> - >> - switch (INTEL_GEN(dev_priv)) { >> - case 6: >> - reg = I915_READ(CXT_SIZE); >> - ret = GEN6_CXT_TOTAL_SIZE(reg) * 64; >> - break; >> - case 7: >> - reg = I915_READ(GEN7_CXT_SIZE); >> - if (IS_HASWELL(dev_priv)) >> - ret = HSW_CXT_TOTAL_SIZE; >> - else >> - ret = GEN7_CXT_TOTAL_SIZE(reg) * 64; >> - break; >> - case 8: >> - ret = GEN8_CXT_TOTAL_SIZE; >> - break; >> - default: >> - BUG(); >> - } >> - >> - return ret; >> -} >> - >> void i915_gem_context_free(struct kref *ctx_ref) >> { >> struct i915_gem_context *ctx = container_of(ctx_ref, >> typeof(*ctx), ref); >> @@ -266,11 +239,12 @@ __create_hw_context(struct drm_i915_private >> *dev_priv, >> list_add_tail(&ctx->link, &dev_priv->context_list); >> ctx->i915 = dev_priv; >> >> - if (dev_priv->hw_context_size) { >> + if (dev_priv->engine[RCS]->context_size) { >> struct drm_i915_gem_object *obj; >> struct i915_vma *vma; >> >> - obj = alloc_context_obj(dev_priv, dev_priv->hw_context_size); >> + obj = alloc_context_obj(dev_priv, >> + dev_priv->engine[RCS]->context_size); >> if (IS_ERR(obj)) { >> ret = PTR_ERR(obj); >> goto err_out; >> @@ -443,21 +417,6 @@ int i915_gem_context_init(struct drm_i915_private >> *dev_priv) >> BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX); >> ida_init(&dev_priv->context_hw_ida); >> >> - if (i915.enable_execlists) { >> - /* NB: intentionally left blank. We will allocate our own >> - * backing objects as we need them, thank you very much */ >> - dev_priv->hw_context_size = 0; >> - } else if (HAS_HW_CONTEXTS(dev_priv)) { >> - dev_priv->hw_context_size = >> - round_up(get_context_size(dev_priv), >> - I915_GTT_PAGE_SIZE); > > Is this rounding up lost when used from __create_hw_context? Also lost seems keeping hw_context_size at zero when !HAS_HW_CONTEXT, deliberate? Looks like the replacement of contexts_enabled will not work as expected. Regards, Tvrtko
On ke, 2017-04-26 at 14:16 +0100, Tvrtko Ursulin wrote: > On 26/04/2017 13:20, Joonas Lahtinen wrote: > > > > @@ -443,21 +417,6 @@ int i915_gem_context_init(struct drm_i915_private *dev_priv) > > BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX); > > ida_init(&dev_priv->context_hw_ida); > > > > - if (i915.enable_execlists) { > > - /* NB: intentionally left blank. We will allocate our own > > - * backing objects as we need them, thank you very much */ > > - dev_priv->hw_context_size = 0; > > - } else if (HAS_HW_CONTEXTS(dev_priv)) { > > - dev_priv->hw_context_size = > > - round_up(get_context_size(dev_priv), > > - I915_GTT_PAGE_SIZE); > > Is this rounding up lost when used from __create_hw_context? Added it back for Gen 7 and 6 where it could do something. Others have been rounded up in the heads of engineers already. > > +static u32 > > +__intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class) > Very minor, but Chris has been trying to establish i915 instead of > dev_priv in new code. It'd shadow the global i915 in this case, so leaving it as is :/ > > @@ -134,6 +208,10 @@ intel_engine_setup(struct drm_i915_private *dev_priv, > > engine->irq_shift = info->irq_shift; > > engine->class = info->class; > > engine->instance = info->instance; > > + engine->context_size = __intel_engine_context_size(dev_priv, > > + engine->class); > > + if (WARN_ON(engine->context_size > BIT(20))) > > + engine->context_size = 0; > > Don't know the history to tell whether upgrade of DRM_DEBUG_DRIVER to a > WARN_ON is ok. Talked with Chris, if it triggers, it should definitely be WARN_ON :) Never seen in the past. Regards, Joonas
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index a77db23..ac538dc 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -69,8 +69,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) gvt_dbg_sched("ring id %d workload lrca %x", ring_id, workload->ctx_desc.lrca); - context_page_num = intel_lr_context_size( - gvt->dev_priv->engine[ring_id]); + context_page_num = gvt->dev_priv->engine[ring_id]->context_size; context_page_num = context_page_num >> PAGE_SHIFT; @@ -333,8 +332,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload) gvt_dbg_sched("ring id %d workload lrca %x\n", ring_id, workload->ctx_desc.lrca); - context_page_num = intel_lr_context_size( - gvt->dev_priv->engine[ring_id]); + context_page_num = gvt->dev_priv->engine[ring_id]->context_size; context_page_num = context_page_num >> PAGE_SHIFT; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 357b6c6..4b54b92 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2359,7 +2359,6 @@ struct drm_i915_private { */ struct mutex av_mutex; - uint32_t hw_context_size; struct list_head context_list; u32 fdi_rx_config; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 8bd0c49..b69a026 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -92,33 +92,6 @@ #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1 -static int get_context_size(struct drm_i915_private *dev_priv) -{ - int ret; - u32 reg; - - switch (INTEL_GEN(dev_priv)) { - case 6: - reg = I915_READ(CXT_SIZE); - ret = GEN6_CXT_TOTAL_SIZE(reg) * 64; - break; - case 7: - reg = I915_READ(GEN7_CXT_SIZE); - if (IS_HASWELL(dev_priv)) - ret = HSW_CXT_TOTAL_SIZE; - else - ret = GEN7_CXT_TOTAL_SIZE(reg) * 64; - break; - case 8: - ret = GEN8_CXT_TOTAL_SIZE; - break; - default: - BUG(); - } - - return ret; -} - void i915_gem_context_free(struct kref *ctx_ref) { struct i915_gem_context *ctx = container_of(ctx_ref, typeof(*ctx), ref); @@ -266,11 +239,12 @@ __create_hw_context(struct drm_i915_private *dev_priv, list_add_tail(&ctx->link, &dev_priv->context_list); ctx->i915 = dev_priv; - if (dev_priv->hw_context_size) { + if (dev_priv->engine[RCS]->context_size) { struct drm_i915_gem_object *obj; struct i915_vma *vma; - obj = alloc_context_obj(dev_priv, dev_priv->hw_context_size); + obj = alloc_context_obj(dev_priv, + dev_priv->engine[RCS]->context_size); if (IS_ERR(obj)) { ret = PTR_ERR(obj); goto err_out; @@ -443,21 +417,6 @@ int i915_gem_context_init(struct drm_i915_private *dev_priv) BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX); ida_init(&dev_priv->context_hw_ida); - if (i915.enable_execlists) { - /* NB: intentionally left blank. We will allocate our own - * backing objects as we need them, thank you very much */ - dev_priv->hw_context_size = 0; - } else if (HAS_HW_CONTEXTS(dev_priv)) { - dev_priv->hw_context_size = - round_up(get_context_size(dev_priv), - I915_GTT_PAGE_SIZE); - if (dev_priv->hw_context_size > (1<<20)) { - DRM_DEBUG_DRIVER("Disabling HW Contexts; invalid size %d\n", - dev_priv->hw_context_size); - dev_priv->hw_context_size = 0; - } - } - ctx = i915_gem_create_context(dev_priv, NULL); if (IS_ERR(ctx)) { DRM_ERROR("Failed to create default global context (error %ld)\n", @@ -478,7 +437,7 @@ int i915_gem_context_init(struct drm_i915_private *dev_priv) DRM_DEBUG_DRIVER("%s context support initialized\n", i915.enable_execlists ? "LR" : - dev_priv->hw_context_size ? "HW" : "fake"); + dev_priv->engine[RCS]->context_size ? "HW" : "fake"); return 0; } @@ -941,11 +900,6 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) return 0; } -static bool contexts_enabled(struct drm_device *dev) -{ - return i915.enable_execlists || to_i915(dev)->hw_context_size; -} - static bool client_is_banned(struct drm_i915_file_private *file_priv) { return file_priv->context_bans > I915_MAX_CLIENT_CONTEXT_BANS; @@ -954,12 +908,13 @@ static bool client_is_banned(struct drm_i915_file_private *file_priv) int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { + struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_context_create *args = data; struct drm_i915_file_private *file_priv = file->driver_priv; struct i915_gem_context *ctx; int ret; - if (!contexts_enabled(dev)) + if (!dev_priv->engine[RCS]->context_size) return -ENODEV; if (args->pad != 0) @@ -977,7 +932,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, if (ret) return ret; - ctx = i915_gem_create_context(to_i915(dev), file_priv); + ctx = i915_gem_create_context(dev_priv, file_priv); mutex_unlock(&dev->struct_mutex); if (IS_ERR(ctx)) return PTR_ERR(ctx); diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index ab5140b..6c78637 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -1051,8 +1051,7 @@ static int guc_ads_create(struct intel_guc *guc) dev_priv->engine[RCS]->status_page.ggtt_offset; for_each_engine(engine, dev_priv, id) - blob->ads.eng_state_size[engine->guc_id] = - intel_lr_context_size(engine); + blob->ads.eng_state_size[engine->guc_id] = engine->context_size; base = guc_ggtt_offset(vma); blob->ads.scheduler_policies = base + ptr_offset(blob, policies); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 4c72ada..ee8170c 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3370,16 +3370,6 @@ enum skl_disp_power_wells { #define GEN7_CXT_VFSTATE_SIZE(ctx_reg) (((ctx_reg) >> 0) & 0x3f) #define GEN7_CXT_TOTAL_SIZE(ctx_reg) (GEN7_CXT_EXTENDED_SIZE(ctx_reg) + \ GEN7_CXT_VFSTATE_SIZE(ctx_reg)) -/* Haswell does have the CXT_SIZE register however it does not appear to be - * valid. Now, docs explain in dwords what is in the context object. The full - * size is 70720 bytes, however, the power context and execlist context will - * never be saved (power context is stored elsewhere, and execlists don't work - * on HSW) - so the final size, including the extra state required for the - * Resource Streamer, is 66944 bytes, which rounds to 17 pages. - */ -#define HSW_CXT_TOTAL_SIZE (17 * PAGE_SIZE) -/* Same as Haswell, but 72064 bytes now. */ -#define GEN8_CXT_TOTAL_SIZE (18 * PAGE_SIZE) enum { INTEL_ADVANCED_CONTEXT = 0, diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 82a274b..24bcd9a 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -26,6 +26,22 @@ #include "intel_ringbuffer.h" #include "intel_lrc.h" +/* Haswell does have the CXT_SIZE register however it does not appear to be + * valid. Now, docs explain in dwords what is in the context object. The full + * size is 70720 bytes, however, the power context and execlist context will + * never be saved (power context is stored elsewhere, and execlists don't work + * on HSW) - so the final size, including the extra state required for the + * Resource Streamer, is 66944 bytes, which rounds to 17 pages. + */ +#define HSW_CXT_TOTAL_SIZE (17 * PAGE_SIZE) +/* Same as Haswell, but 72064 bytes now. */ +#define GEN8_CXT_TOTAL_SIZE (18 * PAGE_SIZE) + +#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE) +#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) + +#define GEN8_LR_CONTEXT_OTHER_SIZE ( 2 * PAGE_SIZE) + struct engine_class_info { const char *name; int (*init_legacy)(struct intel_engine_cs *engine); @@ -107,6 +123,64 @@ static const struct engine_info intel_engines[] = { }, }; +/** + * ___intel_engine_context_size() - return the size of the context for an engine + * @dev_priv: i915 device private + * @class: engine class + * + * Each engine class may require a different amount of space for a context + * image. + * + * Return: size (in bytes) of an engine class specific context image + * + * Note: this size includes the HWSP, which is part of the context image + * in LRC mode, but does not include the "shared data page" used with + * GuC submission. The caller should account for this if using the GuC. + */ +static u32 +__intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class) +{ + u32 reg; + + switch (class) { + case RENDER_CLASS: + switch (INTEL_GEN(dev_priv)) { + default: + MISSING_CASE(INTEL_GEN(dev_priv)); + case 9: + return GEN9_LR_CONTEXT_RENDER_SIZE; + case 8: + return i915.enable_execlists ? + GEN8_LR_CONTEXT_RENDER_SIZE : + GEN8_CXT_TOTAL_SIZE; + case 7: + if (IS_HASWELL(dev_priv)) + return HSW_CXT_TOTAL_SIZE; + + reg = I915_READ(GEN7_CXT_SIZE); + return GEN7_CXT_TOTAL_SIZE(reg) * 64; + case 6: + reg = I915_READ(CXT_SIZE); + return GEN6_CXT_TOTAL_SIZE(reg) * 64; + case 5: + case 4: + case 3: + case 2: + /* For the special day when i810 gets merged. */ + case 1: + return 0; + } + break; + default: + MISSING_CASE(class); + case VIDEO_DECODE_CLASS: + case VIDEO_ENHANCEMENT_CLASS: + case COPY_ENGINE_CLASS: + WARN_ON(INTEL_GEN(dev_priv) < 8); + return GEN8_LR_CONTEXT_OTHER_SIZE; + } +} + static int intel_engine_setup(struct drm_i915_private *dev_priv, enum intel_engine_id id) @@ -134,6 +208,10 @@ intel_engine_setup(struct drm_i915_private *dev_priv, engine->irq_shift = info->irq_shift; engine->class = info->class; engine->instance = info->instance; + engine->context_size = __intel_engine_context_size(dev_priv, + engine->class); + if (WARN_ON(engine->context_size > BIT(20))) + engine->context_size = 0; /* Nothing to do here, execute in order of dependencies */ engine->schedule = NULL; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 5ec064a..0909549 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -138,10 +138,6 @@ #include "i915_drv.h" #include "intel_mocs.h" -#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) -#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE) -#define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE) - #define RING_EXECLIST_QFULL (1 << 0x2) #define RING_EXECLIST1_VALID (1 << 0x3) #define RING_EXECLIST0_VALID (1 << 0x4) @@ -1918,53 +1914,6 @@ populate_lr_context(struct i915_gem_context *ctx, return 0; } -/** - * intel_lr_context_size() - return the size of the context for an engine - * @engine: which engine to find the context size for - * - * Each engine may require a different amount of space for a context image, - * so when allocating (or copying) an image, this function can be used to - * find the right size for the specific engine. - * - * Return: size (in bytes) of an engine-specific context image - * - * Note: this size includes the HWSP, which is part of the context image - * in LRC mode, but does not include the "shared data page" used with - * GuC submission. The caller should account for this if using the GuC. - */ -uint32_t intel_lr_context_size(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - WARN_ON(INTEL_GEN(dev_priv) < 8); - - switch (engine->class) { - case RENDER_CLASS: - switch (INTEL_GEN(dev_priv)) { - default: - MISSING_CASE(INTEL_GEN(dev_priv)); - case 9: - ret = GEN9_LR_CONTEXT_RENDER_SIZE; - break; - case 8: - ret = GEN8_LR_CONTEXT_RENDER_SIZE; - break; - } - break; - - default: - MISSING_CASE(engine->class); - case VIDEO_DECODE_CLASS: - case VIDEO_ENHANCEMENT_CLASS: - case COPY_ENGINE_CLASS: - ret = GEN8_LR_CONTEXT_OTHER_SIZE; - break; - } - - return ret; -} - static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { @@ -1977,8 +1926,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, WARN_ON(ce->state); - context_size = round_up(intel_lr_context_size(engine), - I915_GTT_PAGE_SIZE); + context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE); /* One extra page as the sharing data between driver and GuC */ context_size += PAGE_SIZE * LRC_PPHWSP_PN; diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index e8015e7..52b3a1f 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -78,8 +78,6 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine); struct drm_i915_private; struct i915_gem_context; -uint32_t intel_lr_context_size(struct intel_engine_cs *engine); - void intel_lr_context_resume(struct drm_i915_private *dev_priv); uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx, struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 96710b6..a64d18c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -196,13 +196,14 @@ struct intel_engine_cs { enum intel_engine_id id; unsigned int uabi_id; unsigned int hw_id; + unsigned int guc_id; u8 class; u8 instance; - - unsigned int guc_id; - u32 mmio_base; + u32 context_size; + u32 mmio_base; unsigned int irq_shift; + struct intel_ring *buffer; struct intel_timeline *timeline;
Pre-calculate engine context size based on engine class and device generation and store it in the engine instance. v2: - Squash and get rid of hw_context_size (Chris) Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Paulo Zanoni <paulo.r.zanoni@intel.com> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Oscar Mateo <oscar.mateo@intel.com> Cc: Zhenyu Wang <zhenyuw@linux.intel.com> Cc: intel-gvt-dev@lists.freedesktop.org --- drivers/gpu/drm/i915/gvt/scheduler.c | 6 +-- drivers/gpu/drm/i915/i915_drv.h | 1 - drivers/gpu/drm/i915/i915_gem_context.c | 59 +++------------------- drivers/gpu/drm/i915/i915_guc_submission.c | 3 +- drivers/gpu/drm/i915/i915_reg.h | 10 ---- drivers/gpu/drm/i915/intel_engine_cs.c | 78 ++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_lrc.c | 54 +-------------------- drivers/gpu/drm/i915/intel_lrc.h | 2 - drivers/gpu/drm/i915/intel_ringbuffer.h | 7 +-- 9 files changed, 93 insertions(+), 127 deletions(-)