Message ID | 1493197914-12383-1-git-send-email-joonas.lahtinen@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Hi Joonas: Can you change GEN8_LR_CONTEXT_RENDER_SIZE = (19 * PAGE_SIZE)? Then we don't need the hack in GVT-g. :P Actually it's 19 pages not 20 pages on BDW. Thanks, Zhi. 于 04/26/17 17:11, Joonas Lahtinen 写道: > Pre-calculate engine context size based on engine class and device > generation and store it in the engine instance. > > Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> > Cc: Paulo Zanoni <paulo.r.zanoni@intel.com> > Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> > Cc: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > Cc: Oscar Mateo <oscar.mateo@intel.com> > Cc: Zhenyu Wang <zhenyuw@linux.intel.com> > Cc: intel-gvt-dev@lists.freedesktop.org > --- > drivers/gpu/drm/i915/gvt/scheduler.c | 6 ++-- > drivers/gpu/drm/i915/i915_guc_submission.c | 3 +- > drivers/gpu/drm/i915/intel_engine_cs.c | 46 +++++++++++++++++++++++++ > drivers/gpu/drm/i915/intel_lrc.c | 54 +----------------------------- > drivers/gpu/drm/i915/intel_lrc.h | 2 -- > drivers/gpu/drm/i915/intel_ringbuffer.h | 5 +-- > 6 files changed, 53 insertions(+), 63 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c > index a77db23..ac538dc 100644 > --- a/drivers/gpu/drm/i915/gvt/scheduler.c > +++ b/drivers/gpu/drm/i915/gvt/scheduler.c > @@ -69,8 +69,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) > gvt_dbg_sched("ring id %d workload lrca %x", ring_id, > workload->ctx_desc.lrca); > > - context_page_num = intel_lr_context_size( > - gvt->dev_priv->engine[ring_id]); > + context_page_num = gvt->dev_priv->engine[ring_id]->context_size; > > context_page_num = context_page_num >> PAGE_SHIFT; > > @@ -333,8 +332,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload) > gvt_dbg_sched("ring id %d workload lrca %x\n", ring_id, > workload->ctx_desc.lrca); > > - context_page_num = intel_lr_context_size( > 
- gvt->dev_priv->engine[ring_id]); > + context_page_num = gvt->dev_priv->engine[ring_id]->context_size; > > context_page_num = context_page_num >> PAGE_SHIFT; > > diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c > index ab5140b..6c78637 100644 > --- a/drivers/gpu/drm/i915/i915_guc_submission.c > +++ b/drivers/gpu/drm/i915/i915_guc_submission.c > @@ -1051,8 +1051,7 @@ static int guc_ads_create(struct intel_guc *guc) > dev_priv->engine[RCS]->status_page.ggtt_offset; > > for_each_engine(engine, dev_priv, id) > - blob->ads.eng_state_size[engine->guc_id] = > - intel_lr_context_size(engine); > + blob->ads.eng_state_size[engine->guc_id] = engine->context_size; > > base = guc_ggtt_offset(vma); > blob->ads.scheduler_policies = base + ptr_offset(blob, policies); > diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c > index 82a274b..091c0c7 100644 > --- a/drivers/gpu/drm/i915/intel_engine_cs.c > +++ b/drivers/gpu/drm/i915/intel_engine_cs.c > @@ -26,6 +26,10 @@ > #include "intel_ringbuffer.h" > #include "intel_lrc.h" > > +#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) > +#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE) > +#define GEN8_LR_CONTEXT_OTHER_SIZE ( 2 * PAGE_SIZE) > + > struct engine_class_info { > const char *name; > int (*init_legacy)(struct intel_engine_cs *engine); > @@ -107,6 +111,46 @@ static const struct engine_info intel_engines[] = { > }, > }; > > +/** > + * ___intel_engine_context_size() - return the size of the context for an engine > + * @dev_priv: i915 device private > + * @class: engine class > + * > + * Each engine class may require a different amount of space for a context > + * image. > + * > + * Return: size (in bytes) of an engine class specific context image > + * > + * Note: this size includes the HWSP, which is part of the context image > + * in LRC mode, but does not include the "shared data page" used with > + * GuC submission. 
The caller should account for this if using the GuC. > + */ > +static u32 > +__intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class) > +{ > + WARN_ON(INTEL_GEN(dev_priv) < 8); > + > + switch (class) { > + case RENDER_CLASS: > + switch (INTEL_GEN(dev_priv)) { > + default: > + MISSING_CASE(INTEL_GEN(dev_priv)); > + case 9: > + return GEN9_LR_CONTEXT_RENDER_SIZE; > + case 8: > + return GEN8_LR_CONTEXT_RENDER_SIZE; > + } > + break; > + case VIDEO_DECODE_CLASS: > + case VIDEO_ENHANCEMENT_CLASS: > + case COPY_ENGINE_CLASS: > + return GEN8_LR_CONTEXT_OTHER_SIZE; > + } > + > + MISSING_CASE(class); > + return GEN8_LR_CONTEXT_OTHER_SIZE; > +} > + > static int > intel_engine_setup(struct drm_i915_private *dev_priv, > enum intel_engine_id id) > @@ -134,6 +178,8 @@ intel_engine_setup(struct drm_i915_private *dev_priv, > engine->irq_shift = info->irq_shift; > engine->class = info->class; > engine->instance = info->instance; > + engine->context_size = __intel_engine_context_size(dev_priv, > + engine->class); > > /* Nothing to do here, execute in order of dependencies */ > engine->schedule = NULL; > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index 5ec064a..0909549 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -138,10 +138,6 @@ > #include "i915_drv.h" > #include "intel_mocs.h" > > -#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) > -#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE) > -#define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE) > - > #define RING_EXECLIST_QFULL (1 << 0x2) > #define RING_EXECLIST1_VALID (1 << 0x3) > #define RING_EXECLIST0_VALID (1 << 0x4) > @@ -1918,53 +1914,6 @@ populate_lr_context(struct i915_gem_context *ctx, > return 0; > } > > -/** > - * intel_lr_context_size() - return the size of the context for an engine > - * @engine: which engine to find the context size for > - * > - * Each engine may require a different amount of space for a context 
image, > - * so when allocating (or copying) an image, this function can be used to > - * find the right size for the specific engine. > - * > - * Return: size (in bytes) of an engine-specific context image > - * > - * Note: this size includes the HWSP, which is part of the context image > - * in LRC mode, but does not include the "shared data page" used with > - * GuC submission. The caller should account for this if using the GuC. > - */ > -uint32_t intel_lr_context_size(struct intel_engine_cs *engine) > -{ > - struct drm_i915_private *dev_priv = engine->i915; > - int ret; > - > - WARN_ON(INTEL_GEN(dev_priv) < 8); > - > - switch (engine->class) { > - case RENDER_CLASS: > - switch (INTEL_GEN(dev_priv)) { > - default: > - MISSING_CASE(INTEL_GEN(dev_priv)); > - case 9: > - ret = GEN9_LR_CONTEXT_RENDER_SIZE; > - break; > - case 8: > - ret = GEN8_LR_CONTEXT_RENDER_SIZE; > - break; > - } > - break; > - > - default: > - MISSING_CASE(engine->class); > - case VIDEO_DECODE_CLASS: > - case VIDEO_ENHANCEMENT_CLASS: > - case COPY_ENGINE_CLASS: > - ret = GEN8_LR_CONTEXT_OTHER_SIZE; > - break; > - } > - > - return ret; > -} > - > static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, > struct intel_engine_cs *engine) > { > @@ -1977,8 +1926,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, > > WARN_ON(ce->state); > > - context_size = round_up(intel_lr_context_size(engine), > - I915_GTT_PAGE_SIZE); > + context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE); > > /* One extra page as the sharing data between driver and GuC */ > context_size += PAGE_SIZE * LRC_PPHWSP_PN; > diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h > index e8015e7..52b3a1f 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.h > +++ b/drivers/gpu/drm/i915/intel_lrc.h > @@ -78,8 +78,6 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine); > struct drm_i915_private; > struct i915_gem_context; > > -uint32_t 
intel_lr_context_size(struct intel_engine_cs *engine); > - > void intel_lr_context_resume(struct drm_i915_private *dev_priv); > uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx, > struct intel_engine_cs *engine); > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h > index 96710b6..598194d 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h > @@ -196,12 +196,13 @@ struct intel_engine_cs { > enum intel_engine_id id; > unsigned int uabi_id; > unsigned int hw_id; > + unsigned int guc_id; > > u8 class; > u8 instance; > + u32 context_size; > > - unsigned int guc_id; > - u32 mmio_base; > + u32 mmio_base; > unsigned int irq_shift; > struct intel_ring *buffer; > struct intel_timeline *timeline;
On Wed, Apr 26, 2017 at 12:11:53PM +0300, Joonas Lahtinen wrote: > Pre-calculate engine context size based on engine class and device > generation and store it in the engine instance. > > Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> > Cc: Paulo Zanoni <paulo.r.zanoni@intel.com> > Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> > Cc: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > Cc: Oscar Mateo <oscar.mateo@intel.com> > Cc: Zhenyu Wang <zhenyuw@linux.intel.com> > Cc: intel-gvt-dev@lists.freedesktop.org > --- > drivers/gpu/drm/i915/gvt/scheduler.c | 6 ++-- > drivers/gpu/drm/i915/i915_guc_submission.c | 3 +- > drivers/gpu/drm/i915/intel_engine_cs.c | 46 +++++++++++++++++++++++++ > drivers/gpu/drm/i915/intel_lrc.c | 54 +----------------------------- > drivers/gpu/drm/i915/intel_lrc.h | 2 -- > drivers/gpu/drm/i915/intel_ringbuffer.h | 5 +-- > 6 files changed, 53 insertions(+), 63 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c > index a77db23..ac538dc 100644 > --- a/drivers/gpu/drm/i915/gvt/scheduler.c > +++ b/drivers/gpu/drm/i915/gvt/scheduler.c > @@ -69,8 +69,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) > gvt_dbg_sched("ring id %d workload lrca %x", ring_id, > workload->ctx_desc.lrca); > > - context_page_num = intel_lr_context_size( > - gvt->dev_priv->engine[ring_id]); > + context_page_num = gvt->dev_priv->engine[ring_id]->context_size; > > context_page_num = context_page_num >> PAGE_SHIFT; > > @@ -333,8 +332,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload) > gvt_dbg_sched("ring id %d workload lrca %x\n", ring_id, > workload->ctx_desc.lrca); > > - context_page_num = intel_lr_context_size( > - gvt->dev_priv->engine[ring_id]); > + context_page_num = gvt->dev_priv->engine[ring_id]->context_size; > > context_page_num = context_page_num 
>> PAGE_SHIFT; > > diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c > index ab5140b..6c78637 100644 > --- a/drivers/gpu/drm/i915/i915_guc_submission.c > +++ b/drivers/gpu/drm/i915/i915_guc_submission.c > @@ -1051,8 +1051,7 @@ static int guc_ads_create(struct intel_guc *guc) > dev_priv->engine[RCS]->status_page.ggtt_offset; > > for_each_engine(engine, dev_priv, id) > - blob->ads.eng_state_size[engine->guc_id] = > - intel_lr_context_size(engine); > + blob->ads.eng_state_size[engine->guc_id] = engine->context_size; > > base = guc_ggtt_offset(vma); > blob->ads.scheduler_policies = base + ptr_offset(blob, policies); > diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c > index 82a274b..091c0c7 100644 > --- a/drivers/gpu/drm/i915/intel_engine_cs.c > +++ b/drivers/gpu/drm/i915/intel_engine_cs.c > @@ -26,6 +26,10 @@ > #include "intel_ringbuffer.h" > #include "intel_lrc.h" > > +#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) > +#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE) > +#define GEN8_LR_CONTEXT_OTHER_SIZE ( 2 * PAGE_SIZE) > + > struct engine_class_info { > const char *name; > int (*init_legacy)(struct intel_engine_cs *engine); > @@ -107,6 +111,46 @@ static const struct engine_info intel_engines[] = { > }, > }; > > +/** > + * ___intel_engine_context_size() - return the size of the context for an engine > + * @dev_priv: i915 device private > + * @class: engine class > + * > + * Each engine class may require a different amount of space for a context > + * image. > + * > + * Return: size (in bytes) of an engine class specific context image > + * > + * Note: this size includes the HWSP, which is part of the context image > + * in LRC mode, but does not include the "shared data page" used with > + * GuC submission. The caller should account for this if using the GuC. 
> + */ > +static u32 > +__intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class) > +{ > + WARN_ON(INTEL_GEN(dev_priv) < 8); > + > + switch (class) { > + case RENDER_CLASS: > + switch (INTEL_GEN(dev_priv)) { > + default: > + MISSING_CASE(INTEL_GEN(dev_priv)); > + case 9: > + return GEN9_LR_CONTEXT_RENDER_SIZE; > + case 8: > + return GEN8_LR_CONTEXT_RENDER_SIZE; > + } > + break; > + case VIDEO_DECODE_CLASS: > + case VIDEO_ENHANCEMENT_CLASS: > + case COPY_ENGINE_CLASS: > + return GEN8_LR_CONTEXT_OTHER_SIZE; > + } > + > + MISSING_CASE(class); > + return GEN8_LR_CONTEXT_OTHER_SIZE; > +} > + > static int > intel_engine_setup(struct drm_i915_private *dev_priv, > enum intel_engine_id id) > @@ -134,6 +178,8 @@ intel_engine_setup(struct drm_i915_private *dev_priv, > engine->irq_shift = info->irq_shift; > engine->class = info->class; > engine->instance = info->instance; > + engine->context_size = __intel_engine_context_size(dev_priv, > + engine->class); Isn't intel_engine_setup() common for all gen? Hmm, I would like to dev_priv->hw_context_size just die, and be able to use dev_priv->engine[RCS]->context_size instead. That makes contexts_enabled() much simpler, for example, and kills yet another i915.enable_execlists. But does make __create_hw_context() a bit more ugly until we do deferred allocation for legacy as well (the framework is in place!) -Chris
Hi Harsh: Thanks for your help last time. You showed us that the size of the context image is actually 19 pages on BDW. Is it also the same on CHV? Thanks, Zhi. 于 04/26/17 17:52, Joonas Lahtinen 写道: > On ke, 2017-04-26 at 17:10 +0800, Zhi Wang wrote: >> Hi Joonas: >> Can you change GEN8_LR_CONTEXT_RENDER_SIZE = (19 * PAGE_SIZE)? >> Then we don't need the hack in GVT-g. :P Actually it's 19 pages not >> 20 pages on BDW. > The exception is only made for BDW, not Gen8 overall. Has the change > been verified for CHV too? > > Why hasn't a patch to fix above been sent for i915 in the past? Just > like in the stolen memory disabling case, bugs should be root caused > and then fixed, not just worked around quickly. > > Regards, Joonas
On ke, 2017-04-26 at 17:10 +0800, Zhi Wang wrote: > Hi Joonas: > Can you change GEN8_LR_CONTEXT_RENDER_SIZE = (19 * PAGE_SIZE)? > Then we don't need the hack in GVT-g. :P Actually it's 19 pages not > 20 pages on BDW. The exception is only made for BDW, not Gen8 overall. Has the change been verified for CHV too? Why hasn't a patch to fix above been sent for i915 in the past? Just like in the stolen memory disabling case, bugs should be root caused and then fixed, not just worked around quickly. Regards, Joonas
Uh... sorry for not mentioning that before :), and stolen memory is not my business. :( Actually we root-caused it. This is how we found this case: The story is that the W driver allocates the ring buffer directly after the context image, and the context image size in the W driver is 19 pages. GVT-g shadows the context during submission: we copy 20 pages from the guest context image, so, as you can see, an extra page is copied here because the context image size is actually 19 pages. The extra page belongs to the ring buffer. When the guest updates that page with new commands while GVT-g is executing the workload, the extra page (which is a ring buffer page) will be overwritten with old content, since GVT-g copies the shadow context (20 pages) back to the guest at that time. That's the full story. I sent another email to Harsh. He should know whether the context image size on CHV is also 19 pages. Thanks, Zhi. 于 04/26/17 17:52, Joonas Lahtinen 写道: > On ke, 2017-04-26 at 17:10 +0800, Zhi Wang wrote: >> Hi Joonas: >> Can you change GEN8_LR_CONTEXT_RENDER_SIZE = (19 * PAGE_SIZE)? >> Then we don't need the hack in GVT-g. :P Actually it's 19 pages not >> 20 pages on BDW. > The exception is only made for BDW, not Gen8 overall. Has the change > been verified for CHV too? > > Why hasn't a patch to fix above been sent for i915 in the past? Just > like in the stolen memory disabling case, bugs should be root caused > and then fixed, not just worked around quickly. > > Regards, Joonas
> -----Original Message----- > Subject: [Intel-gfx] ✗ Fi.CI.BAT: warning for series starting with [1/2] drm/i915: > Pre-calculate engine context size > > == Series Details == > > Series: series starting with [1/2] drm/i915: Pre-calculate engine context size > URL : https://patchwork.freedesktop.org/series/23559/ > State : warning > > == Summary == > > Series 23559v1 Series without cover letter > https://patchwork.freedesktop.org/api/1.0/series/23559/revisions/1/mbox/ > > Test drv_module_reload: > Subgroup basic-no-display: > pass -> DMESG-WARN (fi-hsw-4770r) > pass -> DMESG-WARN (fi-byt-j1900) > pass -> DMESG-WARN (fi-byt-n2820) > pass -> DMESG-WARN (fi-ivb-3520m) > pass -> DMESG-WARN (fi-ilk-650) > pass -> DMESG-WARN (fi-snb-2520m) > pass -> DMESG-WARN (fi-hsw-4770) > pass -> DMESG-WARN (fi-ivb-3770) > pass -> DMESG-WARN (fi-snb-2600) > Subgroup basic-reload: > pass -> DMESG-WARN (fi-hsw-4770r) > pass -> DMESG-WARN (fi-byt-j1900) > pass -> DMESG-WARN (fi-byt-n2820) > pass -> DMESG-WARN (fi-ivb-3520m) > pass -> DMESG-WARN (fi-ilk-650) > pass -> DMESG-WARN (fi-snb-2520m) > pass -> DMESG-WARN (fi-hsw-4770) > pass -> DMESG-WARN (fi-ivb-3770) > pass -> DMESG-WARN (fi-snb-2600) > Subgroup basic-reload-final: > pass -> DMESG-WARN (fi-hsw-4770r) > pass -> DMESG-WARN (fi-byt-j1900) > pass -> DMESG-WARN (fi-byt-n2820) > pass -> DMESG-WARN (fi-ivb-3520m) > pass -> DMESG-WARN (fi-ilk-650) > pass -> DMESG-WARN (fi-snb-2520m) > pass -> DMESG-WARN (fi-hsw-4770) > pass -> DMESG-WARN (fi-ivb-3770) > pass -> DMESG-WARN (fi-snb-2600) > Subgroup basic-reload-inject: > pass -> DMESG-WARN (fi-hsw-4770r) > pass -> DMESG-WARN (fi-byt-j1900) > pass -> DMESG-WARN (fi-byt-n2820) > pass -> DMESG-WARN (fi-ivb-3520m) > pass -> DMESG-WARN (fi-ilk-650) > pass -> DMESG-WARN (fi-snb-2520m) > pass -> DMESG-WARN (fi-hsw-4770) > pass -> DMESG-WARN (fi-ivb-3770) > pass -> DMESG-WARN (fi-snb-2600) > Test gem_exec_suspend: > Subgroup basic-s4-devices: > pass -> DMESG-WARN (fi-kbl-7560u) fdo#100125 How 
about trybot first? > > fdo#100125 https://bugs.freedesktop.org/show_bug.cgi?id=100125 > > fi-bdw-5557u total:278 pass:267 dwarn:0 dfail:0 fail:0 skip:11 time:430s > fi-bdw-gvtdvm total:278 pass:256 dwarn:8 dfail:0 fail:0 skip:14 time:426s > fi-bsw-n3050 total:278 pass:242 dwarn:0 dfail:0 fail:0 skip:36 time:569s > fi-bxt-j4205 total:278 pass:259 dwarn:0 dfail:0 fail:0 skip:19 time:474s > fi-byt-j1900 total:278 pass:250 dwarn:4 dfail:0 fail:0 skip:24 time:485s > fi-byt-n2820 total:278 pass:246 dwarn:4 dfail:0 fail:0 skip:28 time:477s > fi-hsw-4770 total:278 pass:258 dwarn:4 dfail:0 fail:0 skip:16 time:410s > fi-hsw-4770r total:278 pass:258 dwarn:4 dfail:0 fail:0 skip:16 time:398s > fi-ilk-650 total:278 pass:224 dwarn:4 dfail:0 fail:0 skip:50 time:413s > fi-ivb-3520m total:278 pass:256 dwarn:4 dfail:0 fail:0 skip:18 time:481s > fi-ivb-3770 total:278 pass:256 dwarn:4 dfail:0 fail:0 skip:18 time:448s > fi-kbl-7500u total:278 pass:260 dwarn:0 dfail:0 fail:0 skip:18 time:438s > fi-kbl-7560u total:278 pass:267 dwarn:1 dfail:0 fail:0 skip:10 time:568s > fi-skl-6260u total:278 pass:268 dwarn:0 dfail:0 fail:0 skip:10 time:455s > fi-skl-6700hq total:278 pass:261 dwarn:0 dfail:0 fail:0 skip:17 time:578s > fi-skl-6700k total:278 pass:256 dwarn:4 dfail:0 fail:0 skip:18 time:463s > fi-skl-6770hq total:278 pass:268 dwarn:0 dfail:0 fail:0 skip:10 time:489s > fi-snb-2520m total:278 pass:246 dwarn:4 dfail:0 fail:0 skip:28 time:532s > fi-snb-2600 total:278 pass:245 dwarn:4 dfail:0 fail:0 skip:29 time:406s > > 7ffb3045557cbc7b49695b20416351e4e812179c drm-tip: 2017y-04m-25d- > 14h-42m-59s UTC integration manifest 9e12aca drm/i915: Sanitize hardware > context computation > fc81a53 drm/i915: Pre-calculate engine context size > > == Logs == > > For more details see: https://intel-gfx-ci.01.org/CI/Patchwork_4553/ Jani Saarinen Intel Finland Oy - BIC 0357606-4 - Westendinkatu 7, 02160 Espoo
On ke, 2017-04-26 at 10:43 +0000, Saarinen, Jani wrote: > > > > -----Original Message----- > > Subject: [Intel-gfx] ✗ Fi.CI.BAT: warning for series starting with [1/2] drm/i915: > > Pre-calculate engine context size > > > > == Series Details == > > > > Series: series starting with [1/2] drm/i915: Pre-calculate engine context size > > URL : https://patchwork.freedesktop.org/series/23559/ > > State : warning > > > > == Summary == > > > > Series 23559v1 Series without cover letter > > https://patchwork.freedesktop.org/api/1.0/series/23559/revisions/1/mbox/ > > > > Test drv_module_reload: > > Subgroup basic-no-display: > > pass -> DMESG-WARN (fi-hsw-4770r) > > pass -> DMESG-WARN (fi-byt-j1900) > > pass -> DMESG-WARN (fi-byt-n2820) > > pass -> DMESG-WARN (fi-ivb-3520m) > > pass -> DMESG-WARN (fi-ilk-650) > > pass -> DMESG-WARN (fi-snb-2520m) > > pass -> DMESG-WARN (fi-hsw-4770) > > pass -> DMESG-WARN (fi-ivb-3770) > > pass -> DMESG-WARN (fi-snb-2600) > > Subgroup basic-reload: > > pass -> DMESG-WARN (fi-hsw-4770r) > > pass -> DMESG-WARN (fi-byt-j1900) > > pass -> DMESG-WARN (fi-byt-n2820) > > pass -> DMESG-WARN (fi-ivb-3520m) > > pass -> DMESG-WARN (fi-ilk-650) > > pass -> DMESG-WARN (fi-snb-2520m) > > pass -> DMESG-WARN (fi-hsw-4770) > > pass -> DMESG-WARN (fi-ivb-3770) > > pass -> DMESG-WARN (fi-snb-2600) > > Subgroup basic-reload-final: > > pass -> DMESG-WARN (fi-hsw-4770r) > > pass -> DMESG-WARN (fi-byt-j1900) > > pass -> DMESG-WARN (fi-byt-n2820) > > pass -> DMESG-WARN (fi-ivb-3520m) > > pass -> DMESG-WARN (fi-ilk-650) > > pass -> DMESG-WARN (fi-snb-2520m) > > pass -> DMESG-WARN (fi-hsw-4770) > > pass -> DMESG-WARN (fi-ivb-3770) > > pass -> DMESG-WARN (fi-snb-2600) > > Subgroup basic-reload-inject: > > pass -> DMESG-WARN (fi-hsw-4770r) > > pass -> DMESG-WARN (fi-byt-j1900) > > pass -> DMESG-WARN (fi-byt-n2820) > > pass -> DMESG-WARN (fi-ivb-3520m) > > pass -> DMESG-WARN (fi-ilk-650) > > pass -> DMESG-WARN (fi-snb-2520m) > > pass -> DMESG-WARN (fi-hsw-4770) > > pass 
-> DMESG-WARN (fi-ivb-3770) > > pass -> DMESG-WARN (fi-snb-2600) > > Test gem_exec_suspend: > > Subgroup basic-s4-devices: > > pass -> DMESG-WARN (fi-kbl-7560u) fdo#100125 > > How about trybot first? What exactly is the point of running CI on intel-gfx traffic if everything is supposed to be sent to trybot first? Sending trivial series to trybot first would double the already intolerable latency of the system. intel-gfx is supposed to be a place to discuss the patches among the developers, or do you think we should have intel-gfx-dev for that, so that everything but ready-to-merge series is sent there? Regards, Joonas -- Joonas Lahtinen Open Source Technology Center Intel Corporation
Hi, > -----Original Message----- > From: Joonas Lahtinen [mailto:joonas.lahtinen@linux.intel.com] > Sent: Wednesday, April 26, 2017 3:36 PM > To: Saarinen, Jani <jani.saarinen@intel.com>; intel-gfx@lists.freedesktop.org > Subject: Re: [Intel-gfx] ✗ Fi.CI.BAT: warning for series starting with [1/2] > drm/i915: Pre-calculate engine context size > > On ke, 2017-04-26 at 10:43 +0000, Saarinen, Jani wrote: > > > > > > -----Original Message----- > > > Subject: [Intel-gfx] ✗ Fi.CI.BAT: warning for series starting with [1/2] > drm/i915: > > > Pre-calculate engine context size > > > > > > == Series Details == > > > > > > Series: series starting with [1/2] drm/i915: Pre-calculate engine > > > context size URL : https://patchwork.freedesktop.org/series/23559/ > > > State : warning > > > > > > == Summary == > > > > > > Series 23559v1 Series without cover letter > > > https://patchwork.freedesktop.org/api/1.0/series/23559/revisions/1/m > > > box/ > > > > > > > How about trybot first? > > What exactly is the point of running CI on intel-gfx traffic if everything is > supposed to be sent to trybot first? Sending trivial series to trybot first would > double the already untolerable latency of the system. intel-gfx is supposed to be Well, I am thinking that the most obvious failures are first caught by trybot, and when the series passes there it is then sent for review; this is what e.g. Chris does a lot. That way trybot serves as the developers' pre-pre-criteria. But I agree there is latency if there is work in the queue, but not if drm, dif, dinf and pw are clean; then it is just a matter of 10+ minutes. > a place to discuss the patches among the developers, or you think we should > have intel-gfx-dev for that and everything but ready-to-merge series are sent > there? Well, it is up to the developers to decide; I am just saying that if patches are totally bogus, maybe test them first not on the list but on the try-bot list ;) > > Regards, Joonas > -- Jani Saarinen Intel Finland Oy - BIC 0357606-4 - Westendinkatu 7, 02160 Espoo
On Wed, Apr 26, 2017 at 12:43:18PM +0000, Saarinen, Jani wrote: > Hi, > > -----Original Message----- > > From: Joonas Lahtinen [mailto:joonas.lahtinen@linux.intel.com] > > Sent: Wednesday, April 26, 2017 3:36 PM > > To: Saarinen, Jani <jani.saarinen@intel.com>; intel-gfx@lists.freedesktop.org > > Subject: Re: [Intel-gfx] ✗ Fi.CI.BAT: warning for series starting with [1/2] > > drm/i915: Pre-calculate engine context size > > > > On ke, 2017-04-26 at 10:43 +0000, Saarinen, Jani wrote: > > > > > > > > -----Original Message----- > > > > Subject: [Intel-gfx] ✗ Fi.CI.BAT: warning for series starting with [1/2] > > drm/i915: > > > > Pre-calculate engine context size > > > > > > > > == Series Details == > > > > > > > > Series: series starting with [1/2] drm/i915: Pre-calculate engine > > > > context size URL : https://patchwork.freedesktop.org/series/23559/ > > > > State : warning > > > > > > > > == Summary == > > > > > > > > Series 23559v1 Series without cover letter > > > > https://patchwork.freedesktop.org/api/1.0/series/23559/revisions/1/m > > > > box/ > > > > > > > > > > How about trybot first? > > > > What exactly is the point of running CI on intel-gfx traffic if everything is > > supposed to be sent to trybot first? Sending trivial series to trybot first would > > double the already untolerable latency of the system. intel-gfx is supposed to be > Well I am thinking that first most obvious failures are caught by trybot and when > passing there then sending for review, this is what eg. Chris is doing a lot. > So that trybot serves developers pre-pre-criteria. > But agree there is latency is there are work in the queue but not if drm, dif, dinf and pw and clean, > then is just matter of 10+ minutes. > > > a place to discuss the patches among the developers, or you think we should > > have intel-gfx-dev for that and everything but ready-to-merge series are sent > > there? 
> Well it is up to developers to decide, just saying that if patches are totally bogus, maybe test first > not on the list but try-bot list ;) Don't forget that Joonas also wanted to validate the design choice, which is something we want to do as early as possible as well. Nearly complete code is fine for sampling on whether the entire approach is heading in the right direction. And CI results are useful here as well: they catch oversights that may escape us mere humans. -Chris
On 26/04/17 02:57, Zhi Wang wrote: > Uh...sorry for not mentioning that before:), and stolen memory is not my > business. :( > > Actually we root-caused it. > > This is how we found this case: > > The story is W driver directly allocated the ring buffer after the > context image, and the context image size in W driver is 19 pages. GVT-g > will do shadow context during submission, we copy 20 pages from guest > context image, so you can see, an extra page is copied here as the > context image size is actual 19 pages. The extra page belows to ring > buffer. When guest updates that page with new commands during GVT-g > executing the workload, the extra page ( which is ring buffer page) will > be over-written with old content, since GVT-g will copy the shadow > context (20 pages) back to guest at this time. > > That's the full story. I send another email to Harsh. He should know if > the context image size of CHV is also 19 pages. > > Thanks, > Zhi. > I did a quick check and according to the specs both the BDW and the CHV lrcs are formed by 18096 dwords plus the per-context HWSP, which converts to 19 pages for both platforms. Regards, Daniele > 于 04/26/17 17:52, Joonas Lahtinen 写道: >> On ke, 2017-04-26 at 17:10 +0800, Zhi Wang wrote: >>> Hi Joonas: >>> Can you change GEN8_LR_CONTEXT_RENDER_SIZE = (19 * PAGE_SIZE)? >>> Then we don't need the hack in GVT-g. :P Actually it's 19 pages not >>> 20 pages on BDW. >> The exception is only made for BDW, not Gen8 overall. Has the change >> been verified for CHV too? >> >> Why hasn't a patch to fix above been sent for i915 in the past? Just >> like in the stolen memory disabling case, bugs should be root caused >> and then fixed, not just worked around quickly. >> >> Regards, Joonas >
Hi Daniele: Thanks for the reply! Joonas and I did some research on IRC after the email. We found that the B-spec does say, in the context layout section, that the context image for the render engine consists of 20 pages. It looks like a mistake in the B-spec. Another interesting thing we found is that the context image size for KBL halo is weird: it is not the same as for the other KBL SKUs. Thanks, Zhi. 于 04/27/17 00:20, Daniele Ceraolo Spurio 写道: > > > On 26/04/17 02:57, Zhi Wang wrote: >> Uh...sorry for not mentioning that before:), and stolen memory is not my >> business. :( >> >> Actually we root-caused it. >> >> This is how we found this case: >> >> The story is W driver directly allocated the ring buffer after the >> context image, and the context image size in W driver is 19 pages. GVT-g >> will do shadow context during submission, we copy 20 pages from guest >> context image, so you can see, an extra page is copied here as the >> context image size is actual 19 pages. The extra page belows to ring >> buffer. When guest updates that page with new commands during GVT-g >> executing the workload, the extra page ( which is ring buffer page) will >> be over-written with old content, since GVT-g will copy the shadow >> context (20 pages) back to guest at this time. >> >> That's the full story. I send another email to Harsh. He should know if >> the context image size of CHV is also 19 pages. >> >> Thanks, >> Zhi. >> > > I did a quick check and according to the specs both the BDW and the > CHV lrcs are formed by 18096 dwords plus the per-context HWSP, which > converts to 19 pages for both platforms. > > Regards, > Daniele > >> 于 04/26/17 17:52, Joonas Lahtinen 写道: >>> On ke, 2017-04-26 at 17:10 +0800, Zhi Wang wrote: >>>> Hi Joonas: >>>> Can you change GEN8_LR_CONTEXT_RENDER_SIZE = (19 * PAGE_SIZE)? >>>> Then we don't need the hack in GVT-g. :P Actually it's 19 pages not >>>> 20 pages on BDW. >>> The exception is only made for BDW, not Gen8 overall. Has the change >>> been verified for CHV too?
>>> >>> Why hasn't a patch to fix above been sent for i915 in the past? Just >>> like in the stolen memory disabling case, bugs should be root caused >>> and then fixed, not just worked around quickly. >>> >>> Regards, Joonas >> > _______________________________________________ > intel-gvt-dev mailing list > intel-gvt-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/intel-gvt-dev
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index a77db23..ac538dc 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -69,8 +69,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) gvt_dbg_sched("ring id %d workload lrca %x", ring_id, workload->ctx_desc.lrca); - context_page_num = intel_lr_context_size( - gvt->dev_priv->engine[ring_id]); + context_page_num = gvt->dev_priv->engine[ring_id]->context_size; context_page_num = context_page_num >> PAGE_SHIFT; @@ -333,8 +332,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload) gvt_dbg_sched("ring id %d workload lrca %x\n", ring_id, workload->ctx_desc.lrca); - context_page_num = intel_lr_context_size( - gvt->dev_priv->engine[ring_id]); + context_page_num = gvt->dev_priv->engine[ring_id]->context_size; context_page_num = context_page_num >> PAGE_SHIFT; diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index ab5140b..6c78637 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -1051,8 +1051,7 @@ static int guc_ads_create(struct intel_guc *guc) dev_priv->engine[RCS]->status_page.ggtt_offset; for_each_engine(engine, dev_priv, id) - blob->ads.eng_state_size[engine->guc_id] = - intel_lr_context_size(engine); + blob->ads.eng_state_size[engine->guc_id] = engine->context_size; base = guc_ggtt_offset(vma); blob->ads.scheduler_policies = base + ptr_offset(blob, policies); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 82a274b..091c0c7 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -26,6 +26,10 @@ #include "intel_ringbuffer.h" #include "intel_lrc.h" +#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) +#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE) +#define GEN8_LR_CONTEXT_OTHER_SIZE ( 
2 * PAGE_SIZE) + struct engine_class_info { const char *name; int (*init_legacy)(struct intel_engine_cs *engine); @@ -107,6 +111,46 @@ static const struct engine_info intel_engines[] = { }, }; +/** + * __intel_engine_context_size() - return the size of the context for an engine + * @dev_priv: i915 device private + * @class: engine class + * + * Each engine class may require a different amount of space for a context + * image. + * + * Return: size (in bytes) of an engine class specific context image + * + * Note: this size includes the HWSP, which is part of the context image + * in LRC mode, but does not include the "shared data page" used with + * GuC submission. The caller should account for this if using the GuC. + */ +static u32 +__intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class) +{ + WARN_ON(INTEL_GEN(dev_priv) < 8); + + switch (class) { + case RENDER_CLASS: + switch (INTEL_GEN(dev_priv)) { + default: + MISSING_CASE(INTEL_GEN(dev_priv)); + case 9: + return GEN9_LR_CONTEXT_RENDER_SIZE; + case 8: + return GEN8_LR_CONTEXT_RENDER_SIZE; + } + break; + case VIDEO_DECODE_CLASS: + case VIDEO_ENHANCEMENT_CLASS: + case COPY_ENGINE_CLASS: + return GEN8_LR_CONTEXT_OTHER_SIZE; + } + + MISSING_CASE(class); + return GEN8_LR_CONTEXT_OTHER_SIZE; +} + static int intel_engine_setup(struct drm_i915_private *dev_priv, enum intel_engine_id id) @@ -134,6 +178,8 @@ intel_engine_setup(struct drm_i915_private *dev_priv, engine->irq_shift = info->irq_shift; engine->class = info->class; engine->instance = info->instance; + engine->context_size = __intel_engine_context_size(dev_priv, + engine->class); /* Nothing to do here, execute in order of dependencies */ engine->schedule = NULL; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 5ec064a..0909549 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -138,10 +138,6 @@ #include "i915_drv.h" #include "intel_mocs.h" -#define 
GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) -#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE) -#define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE) - #define RING_EXECLIST_QFULL (1 << 0x2) #define RING_EXECLIST1_VALID (1 << 0x3) #define RING_EXECLIST0_VALID (1 << 0x4) @@ -1918,53 +1914,6 @@ populate_lr_context(struct i915_gem_context *ctx, return 0; } -/** - * intel_lr_context_size() - return the size of the context for an engine - * @engine: which engine to find the context size for - * - * Each engine may require a different amount of space for a context image, - * so when allocating (or copying) an image, this function can be used to - * find the right size for the specific engine. - * - * Return: size (in bytes) of an engine-specific context image - * - * Note: this size includes the HWSP, which is part of the context image - * in LRC mode, but does not include the "shared data page" used with - * GuC submission. The caller should account for this if using the GuC. - */ -uint32_t intel_lr_context_size(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - WARN_ON(INTEL_GEN(dev_priv) < 8); - - switch (engine->class) { - case RENDER_CLASS: - switch (INTEL_GEN(dev_priv)) { - default: - MISSING_CASE(INTEL_GEN(dev_priv)); - case 9: - ret = GEN9_LR_CONTEXT_RENDER_SIZE; - break; - case 8: - ret = GEN8_LR_CONTEXT_RENDER_SIZE; - break; - } - break; - - default: - MISSING_CASE(engine->class); - case VIDEO_DECODE_CLASS: - case VIDEO_ENHANCEMENT_CLASS: - case COPY_ENGINE_CLASS: - ret = GEN8_LR_CONTEXT_OTHER_SIZE; - break; - } - - return ret; -} - static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { @@ -1977,8 +1926,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, WARN_ON(ce->state); - context_size = round_up(intel_lr_context_size(engine), - I915_GTT_PAGE_SIZE); + context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE); /* One 
extra page as the sharing data between driver and GuC */ context_size += PAGE_SIZE * LRC_PPHWSP_PN; diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index e8015e7..52b3a1f 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -78,8 +78,6 @@ int logical_xcs_ring_init(struct intel_engine_cs *engine); struct drm_i915_private; struct i915_gem_context; -uint32_t intel_lr_context_size(struct intel_engine_cs *engine); - void intel_lr_context_resume(struct drm_i915_private *dev_priv); uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx, struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 96710b6..598194d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -196,12 +196,13 @@ struct intel_engine_cs { enum intel_engine_id id; unsigned int uabi_id; unsigned int hw_id; + unsigned int guc_id; u8 class; u8 instance; + u32 context_size; - unsigned int guc_id; - u32 mmio_base; + u32 mmio_base; unsigned int irq_shift; struct intel_ring *buffer; struct intel_timeline *timeline;
Pre-calculate engine context size based on engine class and device generation and store it in the engine instance. Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Paulo Zanoni <paulo.r.zanoni@intel.com> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Oscar Mateo <oscar.mateo@intel.com> Cc: Zhenyu Wang <zhenyuw@linux.intel.com> Cc: intel-gvt-dev@lists.freedesktop.org --- drivers/gpu/drm/i915/gvt/scheduler.c | 6 ++-- drivers/gpu/drm/i915/i915_guc_submission.c | 3 +- drivers/gpu/drm/i915/intel_engine_cs.c | 46 +++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_lrc.c | 54 +----------------------------- drivers/gpu/drm/i915/intel_lrc.h | 2 -- drivers/gpu/drm/i915/intel_ringbuffer.h | 5 +-- 6 files changed, 53 insertions(+), 63 deletions(-)