diff mbox

[12/53] drm/i915/bdw: Populate LR contexts (somewhat)

Message ID 1402673891-14618-13-git-send-email-oscar.mateo@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

oscar.mateo@intel.com June 13, 2014, 3:37 p.m. UTC
From: Oscar Mateo <oscar.mateo@intel.com>

For the most part, logical ring context objects are similar to hardware
contexts in that the backing object is meant to be opaque. There are
some exceptions where we need to poke certain offsets of the object for
initialization, updating the tail pointer or updating the PDPs.

For our basic execlist implementation we'll only need our PPGTT PDs,
and ringbuffer addresses in order to set up the context. With previous
patches, we have both, so start prepping the context to be load.

Before running a context for the first time you must populate some
fields in the context object. These fields begin 1 PAGE + LRCA, ie. the
first page (in 0 based counting) of the context  image. These same
fields will be read and written to as contexts are saved and restored
once the system is up and running.

Many of these fields are completely reused from previous global
registers: ringbuffer head/tail/control, context control matches some
previous MI_SET_CONTEXT flags, and page directories. There are other
fields which we don't touch which we may want in the future.

v2: CTX_LRI_HEADER_0 is MI_LOAD_REGISTER_IMM(14) for render and (11)
for other engines.

v3: Several rebases and general changes to the code.

v4: Squash with "Extract LR context object populating"
Also, Damien's review comments:
- Set the Force Posted bit on the LRI header, as the BSpec suggest we do.
- Prevent warning when compiling a 32-bits kernel without HIGHMEM64.
- Add a clarifying comment to the context population code.

v5: Damien's review comments:
- The third MI_LOAD_REGISTER_IMM in the context does not set Force Posted.
- Remove dead code.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net> (v1)
Signed-off-by: Rafael Barbalho <rafael.barbalho@intel.com> (v2)
Signed-off-by: Oscar Mateo <oscar.mateo@intel.com> (v3-5)
---
 drivers/gpu/drm/i915/i915_reg.h  |   1 +
 drivers/gpu/drm/i915/intel_lrc.c | 154 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 151 insertions(+), 4 deletions(-)

Comments

bradley.d.volkin@intel.com June 18, 2014, 11:24 p.m. UTC | #1
On Fri, Jun 13, 2014 at 08:37:30AM -0700, oscar.mateo@intel.com wrote:
> From: Oscar Mateo <oscar.mateo@intel.com>
> 
> For the most part, logical ring context objects are similar to hardware
> contexts in that the backing object is meant to be opaque. There are
> some exceptions where we need to poke certain offsets of the object for
> initialization, updating the tail pointer or updating the PDPs.
> 
> For our basic execlist implementation we'll only need our PPGTT PDs,
> and ringbuffer addresses in order to set up the context. With previous
> patches, we have both, so start prepping the context to be load.
> 
> Before running a context for the first time you must populate some
> fields in the context object. These fields begin 1 PAGE + LRCA, ie. the
> first page (in 0 based counting) of the context  image. These same
> fields will be read and written to as contexts are saved and restored
> once the system is up and running.
> 
> Many of these fields are completely reused from previous global
> registers: ringbuffer head/tail/control, context control matches some
> previous MI_SET_CONTEXT flags, and page directories. There are other
> fields which we don't touch which we may want in the future.
> 
> v2: CTX_LRI_HEADER_0 is MI_LOAD_REGISTER_IMM(14) for render and (11)
> for other engines.
> 
> v3: Several rebases and general changes to the code.
> 
> v4: Squash with "Extract LR context object populating"
> Also, Damien's review comments:
> - Set the Force Posted bit on the LRI header, as the BSpec suggest we do.
> - Prevent warning when compiling a 32-bits kernel without HIGHMEM64.
> - Add a clarifying comment to the context population code.
> 
> v5: Damien's review comments:
> - The third MI_LOAD_REGISTER_IMM in the context does not set Force Posted.
> - Remove dead code.
> 
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net> (v1)
> Signed-off-by: Rafael Barbalho <rafael.barbalho@intel.com> (v2)
> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com> (v3-5)
> ---
>  drivers/gpu/drm/i915/i915_reg.h  |   1 +
>  drivers/gpu/drm/i915/intel_lrc.c | 154 ++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 151 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 286f05c..9c8692a 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -277,6 +277,7 @@
>   *   address/value pairs. Don't overdue it, though, x <= 2^4 must hold!
>   */
>  #define MI_LOAD_REGISTER_IMM(x)	MI_INSTR(0x22, 2*(x)-1)
> +#define   MI_LRI_FORCE_POSTED		(1<<12)
>  #define MI_STORE_REGISTER_MEM(x) MI_INSTR(0x24, 2*(x)-1)
>  #define MI_STORE_REGISTER_MEM_GEN8(x) MI_INSTR(0x24, 3*(x)-1)
>  #define   MI_SRM_LRM_GLOBAL_GTT		(1<<22)
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index b3a23e0..b96bb45 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -46,6 +46,38 @@
>  
>  #define GEN8_LR_CONTEXT_ALIGN 4096
>  
> +#define RING_ELSP(ring)			((ring)->mmio_base+0x230)
> +#define RING_CONTEXT_CONTROL(ring)	((ring)->mmio_base+0x244)
> +
> +#define CTX_LRI_HEADER_0		0x01
> +#define CTX_CONTEXT_CONTROL		0x02
> +#define CTX_RING_HEAD			0x04
> +#define CTX_RING_TAIL			0x06
> +#define CTX_RING_BUFFER_START		0x08
> +#define CTX_RING_BUFFER_CONTROL		0x0a
> +#define CTX_BB_HEAD_U			0x0c
> +#define CTX_BB_HEAD_L			0x0e
> +#define CTX_BB_STATE			0x10
> +#define CTX_SECOND_BB_HEAD_U		0x12
> +#define CTX_SECOND_BB_HEAD_L		0x14
> +#define CTX_SECOND_BB_STATE		0x16
> +#define CTX_BB_PER_CTX_PTR		0x18
> +#define CTX_RCS_INDIRECT_CTX		0x1a
> +#define CTX_RCS_INDIRECT_CTX_OFFSET	0x1c
> +#define CTX_LRI_HEADER_1		0x21
> +#define CTX_CTX_TIMESTAMP		0x22
> +#define CTX_PDP3_UDW			0x24
> +#define CTX_PDP3_LDW			0x26
> +#define CTX_PDP2_UDW			0x28
> +#define CTX_PDP2_LDW			0x2a
> +#define CTX_PDP1_UDW			0x2c
> +#define CTX_PDP1_LDW			0x2e
> +#define CTX_PDP0_UDW			0x30
> +#define CTX_PDP0_LDW			0x32
> +#define CTX_LRI_HEADER_2		0x41
> +#define CTX_R_PWR_CLK_STATE		0x42
> +#define CTX_GPGPU_CSR_BASE_ADDRESS	0x44
> +
>  bool intel_enable_execlists(struct drm_device *dev)
>  {
>  	if (!i915.enable_execlists)
> @@ -54,6 +86,110 @@ bool intel_enable_execlists(struct drm_device *dev)
>  	return HAS_LOGICAL_RING_CONTEXTS(dev) && USES_PPGTT(dev);
>  }
>  
> +static int
> +populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_obj,
> +		    struct intel_engine_cs *ring, struct drm_i915_gem_object *ring_obj)
> +{
> +	struct i915_hw_ppgtt *ppgtt = ctx_to_ppgtt(ctx);
> +	struct page *page;
> +	uint32_t *reg_state;
> +	int ret;
> +
> +	ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true);
> +	if (ret) {
> +		DRM_DEBUG_DRIVER("Could not set to CPU domain\n");
> +		return ret;
> +	}
> +
> +	ret = i915_gem_object_get_pages(ctx_obj);
> +	if (ret) {
> +		DRM_DEBUG_DRIVER("Could not get object pages\n");
> +		return ret;
> +	}
> +
> +	i915_gem_object_pin_pages(ctx_obj);
> +
> +	/* The second page of the context object contains some fields which must
> +	 * be set up prior to the first execution. */
> +	page = i915_gem_object_get_page(ctx_obj, 1);
> +	reg_state = kmap_atomic(page);
> +
> +	/* A context is actually a big batch buffer with several MI_LOAD_REGISTER_IMM
> +	 * commands followed by (reg, value) pairs. The values we are setting here are
> +	 * only for the first context restore: on a subsequent save, the GPU will
> +	 * recreate this batchbuffer with new values (including all the missing
> +	 * MI_LOAD_REGISTER_IMM commands that we are not initializing here). */
> +	if (ring->id == RCS)
> +		reg_state[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(14);
> +	else
> +		reg_state[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(11);
> +	reg_state[CTX_LRI_HEADER_0] |= MI_LRI_FORCE_POSTED;
> +	reg_state[CTX_CONTEXT_CONTROL] = RING_CONTEXT_CONTROL(ring);
> +	reg_state[CTX_CONTEXT_CONTROL+1] = (1<<3) | MI_RESTORE_INHIBIT;
> +	reg_state[CTX_CONTEXT_CONTROL+1] |= reg_state[CTX_CONTEXT_CONTROL+1] << 16;

If we can, we should probably use _MASKED_BIT_ENABLE() here to make it more
obvious why we're doing the or+shift.

> +	reg_state[CTX_RING_HEAD] = RING_HEAD(ring->mmio_base);
> +	reg_state[CTX_RING_HEAD+1] = 0;
> +	reg_state[CTX_RING_TAIL] = RING_TAIL(ring->mmio_base);
> +	reg_state[CTX_RING_TAIL+1] = 0;
> +	reg_state[CTX_RING_BUFFER_START] = RING_START(ring->mmio_base);
> +	reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj);
> +	reg_state[CTX_RING_BUFFER_CONTROL] = RING_CTL(ring->mmio_base);
> +	reg_state[CTX_RING_BUFFER_CONTROL+1] = (31 * PAGE_SIZE) | RING_VALID;

The size here doesn't look right to me. Shouldn't it be (number of pages - 1)?
See init_ring_common()

> +	reg_state[CTX_BB_HEAD_U] = ring->mmio_base + 0x168;
> +	reg_state[CTX_BB_HEAD_U+1] = 0;
> +	reg_state[CTX_BB_HEAD_L] = ring->mmio_base + 0x140;
> +	reg_state[CTX_BB_HEAD_L+1] = 0;
> +	reg_state[CTX_BB_STATE] = ring->mmio_base + 0x110;
> +	reg_state[CTX_BB_STATE+1] = (1<<5);
> +	reg_state[CTX_SECOND_BB_HEAD_U] = ring->mmio_base + 0x11c;
> +	reg_state[CTX_SECOND_BB_HEAD_U+1] = 0;
> +	reg_state[CTX_SECOND_BB_HEAD_L] = ring->mmio_base + 0x114;
> +	reg_state[CTX_SECOND_BB_HEAD_L+1] = 0;
> +	reg_state[CTX_SECOND_BB_STATE] = ring->mmio_base + 0x118;
> +	reg_state[CTX_SECOND_BB_STATE+1] = 0;
> +	if (ring->id == RCS) {
> +		reg_state[CTX_BB_PER_CTX_PTR] = ring->mmio_base + 0x1c0;
> +		reg_state[CTX_BB_PER_CTX_PTR+1] = 0;
> +		reg_state[CTX_RCS_INDIRECT_CTX] = ring->mmio_base + 0x1c4;
> +		reg_state[CTX_RCS_INDIRECT_CTX+1] = 0;
> +		reg_state[CTX_RCS_INDIRECT_CTX_OFFSET] = ring->mmio_base + 0x1c8;
> +		reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] = 0;
> +	}
> +	reg_state[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9);
> +	reg_state[CTX_LRI_HEADER_1] |= MI_LRI_FORCE_POSTED;
> +	reg_state[CTX_CTX_TIMESTAMP] = ring->mmio_base + 0x3a8;
> +	reg_state[CTX_CTX_TIMESTAMP+1] = 0;
> +	reg_state[CTX_PDP3_UDW] = GEN8_RING_PDP_UDW(ring, 3);
> +	reg_state[CTX_PDP3_LDW] = GEN8_RING_PDP_LDW(ring, 3);
> +	reg_state[CTX_PDP2_UDW] = GEN8_RING_PDP_UDW(ring, 2);
> +	reg_state[CTX_PDP2_LDW] = GEN8_RING_PDP_LDW(ring, 2);
> +	reg_state[CTX_PDP1_UDW] = GEN8_RING_PDP_UDW(ring, 1);
> +	reg_state[CTX_PDP1_LDW] = GEN8_RING_PDP_LDW(ring, 1);
> +	reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0);
> +	reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0);
> +	reg_state[CTX_PDP3_UDW+1] = (u64)ppgtt->pd_dma_addr[3] >> 32;
> +	reg_state[CTX_PDP3_LDW+1] = ppgtt->pd_dma_addr[3];
> +	reg_state[CTX_PDP2_UDW+1] = (u64)ppgtt->pd_dma_addr[2] >> 32;
> +	reg_state[CTX_PDP2_LDW+1] = ppgtt->pd_dma_addr[2];
> +	reg_state[CTX_PDP1_UDW+1] = (u64)ppgtt->pd_dma_addr[1] >> 32;
> +	reg_state[CTX_PDP1_LDW+1] = ppgtt->pd_dma_addr[1];
> +	reg_state[CTX_PDP0_UDW+1] = (u64)ppgtt->pd_dma_addr[0] >> 32;
> +	reg_state[CTX_PDP0_LDW+1] = ppgtt->pd_dma_addr[0];

Are we able to use upper_32_bits() and lower_32_bits() for these?

Brad

> +	if (ring->id == RCS) {
> +		reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
> +		reg_state[CTX_R_PWR_CLK_STATE] = 0x20c8;
> +		reg_state[CTX_R_PWR_CLK_STATE+1] = 0;
> +	}
> +
> +	kunmap_atomic(reg_state);
> +
> +	ctx_obj->dirty = 1;
> +	set_page_dirty(page);
> +	i915_gem_object_unpin_pages(ctx_obj);
> +
> +	return 0;
> +}
> +
>  void intel_lr_context_free(struct intel_context *ctx)
>  {
>  	int i;
> @@ -145,14 +281,24 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
>  	if (ret) {
>  		DRM_DEBUG_DRIVER("Failed to allocate ringbuffer obj %s: %d\n",
>  				ring->name, ret);
> -		kfree(ringbuf);
> -		i915_gem_object_ggtt_unpin(ctx_obj);
> -		drm_gem_object_unreference(&ctx_obj->base);
> -		return ret;
> +		goto error;
> +	}
> +
> +	ret = populate_lr_context(ctx, ctx_obj, ring, ringbuf->obj);
> +	if (ret) {
> +		DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
> +		intel_destroy_ring_buffer(ringbuf);
> +		goto error;
>  	}
>  
>  	ctx->engine[ring->id].ringbuf = ringbuf;
>  	ctx->engine[ring->id].obj = ctx_obj;
>  
>  	return 0;
> +
> +error:
> +	kfree(ringbuf);
> +	i915_gem_object_ggtt_unpin(ctx_obj);
> +	drm_gem_object_unreference(&ctx_obj->base);
> +	return ret;
>  }
> -- 
> 1.9.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
oscar.mateo@intel.com June 23, 2014, 12:42 p.m. UTC | #2
> -----Original Message-----
> From: Volkin, Bradley D
> Sent: Thursday, June 19, 2014 12:24 AM
> To: Mateo Lozano, Oscar
> Cc: intel-gfx@lists.freedesktop.org
> Subject: Re: [Intel-gfx] [PATCH 12/53] drm/i915/bdw: Populate LR contexts
> (somewhat)
> 
> On Fri, Jun 13, 2014 at 08:37:30AM -0700, oscar.mateo@intel.com wrote:
> > From: Oscar Mateo <oscar.mateo@intel.com>
> >
> > For the most part, logical ring context objects are similar to
> > hardware contexts in that the backing object is meant to be opaque.
> > There are some exceptions where we need to poke certain offsets of the
> > object for initialization, updating the tail pointer or updating the PDPs.
> >
> > For our basic execlist implementation we'll only need our PPGTT PDs,
> > and ringbuffer addresses in order to set up the context. With previous
> > patches, we have both, so start prepping the context to be load.
> >
> > Before running a context for the first time you must populate some
> > fields in the context object. These fields begin 1 PAGE + LRCA, ie.
> > the first page (in 0 based counting) of the context  image. These same
> > fields will be read and written to as contexts are saved and restored
> > once the system is up and running.
> >
> > Many of these fields are completely reused from previous global
> > registers: ringbuffer head/tail/control, context control matches some
> > previous MI_SET_CONTEXT flags, and page directories. There are other
> > fields which we don't touch which we may want in the future.
> >
> > v2: CTX_LRI_HEADER_0 is MI_LOAD_REGISTER_IMM(14) for render and
> (11)
> > for other engines.
> >
> > v3: Several rebases and general changes to the code.
> >
> > v4: Squash with "Extract LR context object populating"
> > Also, Damien's review comments:
> > - Set the Force Posted bit on the LRI header, as the BSpec suggest we do.
> > - Prevent warning when compiling a 32-bits kernel without HIGHMEM64.
> > - Add a clarifying comment to the context population code.
> >
> > v5: Damien's review comments:
> > - The third MI_LOAD_REGISTER_IMM in the context does not set Force
> Posted.
> > - Remove dead code.
> >
> > Signed-off-by: Ben Widawsky <ben@bwidawsk.net> (v1)
> > Signed-off-by: Rafael Barbalho <rafael.barbalho@intel.com> (v2)
> > Signed-off-by: Oscar Mateo <oscar.mateo@intel.com> (v3-5)
> > ---
> >  drivers/gpu/drm/i915/i915_reg.h  |   1 +
> >  drivers/gpu/drm/i915/intel_lrc.c | 154
> > ++++++++++++++++++++++++++++++++++++++-
> >  2 files changed, 151 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_reg.h
> > b/drivers/gpu/drm/i915/i915_reg.h index 286f05c..9c8692a 100644
> > --- a/drivers/gpu/drm/i915/i915_reg.h
> > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > @@ -277,6 +277,7 @@
> >   *   address/value pairs. Don't overdue it, though, x <= 2^4 must hold!
> >   */
> >  #define MI_LOAD_REGISTER_IMM(x)	MI_INSTR(0x22, 2*(x)-1)
> > +#define   MI_LRI_FORCE_POSTED		(1<<12)
> >  #define MI_STORE_REGISTER_MEM(x) MI_INSTR(0x24, 2*(x)-1)  #define
> > MI_STORE_REGISTER_MEM_GEN8(x) MI_INSTR(0x24, 3*(x)-1)
> >  #define   MI_SRM_LRM_GLOBAL_GTT		(1<<22)
> > diff --git a/drivers/gpu/drm/i915/intel_lrc.c
> > b/drivers/gpu/drm/i915/intel_lrc.c
> > index b3a23e0..b96bb45 100644
> > --- a/drivers/gpu/drm/i915/intel_lrc.c
> > +++ b/drivers/gpu/drm/i915/intel_lrc.c
> > @@ -46,6 +46,38 @@
> >
> >  #define GEN8_LR_CONTEXT_ALIGN 4096
> >
> > +#define RING_ELSP(ring)			((ring)->mmio_base+0x230)
> > +#define RING_CONTEXT_CONTROL(ring)	((ring)->mmio_base+0x244)
> > +
> > +#define CTX_LRI_HEADER_0		0x01
> > +#define CTX_CONTEXT_CONTROL		0x02
> > +#define CTX_RING_HEAD			0x04
> > +#define CTX_RING_TAIL			0x06
> > +#define CTX_RING_BUFFER_START		0x08
> > +#define CTX_RING_BUFFER_CONTROL		0x0a
> > +#define CTX_BB_HEAD_U			0x0c
> > +#define CTX_BB_HEAD_L			0x0e
> > +#define CTX_BB_STATE			0x10
> > +#define CTX_SECOND_BB_HEAD_U		0x12
> > +#define CTX_SECOND_BB_HEAD_L		0x14
> > +#define CTX_SECOND_BB_STATE		0x16
> > +#define CTX_BB_PER_CTX_PTR		0x18
> > +#define CTX_RCS_INDIRECT_CTX		0x1a
> > +#define CTX_RCS_INDIRECT_CTX_OFFSET	0x1c
> > +#define CTX_LRI_HEADER_1		0x21
> > +#define CTX_CTX_TIMESTAMP		0x22
> > +#define CTX_PDP3_UDW			0x24
> > +#define CTX_PDP3_LDW			0x26
> > +#define CTX_PDP2_UDW			0x28
> > +#define CTX_PDP2_LDW			0x2a
> > +#define CTX_PDP1_UDW			0x2c
> > +#define CTX_PDP1_LDW			0x2e
> > +#define CTX_PDP0_UDW			0x30
> > +#define CTX_PDP0_LDW			0x32
> > +#define CTX_LRI_HEADER_2		0x41
> > +#define CTX_R_PWR_CLK_STATE		0x42
> > +#define CTX_GPGPU_CSR_BASE_ADDRESS	0x44
> > +
> >  bool intel_enable_execlists(struct drm_device *dev)  {
> >  	if (!i915.enable_execlists)
> > @@ -54,6 +86,110 @@ bool intel_enable_execlists(struct drm_device
> *dev)
> >  	return HAS_LOGICAL_RING_CONTEXTS(dev) && USES_PPGTT(dev);  }
> >
> > +static int
> > +populate_lr_context(struct intel_context *ctx, struct
> drm_i915_gem_object *ctx_obj,
> > +		    struct intel_engine_cs *ring, struct drm_i915_gem_object
> > +*ring_obj) {
> > +	struct i915_hw_ppgtt *ppgtt = ctx_to_ppgtt(ctx);
> > +	struct page *page;
> > +	uint32_t *reg_state;
> > +	int ret;
> > +
> > +	ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true);
> > +	if (ret) {
> > +		DRM_DEBUG_DRIVER("Could not set to CPU domain\n");
> > +		return ret;
> > +	}
> > +
> > +	ret = i915_gem_object_get_pages(ctx_obj);
> > +	if (ret) {
> > +		DRM_DEBUG_DRIVER("Could not get object pages\n");
> > +		return ret;
> > +	}
> > +
> > +	i915_gem_object_pin_pages(ctx_obj);
> > +
> > +	/* The second page of the context object contains some fields which
> must
> > +	 * be set up prior to the first execution. */
> > +	page = i915_gem_object_get_page(ctx_obj, 1);
> > +	reg_state = kmap_atomic(page);
> > +
> > +	/* A context is actually a big batch buffer with several
> MI_LOAD_REGISTER_IMM
> > +	 * commands followed by (reg, value) pairs. The values we are
> setting here are
> > +	 * only for the first context restore: on a subsequent save, the GPU
> will
> > +	 * recreate this batchbuffer with new values (including all the missing
> > +	 * MI_LOAD_REGISTER_IMM commands that we are not initializing
> here). */
> > +	if (ring->id == RCS)
> > +		reg_state[CTX_LRI_HEADER_0] =
> MI_LOAD_REGISTER_IMM(14);
> > +	else
> > +		reg_state[CTX_LRI_HEADER_0] =
> MI_LOAD_REGISTER_IMM(11);
> > +	reg_state[CTX_LRI_HEADER_0] |= MI_LRI_FORCE_POSTED;
> > +	reg_state[CTX_CONTEXT_CONTROL] =
> RING_CONTEXT_CONTROL(ring);
> > +	reg_state[CTX_CONTEXT_CONTROL+1] = (1<<3) |
> MI_RESTORE_INHIBIT;
> > +	reg_state[CTX_CONTEXT_CONTROL+1] |=
> reg_state[CTX_CONTEXT_CONTROL+1]
> > +<< 16;
> 
> If we can, we should probably use _MASKED_BIT_ENABLE() here to make it
> more obvious why we're doing the or+shift.

ACK

> > +	reg_state[CTX_RING_HEAD] = RING_HEAD(ring->mmio_base);
> > +	reg_state[CTX_RING_HEAD+1] = 0;
> > +	reg_state[CTX_RING_TAIL] = RING_TAIL(ring->mmio_base);
> > +	reg_state[CTX_RING_TAIL+1] = 0;
> > +	reg_state[CTX_RING_BUFFER_START] = RING_START(ring-
> >mmio_base);
> > +	reg_state[CTX_RING_BUFFER_START+1] =
> i915_gem_obj_ggtt_offset(ring_obj);
> > +	reg_state[CTX_RING_BUFFER_CONTROL] = RING_CTL(ring-
> >mmio_base);
> > +	reg_state[CTX_RING_BUFFER_CONTROL+1] = (31 * PAGE_SIZE) |
> > +RING_VALID;
> 
> The size here doesn't look right to me. Shouldn't it be (number of pages - 1)?
> See init_ring_common()

But that´s exactly what it is, right? 

Our ringbuf->size = 32 * PAGE_SIZE;
so we are setting 31 * PAGE_SIZE

> > +	reg_state[CTX_BB_HEAD_U] = ring->mmio_base + 0x168;
> > +	reg_state[CTX_BB_HEAD_U+1] = 0;
> > +	reg_state[CTX_BB_HEAD_L] = ring->mmio_base + 0x140;
> > +	reg_state[CTX_BB_HEAD_L+1] = 0;
> > +	reg_state[CTX_BB_STATE] = ring->mmio_base + 0x110;
> > +	reg_state[CTX_BB_STATE+1] = (1<<5);
> > +	reg_state[CTX_SECOND_BB_HEAD_U] = ring->mmio_base + 0x11c;
> > +	reg_state[CTX_SECOND_BB_HEAD_U+1] = 0;
> > +	reg_state[CTX_SECOND_BB_HEAD_L] = ring->mmio_base + 0x114;
> > +	reg_state[CTX_SECOND_BB_HEAD_L+1] = 0;
> > +	reg_state[CTX_SECOND_BB_STATE] = ring->mmio_base + 0x118;
> > +	reg_state[CTX_SECOND_BB_STATE+1] = 0;
> > +	if (ring->id == RCS) {
> > +		reg_state[CTX_BB_PER_CTX_PTR] = ring->mmio_base +
> 0x1c0;
> > +		reg_state[CTX_BB_PER_CTX_PTR+1] = 0;
> > +		reg_state[CTX_RCS_INDIRECT_CTX] = ring->mmio_base +
> 0x1c4;
> > +		reg_state[CTX_RCS_INDIRECT_CTX+1] = 0;
> > +		reg_state[CTX_RCS_INDIRECT_CTX_OFFSET] = ring-
> >mmio_base + 0x1c8;
> > +		reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] = 0;
> > +	}
> > +	reg_state[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9);
> > +	reg_state[CTX_LRI_HEADER_1] |= MI_LRI_FORCE_POSTED;
> > +	reg_state[CTX_CTX_TIMESTAMP] = ring->mmio_base + 0x3a8;
> > +	reg_state[CTX_CTX_TIMESTAMP+1] = 0;
> > +	reg_state[CTX_PDP3_UDW] = GEN8_RING_PDP_UDW(ring, 3);
> > +	reg_state[CTX_PDP3_LDW] = GEN8_RING_PDP_LDW(ring, 3);
> > +	reg_state[CTX_PDP2_UDW] = GEN8_RING_PDP_UDW(ring, 2);
> > +	reg_state[CTX_PDP2_LDW] = GEN8_RING_PDP_LDW(ring, 2);
> > +	reg_state[CTX_PDP1_UDW] = GEN8_RING_PDP_UDW(ring, 1);
> > +	reg_state[CTX_PDP1_LDW] = GEN8_RING_PDP_LDW(ring, 1);
> > +	reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0);
> > +	reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0);
> > +	reg_state[CTX_PDP3_UDW+1] = (u64)ppgtt->pd_dma_addr[3] >> 32;
> > +	reg_state[CTX_PDP3_LDW+1] = ppgtt->pd_dma_addr[3];
> > +	reg_state[CTX_PDP2_UDW+1] = (u64)ppgtt->pd_dma_addr[2] >> 32;
> > +	reg_state[CTX_PDP2_LDW+1] = ppgtt->pd_dma_addr[2];
> > +	reg_state[CTX_PDP1_UDW+1] = (u64)ppgtt->pd_dma_addr[1] >> 32;
> > +	reg_state[CTX_PDP1_LDW+1] = ppgtt->pd_dma_addr[1];
> > +	reg_state[CTX_PDP0_UDW+1] = (u64)ppgtt->pd_dma_addr[0] >> 32;
> > +	reg_state[CTX_PDP0_LDW+1] = ppgtt->pd_dma_addr[0];
> 
> Are we able to use upper_32_bits() and lower_32_bits() for these?

ACK

-- Oscar
bradley.d.volkin@intel.com June 23, 2014, 3:05 p.m. UTC | #3
On Mon, Jun 23, 2014 at 05:42:50AM -0700, Mateo Lozano, Oscar wrote:
> > -----Original Message-----
> > From: Volkin, Bradley D
> > > +	reg_state[CTX_RING_HEAD+1] = 0;
> > > +	reg_state[CTX_RING_TAIL] = RING_TAIL(ring->mmio_base);
> > > +	reg_state[CTX_RING_TAIL+1] = 0;
> > > +	reg_state[CTX_RING_BUFFER_START] = RING_START(ring-
> > >mmio_base);
> > > +	reg_state[CTX_RING_BUFFER_START+1] =
> > i915_gem_obj_ggtt_offset(ring_obj);
> > > +	reg_state[CTX_RING_BUFFER_CONTROL] = RING_CTL(ring-
> > >mmio_base);
> > > +	reg_state[CTX_RING_BUFFER_CONTROL+1] = (31 * PAGE_SIZE) |
> > > +RING_VALID;
> > 
> > The size here doesn't look right to me. Shouldn't it be (number of pages - 1)?
> > See init_ring_common()
> 
> But that´s exactly what it is, right? 
> 
> Our ringbuf->size = 32 * PAGE_SIZE;
> so we are setting 31 * PAGE_SIZE

Ok, on closer inspection, the result is correct because the Buffer Length
field happens to be bits 20:12. But it looked to me like a size-in-bytes
rather than the encoding I expected. So I guess I'd prefer that we do it
as in init_ring_common(), using ring_obj->base.size and the RING_NR_PAGES
mask so that it's a bit more obvious what's going on.

Thanks,
Brad

> 
> > > +	reg_state[CTX_BB_HEAD_U] = ring->mmio_base + 0x168;
> > > +	reg_state[CTX_BB_HEAD_U+1] = 0;
> > > +	reg_state[CTX_BB_HEAD_L] = ring->mmio_base + 0x140;
> > > +	reg_state[CTX_BB_HEAD_L+1] = 0;
> > > +	reg_state[CTX_BB_STATE] = ring->mmio_base + 0x110;
> > > +	reg_state[CTX_BB_STATE+1] = (1<<5);
> > > +	reg_state[CTX_SECOND_BB_HEAD_U] = ring->mmio_base + 0x11c;
> > > +	reg_state[CTX_SECOND_BB_HEAD_U+1] = 0;
> > > +	reg_state[CTX_SECOND_BB_HEAD_L] = ring->mmio_base + 0x114;
> > > +	reg_state[CTX_SECOND_BB_HEAD_L+1] = 0;
> > > +	reg_state[CTX_SECOND_BB_STATE] = ring->mmio_base + 0x118;
> > > +	reg_state[CTX_SECOND_BB_STATE+1] = 0;
> > > +	if (ring->id == RCS) {
> > > +		reg_state[CTX_BB_PER_CTX_PTR] = ring->mmio_base +
> > 0x1c0;
> > > +		reg_state[CTX_BB_PER_CTX_PTR+1] = 0;
> > > +		reg_state[CTX_RCS_INDIRECT_CTX] = ring->mmio_base +
> > 0x1c4;
> > > +		reg_state[CTX_RCS_INDIRECT_CTX+1] = 0;
> > > +		reg_state[CTX_RCS_INDIRECT_CTX_OFFSET] = ring-
> > >mmio_base + 0x1c8;
> > > +		reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] = 0;
> > > +	}
> > > +	reg_state[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9);
> > > +	reg_state[CTX_LRI_HEADER_1] |= MI_LRI_FORCE_POSTED;
> > > +	reg_state[CTX_CTX_TIMESTAMP] = ring->mmio_base + 0x3a8;
> > > +	reg_state[CTX_CTX_TIMESTAMP+1] = 0;
> > > +	reg_state[CTX_PDP3_UDW] = GEN8_RING_PDP_UDW(ring, 3);
> > > +	reg_state[CTX_PDP3_LDW] = GEN8_RING_PDP_LDW(ring, 3);
> > > +	reg_state[CTX_PDP2_UDW] = GEN8_RING_PDP_UDW(ring, 2);
> > > +	reg_state[CTX_PDP2_LDW] = GEN8_RING_PDP_LDW(ring, 2);
> > > +	reg_state[CTX_PDP1_UDW] = GEN8_RING_PDP_UDW(ring, 1);
> > > +	reg_state[CTX_PDP1_LDW] = GEN8_RING_PDP_LDW(ring, 1);
> > > +	reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0);
> > > +	reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0);
> > > +	reg_state[CTX_PDP3_UDW+1] = (u64)ppgtt->pd_dma_addr[3] >> 32;
> > > +	reg_state[CTX_PDP3_LDW+1] = ppgtt->pd_dma_addr[3];
> > > +	reg_state[CTX_PDP2_UDW+1] = (u64)ppgtt->pd_dma_addr[2] >> 32;
> > > +	reg_state[CTX_PDP2_LDW+1] = ppgtt->pd_dma_addr[2];
> > > +	reg_state[CTX_PDP1_UDW+1] = (u64)ppgtt->pd_dma_addr[1] >> 32;
> > > +	reg_state[CTX_PDP1_LDW+1] = ppgtt->pd_dma_addr[1];
> > > +	reg_state[CTX_PDP0_UDW+1] = (u64)ppgtt->pd_dma_addr[0] >> 32;
> > > +	reg_state[CTX_PDP0_LDW+1] = ppgtt->pd_dma_addr[0];
> > 
> > Are we able to use upper_32_bits() and lower_32_bits() for these?
> 
> ACK
> 
> -- Oscar
oscar.mateo@intel.com June 23, 2014, 3:11 p.m. UTC | #4
> -----Original Message-----
> From: Volkin, Bradley D
> Sent: Monday, June 23, 2014 4:06 PM
> To: Mateo Lozano, Oscar
> Cc: intel-gfx@lists.freedesktop.org
> Subject: Re: [Intel-gfx] [PATCH 12/53] drm/i915/bdw: Populate LR contexts
> (somewhat)
> 
> On Mon, Jun 23, 2014 at 05:42:50AM -0700, Mateo Lozano, Oscar wrote:
> > > -----Original Message-----
> > > From: Volkin, Bradley D
> > > > +	reg_state[CTX_RING_HEAD+1] = 0;
> > > > +	reg_state[CTX_RING_TAIL] = RING_TAIL(ring->mmio_base);
> > > > +	reg_state[CTX_RING_TAIL+1] = 0;
> > > > +	reg_state[CTX_RING_BUFFER_START] = RING_START(ring-
> > > >mmio_base);
> > > > +	reg_state[CTX_RING_BUFFER_START+1] =
> > > i915_gem_obj_ggtt_offset(ring_obj);
> > > > +	reg_state[CTX_RING_BUFFER_CONTROL] = RING_CTL(ring-
> > > >mmio_base);
> > > > +	reg_state[CTX_RING_BUFFER_CONTROL+1] = (31 * PAGE_SIZE) |
> > > > +RING_VALID;
> > >
> > > The size here doesn't look right to me. Shouldn't it be (number of pages -
> 1)?
> > > See init_ring_common()
> >
> > But that´s exactly what it is, right?
> >
> > Our ringbuf->size = 32 * PAGE_SIZE;
> > so we are setting 31 * PAGE_SIZE
> 
> Ok, on closer inspection, the result is correct because the Buffer Length field
> happens to be bits 20:12. But it looked to me like a size-in-bytes rather than
> the encoding I expected. So I guess I'd prefer that we do it as in
> init_ring_common(), using ring_obj->base.size and the RING_NR_PAGES
> mask so that it's a bit more obvious what's going on.
> 
> Thanks,
> Brad

Yeah, it probably looks less magical that way. Ok, will do.
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 286f05c..9c8692a 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -277,6 +277,7 @@ 
  *   address/value pairs. Don't overdue it, though, x <= 2^4 must hold!
  */
 #define MI_LOAD_REGISTER_IMM(x)	MI_INSTR(0x22, 2*(x)-1)
+#define   MI_LRI_FORCE_POSTED		(1<<12)
 #define MI_STORE_REGISTER_MEM(x) MI_INSTR(0x24, 2*(x)-1)
 #define MI_STORE_REGISTER_MEM_GEN8(x) MI_INSTR(0x24, 3*(x)-1)
 #define   MI_SRM_LRM_GLOBAL_GTT		(1<<22)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index b3a23e0..b96bb45 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -46,6 +46,38 @@ 
 
 #define GEN8_LR_CONTEXT_ALIGN 4096
 
+#define RING_ELSP(ring)			((ring)->mmio_base+0x230)
+#define RING_CONTEXT_CONTROL(ring)	((ring)->mmio_base+0x244)
+
+#define CTX_LRI_HEADER_0		0x01
+#define CTX_CONTEXT_CONTROL		0x02
+#define CTX_RING_HEAD			0x04
+#define CTX_RING_TAIL			0x06
+#define CTX_RING_BUFFER_START		0x08
+#define CTX_RING_BUFFER_CONTROL		0x0a
+#define CTX_BB_HEAD_U			0x0c
+#define CTX_BB_HEAD_L			0x0e
+#define CTX_BB_STATE			0x10
+#define CTX_SECOND_BB_HEAD_U		0x12
+#define CTX_SECOND_BB_HEAD_L		0x14
+#define CTX_SECOND_BB_STATE		0x16
+#define CTX_BB_PER_CTX_PTR		0x18
+#define CTX_RCS_INDIRECT_CTX		0x1a
+#define CTX_RCS_INDIRECT_CTX_OFFSET	0x1c
+#define CTX_LRI_HEADER_1		0x21
+#define CTX_CTX_TIMESTAMP		0x22
+#define CTX_PDP3_UDW			0x24
+#define CTX_PDP3_LDW			0x26
+#define CTX_PDP2_UDW			0x28
+#define CTX_PDP2_LDW			0x2a
+#define CTX_PDP1_UDW			0x2c
+#define CTX_PDP1_LDW			0x2e
+#define CTX_PDP0_UDW			0x30
+#define CTX_PDP0_LDW			0x32
+#define CTX_LRI_HEADER_2		0x41
+#define CTX_R_PWR_CLK_STATE		0x42
+#define CTX_GPGPU_CSR_BASE_ADDRESS	0x44
+
 bool intel_enable_execlists(struct drm_device *dev)
 {
 	if (!i915.enable_execlists)
@@ -54,6 +86,110 @@  bool intel_enable_execlists(struct drm_device *dev)
 	return HAS_LOGICAL_RING_CONTEXTS(dev) && USES_PPGTT(dev);
 }
 
+static int
+populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_obj,
+		    struct intel_engine_cs *ring, struct drm_i915_gem_object *ring_obj)
+{
+	struct i915_hw_ppgtt *ppgtt = ctx_to_ppgtt(ctx);
+	struct page *page;
+	uint32_t *reg_state;
+	int ret;
+
+	ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true);
+	if (ret) {
+		DRM_DEBUG_DRIVER("Could not set to CPU domain\n");
+		return ret;
+	}
+
+	ret = i915_gem_object_get_pages(ctx_obj);
+	if (ret) {
+		DRM_DEBUG_DRIVER("Could not get object pages\n");
+		return ret;
+	}
+
+	i915_gem_object_pin_pages(ctx_obj);
+
+	/* The second page of the context object contains some fields which must
+	 * be set up prior to the first execution. */
+	page = i915_gem_object_get_page(ctx_obj, 1);
+	reg_state = kmap_atomic(page);
+
+	/* A context is actually a big batch buffer with several MI_LOAD_REGISTER_IMM
+	 * commands followed by (reg, value) pairs. The values we are setting here are
+	 * only for the first context restore: on a subsequent save, the GPU will
+	 * recreate this batchbuffer with new values (including all the missing
+	 * MI_LOAD_REGISTER_IMM commands that we are not initializing here). */
+	if (ring->id == RCS)
+		reg_state[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(14);
+	else
+		reg_state[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(11);
+	reg_state[CTX_LRI_HEADER_0] |= MI_LRI_FORCE_POSTED;
+	reg_state[CTX_CONTEXT_CONTROL] = RING_CONTEXT_CONTROL(ring);
+	reg_state[CTX_CONTEXT_CONTROL+1] = (1<<3) | MI_RESTORE_INHIBIT;
+	reg_state[CTX_CONTEXT_CONTROL+1] |= reg_state[CTX_CONTEXT_CONTROL+1] << 16;
+	reg_state[CTX_RING_HEAD] = RING_HEAD(ring->mmio_base);
+	reg_state[CTX_RING_HEAD+1] = 0;
+	reg_state[CTX_RING_TAIL] = RING_TAIL(ring->mmio_base);
+	reg_state[CTX_RING_TAIL+1] = 0;
+	reg_state[CTX_RING_BUFFER_START] = RING_START(ring->mmio_base);
+	reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj);
+	reg_state[CTX_RING_BUFFER_CONTROL] = RING_CTL(ring->mmio_base);
+	reg_state[CTX_RING_BUFFER_CONTROL+1] = (31 * PAGE_SIZE) | RING_VALID;
+	reg_state[CTX_BB_HEAD_U] = ring->mmio_base + 0x168;
+	reg_state[CTX_BB_HEAD_U+1] = 0;
+	reg_state[CTX_BB_HEAD_L] = ring->mmio_base + 0x140;
+	reg_state[CTX_BB_HEAD_L+1] = 0;
+	reg_state[CTX_BB_STATE] = ring->mmio_base + 0x110;
+	reg_state[CTX_BB_STATE+1] = (1<<5);
+	reg_state[CTX_SECOND_BB_HEAD_U] = ring->mmio_base + 0x11c;
+	reg_state[CTX_SECOND_BB_HEAD_U+1] = 0;
+	reg_state[CTX_SECOND_BB_HEAD_L] = ring->mmio_base + 0x114;
+	reg_state[CTX_SECOND_BB_HEAD_L+1] = 0;
+	reg_state[CTX_SECOND_BB_STATE] = ring->mmio_base + 0x118;
+	reg_state[CTX_SECOND_BB_STATE+1] = 0;
+	if (ring->id == RCS) {
+		reg_state[CTX_BB_PER_CTX_PTR] = ring->mmio_base + 0x1c0;
+		reg_state[CTX_BB_PER_CTX_PTR+1] = 0;
+		reg_state[CTX_RCS_INDIRECT_CTX] = ring->mmio_base + 0x1c4;
+		reg_state[CTX_RCS_INDIRECT_CTX+1] = 0;
+		reg_state[CTX_RCS_INDIRECT_CTX_OFFSET] = ring->mmio_base + 0x1c8;
+		reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] = 0;
+	}
+	reg_state[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9);
+	reg_state[CTX_LRI_HEADER_1] |= MI_LRI_FORCE_POSTED;
+	reg_state[CTX_CTX_TIMESTAMP] = ring->mmio_base + 0x3a8;
+	reg_state[CTX_CTX_TIMESTAMP+1] = 0;
+	reg_state[CTX_PDP3_UDW] = GEN8_RING_PDP_UDW(ring, 3);
+	reg_state[CTX_PDP3_LDW] = GEN8_RING_PDP_LDW(ring, 3);
+	reg_state[CTX_PDP2_UDW] = GEN8_RING_PDP_UDW(ring, 2);
+	reg_state[CTX_PDP2_LDW] = GEN8_RING_PDP_LDW(ring, 2);
+	reg_state[CTX_PDP1_UDW] = GEN8_RING_PDP_UDW(ring, 1);
+	reg_state[CTX_PDP1_LDW] = GEN8_RING_PDP_LDW(ring, 1);
+	reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0);
+	reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0);
+	reg_state[CTX_PDP3_UDW+1] = (u64)ppgtt->pd_dma_addr[3] >> 32;
+	reg_state[CTX_PDP3_LDW+1] = ppgtt->pd_dma_addr[3];
+	reg_state[CTX_PDP2_UDW+1] = (u64)ppgtt->pd_dma_addr[2] >> 32;
+	reg_state[CTX_PDP2_LDW+1] = ppgtt->pd_dma_addr[2];
+	reg_state[CTX_PDP1_UDW+1] = (u64)ppgtt->pd_dma_addr[1] >> 32;
+	reg_state[CTX_PDP1_LDW+1] = ppgtt->pd_dma_addr[1];
+	reg_state[CTX_PDP0_UDW+1] = (u64)ppgtt->pd_dma_addr[0] >> 32;
+	reg_state[CTX_PDP0_LDW+1] = ppgtt->pd_dma_addr[0];
+	if (ring->id == RCS) {
+		reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
+		reg_state[CTX_R_PWR_CLK_STATE] = 0x20c8;
+		reg_state[CTX_R_PWR_CLK_STATE+1] = 0;
+	}
+
+	kunmap_atomic(reg_state);
+
+	ctx_obj->dirty = 1;
+	set_page_dirty(page);
+	i915_gem_object_unpin_pages(ctx_obj);
+
+	return 0;
+}
+
 void intel_lr_context_free(struct intel_context *ctx)
 {
 	int i;
@@ -145,14 +281,24 @@  int intel_lr_context_deferred_create(struct intel_context *ctx,
 	if (ret) {
 		DRM_DEBUG_DRIVER("Failed to allocate ringbuffer obj %s: %d\n",
 				ring->name, ret);
-		kfree(ringbuf);
-		i915_gem_object_ggtt_unpin(ctx_obj);
-		drm_gem_object_unreference(&ctx_obj->base);
-		return ret;
+		goto error;
+	}
+
+	ret = populate_lr_context(ctx, ctx_obj, ring, ringbuf->obj);
+	if (ret) {
+		DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
+		intel_destroy_ring_buffer(ringbuf);
+		goto error;
 	}
 
 	ctx->engine[ring->id].ringbuf = ringbuf;
 	ctx->engine[ring->id].obj = ctx_obj;
 
 	return 0;
+
+error:
+	kfree(ringbuf);
+	i915_gem_object_ggtt_unpin(ctx_obj);
+	drm_gem_object_unreference(&ctx_obj->base);
+	return ret;
 }