[v2,24/24] drm/i915/bdw: Dynamic page table allocations in lrc mode

Message ID 1419354987-4622-25-git-send-email-michel.thierry@intel.com (mailing list archive)
State New, archived

Commit Message

Michel Thierry Dec. 23, 2014, 5:16 p.m. UTC
Logical ring contexts need to know their PDPs when they are populated. With
dynamic page table allocations, these PDPs may not exist yet.

Check whether the PDPs have been allocated and point any that do not exist
yet at the scratch page.

Before submission, update the PDPs in the logical ring context, since PDPs
may have been allocated after the context was populated.

Signed-off-by: Michel Thierry <michel.thierry@intel.com>
---
 drivers/gpu/drm/i915/intel_lrc.c | 80 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 70 insertions(+), 10 deletions(-)

Comments

Daniel Vetter Jan. 5, 2015, 2:59 p.m. UTC | #1
On Tue, Dec 23, 2014 at 05:16:27PM +0000, Michel Thierry wrote:
> Logical ring contexts need to know their PDPs when they are populated. With
> dynamic page table allocations, these PDPs may not exist yet.
> 
> Check whether the PDPs have been allocated and point any that do not exist
> yet at the scratch page.
> 
> Before submission, update the PDPs in the logical ring context, since PDPs
> may have been allocated after the context was populated.
> 
> Signed-off-by: Michel Thierry <michel.thierry@intel.com>

Patch subject is imo a bit misleading. What about "support dynamic pdp
updates in lrc mode"?
-Daniel


Patch

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 546884b..6abe4bc 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -358,6 +358,7 @@ static void execlists_elsp_write(struct intel_engine_cs *ring,
 
 static int execlists_update_context(struct drm_i915_gem_object *ctx_obj,
 				    struct drm_i915_gem_object *ring_obj,
+				    struct i915_hw_ppgtt *ppgtt,
 				    u32 tail)
 {
 	struct page *page;
@@ -369,6 +370,40 @@ static int execlists_update_context(struct drm_i915_gem_object *ctx_obj,
 	reg_state[CTX_RING_TAIL+1] = tail;
 	reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj);
 
+	/* True PPGTT with dynamic page allocation: update PDP registers and
+	 * point the unallocated PDPs to the scratch page
+	 */
+	if (ppgtt) {
+		if (test_bit(3, ppgtt->pdp.used_pdpes)) {
+			reg_state[CTX_PDP3_UDW+1] = upper_32_bits(ppgtt->pdp.pagedir[3]->daddr);
+			reg_state[CTX_PDP3_LDW+1] = lower_32_bits(ppgtt->pdp.pagedir[3]->daddr);
+		} else {
+			reg_state[CTX_PDP3_UDW+1] = upper_32_bits(ppgtt->scratch_pd->daddr);
+			reg_state[CTX_PDP3_LDW+1] = lower_32_bits(ppgtt->scratch_pd->daddr);
+		}
+		if (test_bit(2, ppgtt->pdp.used_pdpes)) {
+			reg_state[CTX_PDP2_UDW+1] = upper_32_bits(ppgtt->pdp.pagedir[2]->daddr);
+			reg_state[CTX_PDP2_LDW+1] = lower_32_bits(ppgtt->pdp.pagedir[2]->daddr);
+		} else {
+			reg_state[CTX_PDP2_UDW+1] = upper_32_bits(ppgtt->scratch_pd->daddr);
+			reg_state[CTX_PDP2_LDW+1] = lower_32_bits(ppgtt->scratch_pd->daddr);
+		}
+		if (test_bit(1, ppgtt->pdp.used_pdpes)) {
+			reg_state[CTX_PDP1_UDW+1] = upper_32_bits(ppgtt->pdp.pagedir[1]->daddr);
+			reg_state[CTX_PDP1_LDW+1] = lower_32_bits(ppgtt->pdp.pagedir[1]->daddr);
+		} else {
+			reg_state[CTX_PDP1_UDW+1] = upper_32_bits(ppgtt->scratch_pd->daddr);
+			reg_state[CTX_PDP1_LDW+1] = lower_32_bits(ppgtt->scratch_pd->daddr);
+		}
+		if (test_bit(0, ppgtt->pdp.used_pdpes)) {
+			reg_state[CTX_PDP0_UDW+1] = upper_32_bits(ppgtt->pdp.pagedir[0]->daddr);
+			reg_state[CTX_PDP0_LDW+1] = lower_32_bits(ppgtt->pdp.pagedir[0]->daddr);
+		} else {
+			reg_state[CTX_PDP0_UDW+1] = upper_32_bits(ppgtt->scratch_pd->daddr);
+			reg_state[CTX_PDP0_LDW+1] = lower_32_bits(ppgtt->scratch_pd->daddr);
+		}
+	}
+
 	kunmap_atomic(reg_state);
 
 	return 0;
@@ -387,7 +422,7 @@ static void execlists_submit_contexts(struct intel_engine_cs *ring,
 	WARN_ON(!i915_gem_obj_is_pinned(ctx_obj0));
 	WARN_ON(!i915_gem_obj_is_pinned(ringbuf0->obj));
 
-	execlists_update_context(ctx_obj0, ringbuf0->obj, tail0);
+	execlists_update_context(ctx_obj0, ringbuf0->obj, to0->ppgtt, tail0);
 
 	if (to1) {
 		ringbuf1 = to1->engine[ring->id].ringbuf;
@@ -396,7 +431,7 @@ static void execlists_submit_contexts(struct intel_engine_cs *ring,
 		WARN_ON(!i915_gem_obj_is_pinned(ctx_obj1));
 		WARN_ON(!i915_gem_obj_is_pinned(ringbuf1->obj));
 
-		execlists_update_context(ctx_obj1, ringbuf1->obj, tail1);
+		execlists_update_context(ctx_obj1, ringbuf1->obj, to1->ppgtt, tail1);
 	}
 
 	execlists_elsp_write(ring, ctx_obj0, ctx_obj1);
@@ -1731,14 +1766,39 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
 	reg_state[CTX_PDP1_LDW] = GEN8_RING_PDP_LDW(ring, 1);
 	reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0);
 	reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0);
-	reg_state[CTX_PDP3_UDW+1] = upper_32_bits(ppgtt->pdp.pagedir[3]->daddr);
-	reg_state[CTX_PDP3_LDW+1] = lower_32_bits(ppgtt->pdp.pagedir[3]->daddr);
-	reg_state[CTX_PDP2_UDW+1] = upper_32_bits(ppgtt->pdp.pagedir[2]->daddr);
-	reg_state[CTX_PDP2_LDW+1] = lower_32_bits(ppgtt->pdp.pagedir[2]->daddr);
-	reg_state[CTX_PDP1_UDW+1] = upper_32_bits(ppgtt->pdp.pagedir[1]->daddr);
-	reg_state[CTX_PDP1_LDW+1] = lower_32_bits(ppgtt->pdp.pagedir[1]->daddr);
-	reg_state[CTX_PDP0_UDW+1] = upper_32_bits(ppgtt->pdp.pagedir[0]->daddr);
-	reg_state[CTX_PDP0_LDW+1] = lower_32_bits(ppgtt->pdp.pagedir[0]->daddr);
+
+	/* With dynamic page allocation, PDPs may not be allocated at this point.
+	 * Point the unallocated PDPs to the scratch page.
+	 */
+	if (test_bit(3, ppgtt->pdp.used_pdpes)) {
+		reg_state[CTX_PDP3_UDW+1] = upper_32_bits(ppgtt->pdp.pagedir[3]->daddr);
+		reg_state[CTX_PDP3_LDW+1] = lower_32_bits(ppgtt->pdp.pagedir[3]->daddr);
+	} else {
+		reg_state[CTX_PDP3_UDW+1] = upper_32_bits(ppgtt->scratch_pd->daddr);
+		reg_state[CTX_PDP3_LDW+1] = lower_32_bits(ppgtt->scratch_pd->daddr);
+	}
+	if (test_bit(2, ppgtt->pdp.used_pdpes)) {
+		reg_state[CTX_PDP2_UDW+1] = upper_32_bits(ppgtt->pdp.pagedir[2]->daddr);
+		reg_state[CTX_PDP2_LDW+1] = lower_32_bits(ppgtt->pdp.pagedir[2]->daddr);
+	} else {
+		reg_state[CTX_PDP2_UDW+1] = upper_32_bits(ppgtt->scratch_pd->daddr);
+		reg_state[CTX_PDP2_LDW+1] = lower_32_bits(ppgtt->scratch_pd->daddr);
+	}
+	if (test_bit(1, ppgtt->pdp.used_pdpes)) {
+		reg_state[CTX_PDP1_UDW+1] = upper_32_bits(ppgtt->pdp.pagedir[1]->daddr);
+		reg_state[CTX_PDP1_LDW+1] = lower_32_bits(ppgtt->pdp.pagedir[1]->daddr);
+	} else {
+		reg_state[CTX_PDP1_UDW+1] = upper_32_bits(ppgtt->scratch_pd->daddr);
+		reg_state[CTX_PDP1_LDW+1] = lower_32_bits(ppgtt->scratch_pd->daddr);
+	}
+	if (test_bit(0, ppgtt->pdp.used_pdpes)) {
+		reg_state[CTX_PDP0_UDW+1] = upper_32_bits(ppgtt->pdp.pagedir[0]->daddr);
+		reg_state[CTX_PDP0_LDW+1] = lower_32_bits(ppgtt->pdp.pagedir[0]->daddr);
+	} else {
+		reg_state[CTX_PDP0_UDW+1] = upper_32_bits(ppgtt->scratch_pd->daddr);
+		reg_state[CTX_PDP0_LDW+1] = lower_32_bits(ppgtt->scratch_pd->daddr);
+	}
+
 	if (ring->id == RCS) {
 		reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
 		reg_state[CTX_R_PWR_CLK_STATE] = 0x20c8;
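
The four test_bit() blocks above, and their twins in execlists_update_context(),
are near-identical per-PDP cases. As a sketch only, not part of the patch, they
could be folded into one helper. This assumes nothing beyond the macros and
fields already visible in the diff (used_pdpes, pagedir[], scratch_pd,
CTX_PDPn_UDW/LDW); the helper name is hypothetical, and the if (ppgtt) NULL
check would stay at the execlists_update_context() call site.

static void lrc_setup_pdp_regs(u32 *reg_state, struct i915_hw_ppgtt *ppgtt)
{
	/* reg_state[] slot pairs for PDP0..PDP3, as written out longhand above */
	static const struct { int udw, ldw; } pdp_reg[] = {
		{ CTX_PDP0_UDW, CTX_PDP0_LDW },
		{ CTX_PDP1_UDW, CTX_PDP1_LDW },
		{ CTX_PDP2_UDW, CTX_PDP2_LDW },
		{ CTX_PDP3_UDW, CTX_PDP3_LDW },
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(pdp_reg); i++) {
		/* An unallocated PDP falls back to the scratch page so the
		 * hardware never chases a dangling directory pointer.
		 */
		dma_addr_t daddr = test_bit(i, ppgtt->pdp.used_pdpes) ?
				   ppgtt->pdp.pagedir[i]->daddr :
				   ppgtt->scratch_pd->daddr;

		reg_state[pdp_reg[i].udw + 1] = upper_32_bits(daddr);
		reg_state[pdp_reg[i].ldw + 1] = lower_32_bits(daddr);
	}
}

populate_lr_context() and execlists_update_context() would then each make a
single call, e.g. lrc_setup_pdp_regs(reg_state, ppgtt);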