diff mbox

[v9,10/19] drm/i915/gen8: Add 4 level support in insert_entries and clear_range

Message ID 1438592007-3354-1-git-send-email-michel.thierry@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Michel Thierry Aug. 3, 2015, 8:53 a.m. UTC
When 48b is enabled, gen8_ppgtt_insert_entries needs to read the Page Map
Level 4 (PML4), before it selects which Page Directory Pointer (PDP)
it will write to.

Similarly, gen8_ppgtt_clear_range needs to get the correct PDP/PD range.

This patch was inspired by Ben's "Depend exclusively on map and
unmap_vma".

v2: Rebase after s/page_tables/page_table/.
v3: Remove unnecessary pdpe loop in gen8_ppgtt_clear_range_4lvl and use
clamp_pdp in gen8_ppgtt_insert_entries (Akash).
v4: Merge gen8_ppgtt_clear_range_4lvl into gen8_ppgtt_clear_range to
maintain symmetry with gen8_ppgtt_insert_entries (Akash).
v5: Do not mix pages and bytes in insert_entries (Akash).
v6: Prevent overflow in sg_nents << PAGE_SHIFT, when inserting 4GB at
once.
v7: Rebase after Mika's ppgtt cleanup / scratch merge patch series.
Use gen8_px_index functions, and remove unnecessary number of pages
parameter in insert_pte_entries.
v8: Change gen8_ppgtt_clear_pte_range to stop at PDP boundary, instead of
adding and extra clamp function; remove unnecessary pdp_start/pdp_len
variables (Akash).
v9: pages->orig_nents instead of sg_nents(pages->sgl) to get the
length (Akash).
v10: Remove pdp warning check ingen8_ppgtt_insert_pte_entries until this
commit (Akash).

Reviewed-by: Akash Goel <akash.goel@intel.com> (v9)
Cc: Akash Goel <akash.goel@intel.com>
Signed-off-by: Michel Thierry <michel.thierry@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 52 +++++++++++++++++++++++++------------
 1 file changed, 36 insertions(+), 16 deletions(-)

Comments

akash.goel@intel.com Aug. 3, 2015, 9:23 a.m. UTC | #1
Reviewed the patch & it looks fine.
Reviewed-by: "Akash Goel <akash.goel@intel.com>"


On 8/3/2015 2:23 PM, Michel Thierry wrote:
> When 48b is enabled, gen8_ppgtt_insert_entries needs to read the Page Map
> Level 4 (PML4), before it selects which Page Directory Pointer (PDP)
> it will write to.
>
> Similarly, gen8_ppgtt_clear_range needs to get the correct PDP/PD range.
>
> This patch was inspired by Ben's "Depend exclusively on map and
> unmap_vma".
>
> v2: Rebase after s/page_tables/page_table/.
> v3: Remove unnecessary pdpe loop in gen8_ppgtt_clear_range_4lvl and use
> clamp_pdp in gen8_ppgtt_insert_entries (Akash).
> v4: Merge gen8_ppgtt_clear_range_4lvl into gen8_ppgtt_clear_range to
> maintain symmetry with gen8_ppgtt_insert_entries (Akash).
> v5: Do not mix pages and bytes in insert_entries (Akash).
> v6: Prevent overflow in sg_nents << PAGE_SHIFT, when inserting 4GB at
> once.
> v7: Rebase after Mika's ppgtt cleanup / scratch merge patch series.
> Use gen8_px_index functions, and remove unnecessary number of pages
> parameter in insert_pte_entries.
> v8: Change gen8_ppgtt_clear_pte_range to stop at PDP boundary, instead of
> adding and extra clamp function; remove unnecessary pdp_start/pdp_len
> variables (Akash).
> v9: pages->orig_nents instead of sg_nents(pages->sgl) to get the
> length (Akash).
> v10: Remove pdp warning check ingen8_ppgtt_insert_pte_entries until this
> commit (Akash).
>
> Reviewed-by: Akash Goel <akash.goel@intel.com> (v9)
> Cc: Akash Goel <akash.goel@intel.com>
> Signed-off-by: Michel Thierry <michel.thierry@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_gem_gtt.c | 52 +++++++++++++++++++++++++------------
>   1 file changed, 36 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 31fc672..d5ae5de 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -681,9 +681,9 @@ static void gen8_ppgtt_clear_pte_range(struct i915_address_space *vm,
>   	struct i915_hw_ppgtt *ppgtt =
>   		container_of(vm, struct i915_hw_ppgtt, base);
>   	gen8_pte_t *pt_vaddr;
> -	unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
> -	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
> -	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
> +	unsigned pdpe = gen8_pdpe_index(start);
> +	unsigned pde = gen8_pde_index(start);
> +	unsigned pte = gen8_pte_index(start);
>   	unsigned num_entries = length >> PAGE_SHIFT;
>   	unsigned last_pte, i;
>
> @@ -722,7 +722,8 @@ static void gen8_ppgtt_clear_pte_range(struct i915_address_space *vm,
>
>   		pte = 0;
>   		if (++pde == I915_PDES) {
> -			pdpe++;
> +			if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
> +				break;
>   			pde = 0;
>   		}
>   	}
> @@ -735,12 +736,21 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
>   {
>   	struct i915_hw_ppgtt *ppgtt =
>   		container_of(vm, struct i915_hw_ppgtt, base);
> -	struct i915_page_directory_pointer *pdp = &ppgtt->pdp; /* FIXME: 48b */
> -
>   	gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
>   						 I915_CACHE_LLC, use_scratch);
>
> -	gen8_ppgtt_clear_pte_range(vm, pdp, start, length, scratch_pte);
> +	if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
> +		gen8_ppgtt_clear_pte_range(vm, &ppgtt->pdp, start, length,
> +					   scratch_pte);
> +	} else {
> +		uint64_t templ4, pml4e;
> +		struct i915_page_directory_pointer *pdp;
> +
> +		gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, templ4, pml4e) {
> +			gen8_ppgtt_clear_pte_range(vm, pdp, start, length,
> +						   scratch_pte);
> +		}
> +	}
>   }
>
>   static void
> @@ -753,16 +763,13 @@ gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
>   	struct i915_hw_ppgtt *ppgtt =
>   		container_of(vm, struct i915_hw_ppgtt, base);
>   	gen8_pte_t *pt_vaddr;
> -	unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
> -	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
> -	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
> +	unsigned pdpe = gen8_pdpe_index(start);
> +	unsigned pde = gen8_pde_index(start);
> +	unsigned pte = gen8_pte_index(start);
>
>   	pt_vaddr = NULL;
>
>   	while (__sg_page_iter_next(sg_iter)) {
> -		if (WARN_ON(pdpe >= GEN8_LEGACY_PDPES))
> -			break;
> -
>   		if (pt_vaddr == NULL) {
>   			struct i915_page_directory *pd = pdp->page_directory[pdpe];
>   			struct i915_page_table *pt = pd->page_table[pde];
> @@ -776,7 +783,8 @@ gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
>   			kunmap_px(ppgtt, pt_vaddr);
>   			pt_vaddr = NULL;
>   			if (++pde == I915_PDES) {
> -				pdpe++;
> +				if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
> +					break;
>   				pde = 0;
>   			}
>   			pte = 0;
> @@ -795,11 +803,23 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
>   {
>   	struct i915_hw_ppgtt *ppgtt =
>   		container_of(vm, struct i915_hw_ppgtt, base);
> -	struct i915_page_directory_pointer *pdp = &ppgtt->pdp; /* FIXME: 48b */
>   	struct sg_page_iter sg_iter;
>
>   	__sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0);
> -	gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter, start, cache_level);
> +
> +	if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
> +		gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start,
> +					      cache_level);
> +	} else {
> +		struct i915_page_directory_pointer *pdp;
> +		uint64_t templ4, pml4e;
> +		uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT;
> +
> +		gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, templ4, pml4e) {
> +			gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter,
> +						      start, cache_level);
> +		}
> +	}
>   }
>
>   static void gen8_free_page_tables(struct drm_device *dev,
>
Daniel Vetter Aug. 5, 2015, 3:46 p.m. UTC | #2
On Mon, Aug 03, 2015 at 09:53:27AM +0100, Michel Thierry wrote:
> When 48b is enabled, gen8_ppgtt_insert_entries needs to read the Page Map
> Level 4 (PML4), before it selects which Page Directory Pointer (PDP)
> it will write to.
> 
> Similarly, gen8_ppgtt_clear_range needs to get the correct PDP/PD range.
> 
> This patch was inspired by Ben's "Depend exclusively on map and
> unmap_vma".
> 
> v2: Rebase after s/page_tables/page_table/.
> v3: Remove unnecessary pdpe loop in gen8_ppgtt_clear_range_4lvl and use
> clamp_pdp in gen8_ppgtt_insert_entries (Akash).
> v4: Merge gen8_ppgtt_clear_range_4lvl into gen8_ppgtt_clear_range to
> maintain symmetry with gen8_ppgtt_insert_entries (Akash).
> v5: Do not mix pages and bytes in insert_entries (Akash).
> v6: Prevent overflow in sg_nents << PAGE_SHIFT, when inserting 4GB at
> once.
> v7: Rebase after Mika's ppgtt cleanup / scratch merge patch series.
> Use gen8_px_index functions, and remove unnecessary number of pages
> parameter in insert_pte_entries.
> v8: Change gen8_ppgtt_clear_pte_range to stop at PDP boundary, instead of
> adding and extra clamp function; remove unnecessary pdp_start/pdp_len
> variables (Akash).
> v9: pages->orig_nents instead of sg_nents(pages->sgl) to get the
> length (Akash).
> v10: Remove pdp warning check ingen8_ppgtt_insert_pte_entries until this
> commit (Akash).
> 
> Reviewed-by: Akash Goel <akash.goel@intel.com> (v9)
> Cc: Akash Goel <akash.goel@intel.com>
> Signed-off-by: Michel Thierry <michel.thierry@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 52 +++++++++++++++++++++++++------------
>  1 file changed, 36 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 31fc672..d5ae5de 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -681,9 +681,9 @@ static void gen8_ppgtt_clear_pte_range(struct i915_address_space *vm,
>  	struct i915_hw_ppgtt *ppgtt =
>  		container_of(vm, struct i915_hw_ppgtt, base);
>  	gen8_pte_t *pt_vaddr;
> -	unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
> -	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
> -	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
> +	unsigned pdpe = gen8_pdpe_index(start);
> +	unsigned pde = gen8_pde_index(start);
> +	unsigned pte = gen8_pte_index(start);
>  	unsigned num_entries = length >> PAGE_SHIFT;
>  	unsigned last_pte, i;
>  
> @@ -722,7 +722,8 @@ static void gen8_ppgtt_clear_pte_range(struct i915_address_space *vm,
>  
>  		pte = 0;
>  		if (++pde == I915_PDES) {
> -			pdpe++;
> +			if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
> +				break;
>  			pde = 0;
>  		}
>  	}
> @@ -735,12 +736,21 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
>  {
>  	struct i915_hw_ppgtt *ppgtt =
>  		container_of(vm, struct i915_hw_ppgtt, base);
> -	struct i915_page_directory_pointer *pdp = &ppgtt->pdp; /* FIXME: 48b */
> -
>  	gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
>  						 I915_CACHE_LLC, use_scratch);
>  
> -	gen8_ppgtt_clear_pte_range(vm, pdp, start, length, scratch_pte);
> +	if (!USES_FULL_48BIT_PPGTT(vm->dev)) {

Hm, this isn't pretty, and looking through earlier patches you have a lot
of if (48BIT) functions right at the topmost level where we have vfuncs,
e.g. gen8_ppgtt_cleanup. Imo much better to just do a
gen8_legacy_ppgtt_cleanup and gen8_4lvl_ppgtt_cleanup. Yeah means we
duplicate the call to free_scracth but really that's meh - we committed to
that abstraction so let's use it.

But reworking all the patches to get rid of all the 48bit ifs and
exploiting the vfunc abstraction we have will be a bit of work, so I'll
keep merging and sign you up for that follow-up task. The usual design
when we have vfuncs should be:
- do per-platform vfuncs everywhere you need a split
- for functionality shared between different vfuncs extract common helper
  functions and call them from both places.

E.g. for this case here I think we need a new 4lvl insert_entries
function which calls the existing one in a loop, and a 3lvl inser_entries
function which calls the existing one for the single legacy pdp. Make 2
copies of this and pull out the if to the top-level of each, then
simplify.

If we have abstraction in the form of vfuncs _and_ pile in lots of ifs at
low level then we pay both the price for the abstraction and the price for
tightly nit code, i.e. both downsides without an upside.

Anway, I expect follow-up work here ;-)

Thanks, Daniel

> +		gen8_ppgtt_clear_pte_range(vm, &ppgtt->pdp, start, length,
> +					   scratch_pte);
> +	} else {
> +		uint64_t templ4, pml4e;
> +		struct i915_page_directory_pointer *pdp;
> +
> +		gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, templ4, pml4e) {
> +			gen8_ppgtt_clear_pte_range(vm, pdp, start, length,
> +						   scratch_pte);
> +		}
> +	}
>  }
>  
>  static void
> @@ -753,16 +763,13 @@ gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
>  	struct i915_hw_ppgtt *ppgtt =
>  		container_of(vm, struct i915_hw_ppgtt, base);
>  	gen8_pte_t *pt_vaddr;
> -	unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
> -	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
> -	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
> +	unsigned pdpe = gen8_pdpe_index(start);
> +	unsigned pde = gen8_pde_index(start);
> +	unsigned pte = gen8_pte_index(start);
>  
>  	pt_vaddr = NULL;
>  
>  	while (__sg_page_iter_next(sg_iter)) {
> -		if (WARN_ON(pdpe >= GEN8_LEGACY_PDPES))
> -			break;
> -
>  		if (pt_vaddr == NULL) {
>  			struct i915_page_directory *pd = pdp->page_directory[pdpe];
>  			struct i915_page_table *pt = pd->page_table[pde];
> @@ -776,7 +783,8 @@ gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
>  			kunmap_px(ppgtt, pt_vaddr);
>  			pt_vaddr = NULL;
>  			if (++pde == I915_PDES) {
> -				pdpe++;
> +				if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
> +					break;
>  				pde = 0;
>  			}
>  			pte = 0;
> @@ -795,11 +803,23 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
>  {
>  	struct i915_hw_ppgtt *ppgtt =
>  		container_of(vm, struct i915_hw_ppgtt, base);
> -	struct i915_page_directory_pointer *pdp = &ppgtt->pdp; /* FIXME: 48b */
>  	struct sg_page_iter sg_iter;
>  
>  	__sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0);
> -	gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter, start, cache_level);
> +
> +	if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
> +		gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start,
> +					      cache_level);
> +	} else {
> +		struct i915_page_directory_pointer *pdp;
> +		uint64_t templ4, pml4e;
> +		uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT;
> +
> +		gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, templ4, pml4e) {
> +			gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter,
> +						      start, cache_level);
> +		}
> +	}
>  }
>  
>  static void gen8_free_page_tables(struct drm_device *dev,
> -- 
> 2.5.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Michel Thierry Aug. 5, 2015, 4:13 p.m. UTC | #3
On 8/5/2015 4:46 PM, Daniel Vetter wrote:
> On Mon, Aug 03, 2015 at 09:53:27AM +0100, Michel Thierry wrote:
>> @@ -735,12 +736,21 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
>>   {
>>   	struct i915_hw_ppgtt *ppgtt =
>>   		container_of(vm, struct i915_hw_ppgtt, base);
>> -	struct i915_page_directory_pointer *pdp = &ppgtt->pdp; /* FIXME: 48b */
>> -
>>   	gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
>>   						 I915_CACHE_LLC, use_scratch);
>>
>> -	gen8_ppgtt_clear_pte_range(vm, pdp, start, length, scratch_pte);
>> +	if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
>
> Hm, this isn't pretty, and looking through earlier patches you have a lot
> of if (48BIT) functions right at the topmost level where we have vfuncs,
> e.g. gen8_ppgtt_cleanup. Imo much better to just do a
> gen8_legacy_ppgtt_cleanup and gen8_4lvl_ppgtt_cleanup. Yeah means we
> duplicate the call to free_scracth but really that's meh - we committed to
> that abstraction so let's use it.
>
> But reworking all the patches to get rid of all the 48bit ifs and
> exploiting the vfunc abstraction we have will be a bit of work, so I'll
> keep merging and sign you up for that follow-up task. The usual design
> when we have vfuncs should be:
> - do per-platform vfuncs everywhere you need a split
> - for functionality shared between different vfuncs extract common helper
>    functions and call them from both places.
>
> E.g. for this case here I think we need a new 4lvl insert_entries
> function which calls the existing one in a loop, and a 3lvl inser_entries
> function which calls the existing one for the single legacy pdp. Make 2
> copies of this and pull out the if to the top-level of each, then
> simplify.
>
> If we have abstraction in the form of vfuncs _and_ pile in lots of ifs at
> low level then we pay both the price for the abstraction and the price for
> tightly nit code, i.e. both downsides without an upside.
>
> Anway, I expect follow-up work here ;-)
>
> Thanks, Daniel
>

Yes, all the main functions (alloc, clear, cleanup, dump, insert) have

if (USES_FULL_48BIT_PPGTT)
	pml4 function
else
	pdp function

I'll make a patch to set the correct ones as vfuncs in gen8_ppgtt_init.

-Michel
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 31fc672..d5ae5de 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -681,9 +681,9 @@  static void gen8_ppgtt_clear_pte_range(struct i915_address_space *vm,
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
 	gen8_pte_t *pt_vaddr;
-	unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
-	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
-	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
+	unsigned pdpe = gen8_pdpe_index(start);
+	unsigned pde = gen8_pde_index(start);
+	unsigned pte = gen8_pte_index(start);
 	unsigned num_entries = length >> PAGE_SHIFT;
 	unsigned last_pte, i;
 
@@ -722,7 +722,8 @@  static void gen8_ppgtt_clear_pte_range(struct i915_address_space *vm,
 
 		pte = 0;
 		if (++pde == I915_PDES) {
-			pdpe++;
+			if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
+				break;
 			pde = 0;
 		}
 	}
@@ -735,12 +736,21 @@  static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
 {
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
-	struct i915_page_directory_pointer *pdp = &ppgtt->pdp; /* FIXME: 48b */
-
 	gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
 						 I915_CACHE_LLC, use_scratch);
 
-	gen8_ppgtt_clear_pte_range(vm, pdp, start, length, scratch_pte);
+	if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
+		gen8_ppgtt_clear_pte_range(vm, &ppgtt->pdp, start, length,
+					   scratch_pte);
+	} else {
+		uint64_t templ4, pml4e;
+		struct i915_page_directory_pointer *pdp;
+
+		gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, templ4, pml4e) {
+			gen8_ppgtt_clear_pte_range(vm, pdp, start, length,
+						   scratch_pte);
+		}
+	}
 }
 
 static void
@@ -753,16 +763,13 @@  gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
 	gen8_pte_t *pt_vaddr;
-	unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
-	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
-	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
+	unsigned pdpe = gen8_pdpe_index(start);
+	unsigned pde = gen8_pde_index(start);
+	unsigned pte = gen8_pte_index(start);
 
 	pt_vaddr = NULL;
 
 	while (__sg_page_iter_next(sg_iter)) {
-		if (WARN_ON(pdpe >= GEN8_LEGACY_PDPES))
-			break;
-
 		if (pt_vaddr == NULL) {
 			struct i915_page_directory *pd = pdp->page_directory[pdpe];
 			struct i915_page_table *pt = pd->page_table[pde];
@@ -776,7 +783,8 @@  gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
 			kunmap_px(ppgtt, pt_vaddr);
 			pt_vaddr = NULL;
 			if (++pde == I915_PDES) {
-				pdpe++;
+				if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
+					break;
 				pde = 0;
 			}
 			pte = 0;
@@ -795,11 +803,23 @@  static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
 {
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
-	struct i915_page_directory_pointer *pdp = &ppgtt->pdp; /* FIXME: 48b */
 	struct sg_page_iter sg_iter;
 
 	__sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0);
-	gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter, start, cache_level);
+
+	if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
+		gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start,
+					      cache_level);
+	} else {
+		struct i915_page_directory_pointer *pdp;
+		uint64_t templ4, pml4e;
+		uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT;
+
+		gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, templ4, pml4e) {
+			gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter,
+						      start, cache_level);
+		}
+	}
 }
 
 static void gen8_free_page_tables(struct drm_device *dev,