diff mbox

[12/26] drm/i915: Page table helpers, and define renames

Message ID 1395121738-29126-13-git-send-email-benjamin.widawsky@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Ben Widawsky March 18, 2014, 5:48 a.m. UTC
These page table helpers make the code much cleaner. There is some
room to use the arch/x86 header files. The reason I've opted not to is
in several cases, the definitions are dictated by the CONFIG_ options
which do not always indicate the restrictions in the GPU. While here,
clean up the defines to have more concise names, and consolidate between
gen6 and gen8 where appropriate.

I've made a lot of tiny errors in these helpers. Often I'd correct an
error only to introduce another one. While IGT was capable of catching
them, the tests often took a while to catch, and where hard/slow to
debug in the kernel. As a result, to test this, I compiled
i915_gem_gtt.h in userspace, and ran tests from userspace. What follows
isn't by any means complete, but it was able to catch lot of bugs. Gen8
is also untested, but since the current code is almost identical, I feel
pretty comfortable with that.

void test_pte(uint32_t base) {
        uint32_t ret;
        assert_pte_index((base + 0), 0);
        assert_pte_index((base + 1), 0);
        assert_pte_index((base + 0x1000), 1);
        assert_pte_index((base + (1<<22)), 0);
        assert_pte_index((base + ((1<<22) - 1)), 1023);
        assert_pte_index((base + (1<<21)), 512);

        assert_pte_count(base + 0, 0, 0);
        assert_pte_count(base + 0, 1, 1);
        assert_pte_count(base + 0, 0x1000, 1);
        assert_pte_count(base + 0, 0x1001, 2);
        assert_pte_count(base + 0, 1<<21, 512);

        assert_pte_count(base + 0, 1<<22, 1024);
        assert_pte_count(base + 0, (1<<22) - 1, 1024);
        assert_pte_count(base + (1<<21), 1<<22, 512);
        assert_pte_count(base + (1<<21), (1<<22)+1, 512);
        assert_pte_count(base + (1<<21), 10<<22, 512);
}

void test_pde(uint32_t base) {
        assert(gen6_pde_index(base + 0) == 0);
        assert(gen6_pde_index(base + 1) == 0);
        assert(gen6_pde_index(base + (1<<21)) == 0);
        assert(gen6_pde_index(base + (1<<22)) == 1);
        assert(gen6_pde_index(base + ((256<<22)))== 256);
        assert(gen6_pde_index(base + ((512<<22))) == 0);
        assert(gen6_pde_index(base + ((513<<22))) == 1); /* This is
actually not possible on gen6 */

        assert(gen6_pde_count(base + 0, 0) == 0);
        assert(gen6_pde_count(base + 0, 1) == 1);
        assert(gen6_pde_count(base + 0, 1<<21) == 1);
        assert(gen6_pde_count(base + 0, 1<<22) == 1);
        assert(gen6_pde_count(base + 0, (1<<22) + 0x1000) == 2);
        assert(gen6_pde_count(base + 0x1000, 1<<22) == 2);
        assert(gen6_pde_count(base + 0, 511<<22) == 511);
        assert(gen6_pde_count(base + 0, 512<<22) == 512);
        assert(gen6_pde_count(base + 0x1000, 512<<22) == 512);
        assert(gen6_pde_count(base + (1<<22), 512<<22) == 511);
}

int main()
{
        test_pde(0);
        while (1)
                test_pte(rand() & ~((1<<22) - 1));

        return 0;
}

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c |  90 +++++++++++++-------------
 drivers/gpu/drm/i915/i915_gem_gtt.h | 125 ++++++++++++++++++++++++++++++++++--
 2 files changed, 162 insertions(+), 53 deletions(-)

Comments

Chris Wilson March 18, 2014, 9:05 a.m. UTC | #1
On Mon, Mar 17, 2014 at 10:48:44PM -0700, Ben Widawsky wrote:
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -1,8 +1,11 @@
>  #ifndef _I915_GEM_GTT_H
>  #define _I915_GEM_GTT_H
>  
> -#define GEN6_PPGTT_PD_ENTRIES 512
> -#define I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
> +/* GEN Agnostic defines */
> +#define I915_PDES_PER_PD		512
> +#define I915_PTE_MASK			(PAGE_SHIFT-1)

That looks decidely fishy.

PAGE_SHIFT is 12 -> PTE_MASK = 0xb

> +#define I915_PDE_MASK			(I915_PDES_PER_PD-1)
> +
>  typedef uint32_t gen6_gtt_pte_t;
>  typedef uint64_t gen8_gtt_pte_t;
>  typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
> @@ -23,6 +26,98 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
>  #define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
>  #define HSW_PTE_ADDR_ENCODE(addr)	HSW_GTT_ADDR_ENCODE(addr)
>  
> +
> +/* GEN6 PPGTT resembles a 2 level page table:
> + * 31:22 | 21:12 |  11:0
> + *  PDE  |  PTE  | offset
> + */
> +#define GEN6_PDE_SHIFT			22
> +#define GEN6_PTES_PER_PT		(PAGE_SIZE / sizeof(gen6_gtt_pte_t))
> +
> +static inline uint32_t i915_pte_index(uint64_t address, uint32_t pde_shift)
> +{
> +	const uint32_t mask = (1 << (pde_shift - PAGE_SHIFT)) - 1;
> +	return (address >> PAGE_SHIFT) & mask;
> +}
> +
> +/* Helper to counts the number of PTEs within the given length. This count does
> + * not cross a page table boundary, so the max value would be
> + * GEN6_PTES_PER_PT for GEN6, and GEN8_PTES_PER_PT for GEN8.
> + */
> +static inline size_t i915_pte_count(uint64_t addr, size_t length,
> +				    uint32_t pde_shift)
> +{
> +	const uint64_t pd_mask = ~((1 << pde_shift) - 1);
> +	uint64_t end;
> +
> +	if (WARN_ON(!length))
> +		return 0;
> +
> +	if (WARN_ON(addr % PAGE_SIZE))
> +		addr = round_down(addr, PAGE_SIZE);
> +
> +	if (WARN_ON(length % PAGE_SIZE))
> +		length = round_up(length, PAGE_SIZE);

Oh oh. I think these fixups are very suspect, so just
BUG_ON(length == 0);
BUG_ON(offset_in_page(addr|length));

> +
> +	end = addr + length;
> +
> +	if ((addr & pd_mask) != (end & pd_mask))
> +		return (1 << (pde_shift - PAGE_SHIFT)) -

#define NUM_PTE(pde_shift) (1 << (pde_shift - PAGE_SHIFT))
here and for computing the pd_mask.

> +			i915_pte_index(addr, pde_shift);
> +
> +	return i915_pte_index(end, pde_shift) - i915_pte_index(addr, pde_shift);
> +}

Otherwise the helpers look a useful improvement in readability.
-Chris
Jesse Barnes March 18, 2014, 6:29 p.m. UTC | #2
On Tue, 18 Mar 2014 09:05:58 +0000
Chris Wilson <chris@chris-wilson.co.uk> wrote:

> On Mon, Mar 17, 2014 at 10:48:44PM -0700, Ben Widawsky wrote:
> > --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> > @@ -1,8 +1,11 @@
> >  #ifndef _I915_GEM_GTT_H
> >  #define _I915_GEM_GTT_H
> >  
> > -#define GEN6_PPGTT_PD_ENTRIES 512
> > -#define I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
> > +/* GEN Agnostic defines */
> > +#define I915_PDES_PER_PD		512
> > +#define I915_PTE_MASK			(PAGE_SHIFT-1)
> 
> That looks decidely fishy.
> 
> PAGE_SHIFT is 12 -> PTE_MASK = 0xb
> 
> > +#define I915_PDE_MASK			(I915_PDES_PER_PD-1)
> > +
> >  typedef uint32_t gen6_gtt_pte_t;
> >  typedef uint64_t gen8_gtt_pte_t;
> >  typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
> > @@ -23,6 +26,98 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
> >  #define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
> >  #define HSW_PTE_ADDR_ENCODE(addr)	HSW_GTT_ADDR_ENCODE(addr)
> >  
> > +
> > +/* GEN6 PPGTT resembles a 2 level page table:
> > + * 31:22 | 21:12 |  11:0
> > + *  PDE  |  PTE  | offset
> > + */
> > +#define GEN6_PDE_SHIFT			22
> > +#define GEN6_PTES_PER_PT		(PAGE_SIZE / sizeof(gen6_gtt_pte_t))
> > +
> > +static inline uint32_t i915_pte_index(uint64_t address, uint32_t pde_shift)
> > +{
> > +	const uint32_t mask = (1 << (pde_shift - PAGE_SHIFT)) - 1;
> > +	return (address >> PAGE_SHIFT) & mask;
> > +}
> > +
> > +/* Helper to counts the number of PTEs within the given length. This count does
> > + * not cross a page table boundary, so the max value would be
> > + * GEN6_PTES_PER_PT for GEN6, and GEN8_PTES_PER_PT for GEN8.
> > + */
> > +static inline size_t i915_pte_count(uint64_t addr, size_t length,
> > +				    uint32_t pde_shift)
> > +{
> > +	const uint64_t pd_mask = ~((1 << pde_shift) - 1);
> > +	uint64_t end;
> > +
> > +	if (WARN_ON(!length))
> > +		return 0;
> > +
> > +	if (WARN_ON(addr % PAGE_SIZE))
> > +		addr = round_down(addr, PAGE_SIZE);
> > +
> > +	if (WARN_ON(length % PAGE_SIZE))
> > +		length = round_up(length, PAGE_SIZE);
> 
> Oh oh. I think these fixups are very suspect, so just
> BUG_ON(length == 0);
> BUG_ON(offset_in_page(addr|length));
> 
> > +
> > +	end = addr + length;
> > +
> > +	if ((addr & pd_mask) != (end & pd_mask))
> > +		return (1 << (pde_shift - PAGE_SHIFT)) -
> 
> #define NUM_PTE(pde_shift) (1 << (pde_shift - PAGE_SHIFT))
> here and for computing the pd_mask.
> 
> > +			i915_pte_index(addr, pde_shift);
> > +
> > +	return i915_pte_index(end, pde_shift) - i915_pte_index(addr, pde_shift);
> > +}
> 
> Otherwise the helpers look a useful improvement in readability.
> -Chris
> 

Can we use GTT_PAGE_SIZE here too?  I'm worried the kernel PAGE_SIZE
will change at some point and blow us up.  At least in places where
we're doing our own thing rather than using the x86 bits...
Ben Widawsky March 19, 2014, 12:58 a.m. UTC | #3
On Tue, Mar 18, 2014 at 11:29:58AM -0700, Jesse Barnes wrote:
> On Tue, 18 Mar 2014 09:05:58 +0000
> Chris Wilson <chris@chris-wilson.co.uk> wrote:
> 
> > On Mon, Mar 17, 2014 at 10:48:44PM -0700, Ben Widawsky wrote:
> > > --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> > > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> > > @@ -1,8 +1,11 @@
> > >  #ifndef _I915_GEM_GTT_H
> > >  #define _I915_GEM_GTT_H
> > >  
> > > -#define GEN6_PPGTT_PD_ENTRIES 512
> > > -#define I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
> > > +/* GEN Agnostic defines */
> > > +#define I915_PDES_PER_PD		512
> > > +#define I915_PTE_MASK			(PAGE_SHIFT-1)
> > 
> > That looks decidely fishy.
> > 
> > PAGE_SHIFT is 12 -> PTE_MASK = 0xb
> > 

Thanks for catching this. I'll presume the define isn't even used.

> > > +#define I915_PDE_MASK			(I915_PDES_PER_PD-1)
> > > +
> > >  typedef uint32_t gen6_gtt_pte_t;
> > >  typedef uint64_t gen8_gtt_pte_t;
> > >  typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
> > > @@ -23,6 +26,98 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
> > >  #define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
> > >  #define HSW_PTE_ADDR_ENCODE(addr)	HSW_GTT_ADDR_ENCODE(addr)
> > >  
> > > +
> > > +/* GEN6 PPGTT resembles a 2 level page table:
> > > + * 31:22 | 21:12 |  11:0
> > > + *  PDE  |  PTE  | offset
> > > + */
> > > +#define GEN6_PDE_SHIFT			22
> > > +#define GEN6_PTES_PER_PT		(PAGE_SIZE / sizeof(gen6_gtt_pte_t))
> > > +
> > > +static inline uint32_t i915_pte_index(uint64_t address, uint32_t pde_shift)
> > > +{
> > > +	const uint32_t mask = (1 << (pde_shift - PAGE_SHIFT)) - 1;
> > > +	return (address >> PAGE_SHIFT) & mask;
> > > +}
> > > +
> > > +/* Helper to counts the number of PTEs within the given length. This count does
> > > + * not cross a page table boundary, so the max value would be
> > > + * GEN6_PTES_PER_PT for GEN6, and GEN8_PTES_PER_PT for GEN8.
> > > + */
> > > +static inline size_t i915_pte_count(uint64_t addr, size_t length,
> > > +				    uint32_t pde_shift)
> > > +{
> > > +	const uint64_t pd_mask = ~((1 << pde_shift) - 1);
> > > +	uint64_t end;
> > > +
> > > +	if (WARN_ON(!length))
> > > +		return 0;
> > > +
> > > +	if (WARN_ON(addr % PAGE_SIZE))
> > > +		addr = round_down(addr, PAGE_SIZE);
> > > +
> > > +	if (WARN_ON(length % PAGE_SIZE))
> > > +		length = round_up(length, PAGE_SIZE);
> > 
> > Oh oh. I think these fixups are very suspect, so just
> > BUG_ON(length == 0);
> > BUG_ON(offset_in_page(addr|length));
> > 

I thought someone might have an issue with the BUG_ON. But I prefer it
as well.

> > > +
> > > +	end = addr + length;
> > > +
> > > +	if ((addr & pd_mask) != (end & pd_mask))
> > > +		return (1 << (pde_shift - PAGE_SHIFT)) -
> > 
> > #define NUM_PTE(pde_shift) (1 << (pde_shift - PAGE_SHIFT))
> > here and for computing the pd_mask.
> > 
> > > +			i915_pte_index(addr, pde_shift);
> > > +
> > > +	return i915_pte_index(end, pde_shift) - i915_pte_index(addr, pde_shift);
> > > +}
> > 
> > Otherwise the helpers look a useful improvement in readability.
> > -Chris
> > 
> 
> Can we use GTT_PAGE_SIZE here too?  I'm worried the kernel PAGE_SIZE
> will change at some point and blow us up.  At least in places where
> we're doing our own thing rather than using the x86 bits...

That's fine with me. We have quite a few other places in our code which
depend on PAGE_SIZE being 4k though.

It's likely I'll be maintaining this branch myself for a while, but I'll
modify these both locally.

> 
> -- 
> Jesse Barnes, Intel Open Source Technology Center
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 77556ac..7afa5f4 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -220,7 +220,7 @@  static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
 	int i, ret;
 
 	/* bit of a hack to find the actual last used pd */
-	int used_pd = ppgtt->num_pd_entries / GEN8_PDES_PER_PAGE;
+	int used_pd = ppgtt->num_pd_entries / I915_PDES_PER_PD;
 
 	for (i = used_pd - 1; i >= 0; i--) {
 		dma_addr_t addr = ppgtt->pd_dma_addr[i];
@@ -240,9 +240,9 @@  static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
 	gen8_gtt_pte_t *pt_vaddr, scratch_pte;
-	unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
-	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
-	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
+	unsigned pdpe = gen8_pdpe_index(start);
+	unsigned pde = gen8_pde_index(start);
+	unsigned pte = gen8_pte_index(start);
 	unsigned num_entries = length >> PAGE_SHIFT;
 	unsigned last_pte, i;
 
@@ -253,8 +253,8 @@  static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
 		struct page *page_table = ppgtt->gen8_pt_pages[pdpe][pde];
 
 		last_pte = pte + num_entries;
-		if (last_pte > GEN8_PTES_PER_PAGE)
-			last_pte = GEN8_PTES_PER_PAGE;
+		if (last_pte > GEN8_PTES_PER_PT)
+			last_pte = GEN8_PTES_PER_PT;
 
 		pt_vaddr = kmap_atomic(page_table);
 
@@ -266,7 +266,7 @@  static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
 		kunmap_atomic(pt_vaddr);
 
 		pte = 0;
-		if (++pde == GEN8_PDES_PER_PAGE) {
+		if (++pde == I915_PDES_PER_PD) {
 			pdpe++;
 			pde = 0;
 		}
@@ -281,9 +281,9 @@  static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
 	gen8_gtt_pte_t *pt_vaddr;
-	unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
-	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
-	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
+	unsigned pdpe = gen8_pdpe_index(start);
+	unsigned pde = gen8_pde_index(start);
+	unsigned pte = gen8_pte_index(start);
 	struct sg_page_iter sg_iter;
 
 	pt_vaddr = NULL;
@@ -298,10 +298,10 @@  static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
 		pt_vaddr[pte] =
 			gen8_pte_encode(sg_page_iter_dma_address(&sg_iter),
 					cache_level, true);
-		if (++pte == GEN8_PTES_PER_PAGE) {
+		if (++pte == GEN8_PTES_PER_PT) {
 			kunmap_atomic(pt_vaddr);
 			pt_vaddr = NULL;
-			if (++pde == GEN8_PDES_PER_PAGE) {
+			if (++pde == I915_PDES_PER_PD) {
 				pdpe++;
 				pde = 0;
 			}
@@ -319,7 +319,7 @@  static void gen8_free_page_tables(struct page **pt_pages)
 	if (pt_pages == NULL)
 		return;
 
-	for (i = 0; i < GEN8_PDES_PER_PAGE; i++)
+	for (i = 0; i < I915_PDES_PER_PD; i++)
 		if (pt_pages[i])
 			__free_pages(pt_pages[i], 0);
 }
@@ -351,7 +351,7 @@  static void gen8_ppgtt_dma_unmap_pages(struct i915_hw_ppgtt *ppgtt)
 		pci_unmap_page(hwdev, ppgtt->pd_dma_addr[i], PAGE_SIZE,
 			       PCI_DMA_BIDIRECTIONAL);
 
-		for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
+		for (j = 0; j < I915_PDES_PER_PD; j++) {
 			dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
 			if (addr)
 				pci_unmap_page(hwdev, addr, PAGE_SIZE,
@@ -377,11 +377,11 @@  static struct page **__gen8_alloc_page_tables(void)
 	struct page **pt_pages;
 	int i;
 
-	pt_pages = kcalloc(GEN8_PDES_PER_PAGE, sizeof(struct page *), GFP_KERNEL);
+	pt_pages = kcalloc(I915_PDES_PER_PD, sizeof(struct page *), GFP_KERNEL);
 	if (!pt_pages)
 		return ERR_PTR(-ENOMEM);
 
-	for (i = 0; i < GEN8_PDES_PER_PAGE; i++) {
+	for (i = 0; i < I915_PDES_PER_PD; i++) {
 		pt_pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
 		if (!pt_pages[i])
 			goto bail;
@@ -431,7 +431,7 @@  static int gen8_ppgtt_allocate_dma(struct i915_hw_ppgtt *ppgtt)
 	int i;
 
 	for (i = 0; i < ppgtt->num_pd_pages; i++) {
-		ppgtt->gen8_pt_dma_addr[i] = kcalloc(GEN8_PDES_PER_PAGE,
+		ppgtt->gen8_pt_dma_addr[i] = kcalloc(I915_PDES_PER_PD,
 						     sizeof(dma_addr_t),
 						     GFP_KERNEL);
 		if (!ppgtt->gen8_pt_dma_addr[i])
@@ -470,7 +470,7 @@  static int gen8_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt,
 		return ret;
 	}
 
-	ppgtt->num_pd_entries = max_pdp * GEN8_PDES_PER_PAGE;
+	ppgtt->num_pd_entries = max_pdp * I915_PDES_PER_PD;
 
 	ret = gen8_ppgtt_allocate_dma(ppgtt);
 	if (ret)
@@ -531,7 +531,7 @@  static int gen8_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt,
 static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
 {
 	const int max_pdp = DIV_ROUND_UP(size, 1 << 30);
-	const int min_pt_pages = GEN8_PDES_PER_PAGE * max_pdp;
+	const int min_pt_pages = I915_PDES_PER_PD * max_pdp;
 	int i, j, ret;
 
 	if (size % (1<<30))
@@ -550,7 +550,7 @@  static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
 		if (ret)
 			goto bail;
 
-		for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
+		for (j = 0; j < I915_PDES_PER_PD; j++) {
 			ret = gen8_ppgtt_setup_page_tables(ppgtt, i, j);
 			if (ret)
 				goto bail;
@@ -568,7 +568,7 @@  static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
 	for (i = 0; i < max_pdp; i++) {
 		gen8_ppgtt_pde_t *pd_vaddr;
 		pd_vaddr = kmap_atomic(&ppgtt->pd_pages[i]);
-		for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
+		for (j = 0; j < I915_PDES_PER_PD; j++) {
 			dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
 			pd_vaddr[j] = gen8_pde_encode(ppgtt->base.dev, addr,
 						      I915_CACHE_LLC);
@@ -582,7 +582,7 @@  static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
 	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
 	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
 	ppgtt->base.start = 0;
-	ppgtt->base.total = ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE * PAGE_SIZE;
+	ppgtt->base.total = ppgtt->num_pd_entries * GEN8_PTES_PER_PT * PAGE_SIZE;
 
 	DRM_DEBUG_DRIVER("Allocated %d pages for page directories (%d wasted)\n",
 			 ppgtt->num_pd_pages, ppgtt->num_pd_pages - max_pdp);
@@ -628,9 +628,9 @@  static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
 		seq_printf(m, "\tPDE: %x\n", pd_entry);
 
 		pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]);
-		for (pte = 0; pte < I915_PPGTT_PT_ENTRIES; pte+=4) {
+		for (pte = 0; pte < GEN6_PTES_PER_PT; pte+=4) {
 			unsigned long va =
-				(pde * PAGE_SIZE * I915_PPGTT_PT_ENTRIES) +
+				(pde * PAGE_SIZE * GEN6_PTES_PER_PT) +
 				(pte * PAGE_SIZE);
 			int i;
 			bool found = false;
@@ -909,29 +909,28 @@  static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
 	gen6_gtt_pte_t *pt_vaddr, scratch_pte;
-	unsigned first_entry = start >> PAGE_SHIFT;
+	unsigned pde = gen6_pde_index(start);
 	unsigned num_entries = length >> PAGE_SHIFT;
-	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
-	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
+	unsigned pte = gen6_pte_index(start);
 	unsigned last_pte, i;
 
 	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true);
 
 	while (num_entries) {
-		last_pte = first_pte + num_entries;
-		if (last_pte > I915_PPGTT_PT_ENTRIES)
-			last_pte = I915_PPGTT_PT_ENTRIES;
+		last_pte = pte + num_entries;
+		if (last_pte > GEN6_PTES_PER_PT)
+			last_pte = GEN6_PTES_PER_PT;
 
-		pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
+		pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]);
 
-		for (i = first_pte; i < last_pte; i++)
+		for (i = pte; i < last_pte; i++)
 			pt_vaddr[i] = scratch_pte;
 
 		kunmap_atomic(pt_vaddr);
 
-		num_entries -= last_pte - first_pte;
-		first_pte = 0;
-		act_pt++;
+		num_entries -= last_pte - pte;
+		pte = 0;
+		pde++;
 	}
 }
 
@@ -943,24 +942,23 @@  static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
 	gen6_gtt_pte_t *pt_vaddr;
-	unsigned first_entry = start >> PAGE_SHIFT;
-	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
-	unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
+	unsigned pde = gen6_pde_index(start);
+	unsigned pte = gen6_pte_index(start);
 	struct sg_page_iter sg_iter;
 
 	pt_vaddr = NULL;
 	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
 		if (pt_vaddr == NULL)
-			pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pt]);
+			pt_vaddr = kmap_atomic(ppgtt->pt_pages[pde]);
 
-		pt_vaddr[act_pte] =
+		pt_vaddr[pte] =
 			vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
 				       cache_level, true);
-		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
+		if (++pte == GEN6_PTES_PER_PT) {
 			kunmap_atomic(pt_vaddr);
 			pt_vaddr = NULL;
-			act_pt++;
-			act_pte = 0;
+			pde++;
+			pte = 0;
 		}
 	}
 	if (pt_vaddr)
@@ -1005,7 +1003,7 @@  static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
 static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
 {
 #define GEN6_PD_ALIGN (PAGE_SIZE * 16)
-#define GEN6_PD_SIZE (GEN6_PPGTT_PD_ENTRIES * PAGE_SIZE)
+#define GEN6_PD_SIZE (I915_PDES_PER_PD * PAGE_SIZE)
 	struct drm_device *dev = ppgtt->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	bool retried = false;
@@ -1039,7 +1037,7 @@  alloc:
 	if (ppgtt->node.start < dev_priv->gtt.mappable_end)
 		DRM_DEBUG("Forced to use aperture for PDEs\n");
 
-	ppgtt->num_pd_entries = GEN6_PPGTT_PD_ENTRIES;
+	ppgtt->num_pd_entries = I915_PDES_PER_PD;
 	return 0;
 }
 
@@ -1144,7 +1142,7 @@  static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 	ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
 	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
 	ppgtt->base.start = 0;
-	ppgtt->base.total =  ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES * PAGE_SIZE;
+	ppgtt->base.total =  ppgtt->num_pd_entries * GEN6_PTES_PER_PT * PAGE_SIZE;
 	ppgtt->debug_dump = gen6_dump_ppgtt;
 
 	ppgtt->pd_offset =
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index c8d5c77..f813769 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -1,8 +1,11 @@ 
 #ifndef _I915_GEM_GTT_H
 #define _I915_GEM_GTT_H
 
-#define GEN6_PPGTT_PD_ENTRIES 512
-#define I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
+/* GEN Agnostic defines */
+#define I915_PDES_PER_PD		512
+#define I915_PTE_MASK			(PAGE_SHIFT-1)
+#define I915_PDE_MASK			(I915_PDES_PER_PD-1)
+
 typedef uint32_t gen6_gtt_pte_t;
 typedef uint64_t gen8_gtt_pte_t;
 typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
@@ -23,6 +26,98 @@  typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
 #define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
 #define HSW_PTE_ADDR_ENCODE(addr)	HSW_GTT_ADDR_ENCODE(addr)
 
+
+/* GEN6 PPGTT resembles a 2 level page table:
+ * 31:22 | 21:12 |  11:0
+ *  PDE  |  PTE  | offset
+ */
+#define GEN6_PDE_SHIFT			22
+#define GEN6_PTES_PER_PT		(PAGE_SIZE / sizeof(gen6_gtt_pte_t))
+
+static inline uint32_t i915_pte_index(uint64_t address, uint32_t pde_shift)
+{
+	const uint32_t mask = (1 << (pde_shift - PAGE_SHIFT)) - 1;
+	return (address >> PAGE_SHIFT) & mask;
+}
+
+/* Helper to counts the number of PTEs within the given length. This count does
+ * not cross a page table boundary, so the max value would be
+ * GEN6_PTES_PER_PT for GEN6, and GEN8_PTES_PER_PT for GEN8.
+ */
+static inline size_t i915_pte_count(uint64_t addr, size_t length,
+				    uint32_t pde_shift)
+{
+	const uint64_t pd_mask = ~((1 << pde_shift) - 1);
+	uint64_t end;
+
+	if (WARN_ON(!length))
+		return 0;
+
+	if (WARN_ON(addr % PAGE_SIZE))
+		addr = round_down(addr, PAGE_SIZE);
+
+	if (WARN_ON(length % PAGE_SIZE))
+		length = round_up(length, PAGE_SIZE);
+
+	end = addr + length;
+
+	if ((addr & pd_mask) != (end & pd_mask))
+		return (1 << (pde_shift - PAGE_SHIFT)) -
+			i915_pte_index(addr, pde_shift);
+
+	return i915_pte_index(end, pde_shift) - i915_pte_index(addr, pde_shift);
+}
+
+static inline uint32_t i915_pde_index(uint64_t addr, uint32_t shift)
+{
+	return (addr >> shift) & I915_PDE_MASK;
+}
+
+static inline size_t i915_pde_count(uint64_t addr, uint64_t length,
+				    uint32_t pde_shift)
+{
+	const uint32_t pdp_shift = pde_shift + 9;
+	uint32_t start, end;
+
+	if (WARN_ON(!length))
+		return 0;
+
+	if (WARN_ON(addr % PAGE_SIZE))
+		addr = round_down(addr, PAGE_SIZE);
+
+	if (WARN_ON(length % PAGE_SIZE))
+		length = round_up(length, PAGE_SIZE);
+
+	start = i915_pde_index(addr, pde_shift);
+	end = round_up(addr + length, (1 << pde_shift)) >> pde_shift;
+
+	if (addr >> pdp_shift != (addr + length) >> pdp_shift)
+		end = round_down(end, I915_PDES_PER_PD);
+
+	BUG_ON(start > end);
+	return end - start;
+}
+
+static inline uint32_t gen6_pte_index(uint32_t addr)
+{
+	return i915_pte_index(addr, GEN6_PDE_SHIFT);
+}
+
+static inline size_t gen6_pte_count(uint32_t addr, uint32_t length)
+{
+	return i915_pte_count(addr, length, GEN6_PDE_SHIFT);
+}
+
+static inline uint32_t gen6_pde_index(uint32_t addr)
+{
+	return i915_pde_index(addr, GEN6_PDE_SHIFT);
+}
+
+static inline size_t gen6_pde_count(uint32_t addr, uint32_t length)
+{
+	return i915_pde_count(addr, length, GEN6_PDE_SHIFT);
+}
+
 #define BYT_PTE_WRITEABLE		(1 << 1)
 #define BYT_PTE_SNOOPED_BY_CPU_CACHES	(1 << 2)
 
@@ -44,8 +139,7 @@  typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
 #define PPAT_DISPLAY_ELLC_INDEX		_PAGE_PCD /* WT eLLC */
 
 #define GEN8_LEGACY_PDPS		4
-#define GEN8_PTES_PER_PAGE		(PAGE_SIZE / sizeof(gen8_gtt_pte_t))
-#define GEN8_PDES_PER_PAGE		(PAGE_SIZE / sizeof(gen8_ppgtt_pde_t))
+#define GEN8_PTES_PER_PT		(PAGE_SIZE / sizeof(gen8_gtt_pte_t))
 
 /* GEN8 legacy style addressis defined as a 3 level page table:
  * 31:30 | 29:21 | 20:12 |  11:0
@@ -60,9 +154,26 @@  typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
 #define GEN8_PDPE_SHIFT			30
 #define GEN8_PDPE_MASK			0x3
 #define GEN8_PDE_SHIFT			21
-#define GEN8_PDE_MASK			0x1ff
-#define GEN8_PTE_SHIFT			12
-#define GEN8_PTE_MASK			0x1ff
+
+static inline uint32_t gen8_pte_index(uint64_t address)
+{
+	return i915_pte_index(address, GEN8_PDE_SHIFT);
+}
+
+static inline uint32_t gen8_pde_index(uint64_t address)
+{
+	return i915_pde_index(address, GEN8_PDE_SHIFT);
+}
+
+static inline uint32_t gen8_pdpe_index(uint64_t address)
+{
+	return (address >> GEN8_PDPE_SHIFT) & GEN8_PDPE_MASK;
+}
+
+static inline uint32_t gen8_pml4e_index(uint64_t address)
+{
+	BUG();
+}
 
 enum i915_cache_level;
 /**