diff mbox

[05/12] drm/i915/bdw: implement alloc/free for 4lvl

Message ID 1424454366-19006-6-git-send-email-michel.thierry@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Michel Thierry Feb. 20, 2015, 5:45 p.m. UTC
From: Ben Widawsky <benjamin.widawsky@intel.com>

The code for 4lvl works just as one would expect, and nicely it is able
to call into the existing 3lvl page table code to handle all of the
lower levels.

PML4 has no special attributes, and there will always be a PML4.
So simply initialize it at creation, and destroy it at the end.

v2: Return something at the end of gen8_alloc_va_range_4lvl to keep the
compiler happy. And define ret only in one place.
Updated gen8_ppgtt_unmap_pages and gen8_ppgtt_free to handle 4lvl.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2)
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 240 +++++++++++++++++++++++++++++++-----
 drivers/gpu/drm/i915/i915_gem_gtt.h |  11 +-
 2 files changed, 217 insertions(+), 34 deletions(-)

Comments

Akash Goel March 3, 2015, 12:55 p.m. UTC | #1
On Fri, Feb 20, 2015 at 11:15 PM, Michel Thierry
<michel.thierry@intel.com> wrote:
> From: Ben Widawsky <benjamin.widawsky@intel.com>
>
> The code for 4lvl works just as one would expect, and nicely it is able
> to call into the existing 3lvl page table code to handle all of the
> lower levels.
>
> PML4 has no special attributes, and there will always be a PML4.
> So simply initialize it at creation, and destroy it at the end.
>
> v2: Return something at the end of gen8_alloc_va_range_4lvl to keep the
> compiler happy. And define ret only in one place.
> Updated gen8_ppgtt_unmap_pages and gen8_ppgtt_free to handle 4lvl.
>
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2)
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 240 +++++++++++++++++++++++++++++++-----
>  drivers/gpu/drm/i915/i915_gem_gtt.h |  11 +-
>  2 files changed, 217 insertions(+), 34 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 1edcc17..edada33 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -483,9 +483,12 @@ static void __pdp_fini(struct i915_page_directory_pointer_entry *pdp)
>  static void unmap_and_free_pdp(struct i915_page_directory_pointer_entry *pdp,
>                             struct drm_device *dev)
>  {
> -       __pdp_fini(pdp);
> -       if (USES_FULL_48BIT_PPGTT(dev))
> +       if (USES_FULL_48BIT_PPGTT(dev)) {
> +               __pdp_fini(pdp);

The call to __pdp_fini should be made for the 32 bit case also.
The 'used_pdpes' bitmap & 'page_directory' double pointer need to be freed
in the 32 bit case too (both are allocated inside __pdp_init, called from
gen8_ppgtt_init_common).

> +               i915_dma_unmap_single(pdp, dev);
> +               __free_page(pdp->page);
>                 kfree(pdp);
> +       }
>  }
>
>  static int __pdp_init(struct i915_page_directory_pointer_entry *pdp,
> @@ -511,6 +514,60 @@ static int __pdp_init(struct i915_page_directory_pointer_entry *pdp,
>         return 0;
>  }
>
> +static struct i915_page_directory_pointer_entry *alloc_pdp_single(struct i915_hw_ppgtt *ppgtt,
> +                                              struct i915_pml4 *pml4)
> +{
> +       struct drm_device *dev = ppgtt->base.dev;
> +       struct i915_page_directory_pointer_entry *pdp;
> +       int ret;
> +
> +       BUG_ON(!USES_FULL_48BIT_PPGTT(dev));
> +
> +       pdp = kmalloc(sizeof(*pdp), GFP_KERNEL);
> +       if (!pdp)
> +               return ERR_PTR(-ENOMEM);
> +
> +       pdp->page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO);
> +       if (!pdp->page) {
> +               kfree(pdp);
> +               return ERR_PTR(-ENOMEM);
> +       }
> +
> +       ret = __pdp_init(pdp, dev);
> +       if (ret) {
> +               __free_page(pdp->page);
> +               kfree(pdp);
> +               return ERR_PTR(ret);
> +       }
> +
> +       i915_dma_map_px_single(pdp, dev);
> +
> +       return pdp;
> +}
> +
> +static void pml4_fini(struct i915_pml4 *pml4)
> +{
> +       struct i915_hw_ppgtt *ppgtt =
> +               container_of(pml4, struct i915_hw_ppgtt, pml4);
> +       i915_dma_unmap_single(pml4, ppgtt->base.dev);
> +       __free_page(pml4->page);
> +       /* HACK */
> +       pml4->page = NULL;
> +}
> +
> +static int pml4_init(struct i915_hw_ppgtt *ppgtt)
> +{
> +       struct i915_pml4 *pml4 = &ppgtt->pml4;
> +
> +       pml4->page = alloc_page(GFP_KERNEL | __GFP_ZERO);
> +       if (!pml4->page)
> +               return -ENOMEM;
> +
> +       i915_dma_map_px_single(pml4, ppgtt->base.dev);
> +
> +       return 0;
> +}
> +
>  /* Broadwell Page Directory Pointer Descriptors */
>  static int gen8_write_pdp(struct intel_engine_cs *ring,
>                           unsigned entry,
> @@ -712,14 +769,13 @@ static void gen8_free_page_tables(struct i915_page_directory_entry *pd, struct d
>         }
>  }
>
> -static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
> +static void gen8_ppgtt_unmap_pages_3lvl(struct i915_page_directory_pointer_entry *pdp,
> +                                       struct drm_device *dev)
>  {
> -       struct pci_dev *hwdev = ppgtt->base.dev->pdev;
> -       struct i915_page_directory_pointer_entry *pdp = &ppgtt->pdp; /* FIXME: 48b */
> +       struct pci_dev *hwdev = dev->pdev;
>         int i, j;
>
> -       for_each_set_bit(i, pdp->used_pdpes,
> -                       I915_PDPES_PER_PDP(ppgtt->base.dev)) {
> +       for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) {
>                 struct i915_page_directory_entry *pd;
>
>                 if (WARN_ON(!pdp->page_directory[i]))
> @@ -747,27 +803,73 @@ static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
>         }
>  }
>
> -static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
> +static void gen8_ppgtt_unmap_pages_4lvl(struct i915_hw_ppgtt *ppgtt)
>  {
> +       struct pci_dev *hwdev = ppgtt->base.dev->pdev;
>         int i;
>
> -       if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
> -               for_each_set_bit(i, ppgtt->pdp.used_pdpes,
> -                                I915_PDPES_PER_PDP(ppgtt->base.dev)) {
> -                       if (WARN_ON(!ppgtt->pdp.page_directory[i]))
> -                               continue;
> +       for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) {
> +               struct i915_page_directory_pointer_entry *pdp;
>
> -                       gen8_free_page_tables(ppgtt->pdp.page_directory[i],
> -                                             ppgtt->base.dev);
> -                       unmap_and_free_pd(ppgtt->pdp.page_directory[i],
> -                                         ppgtt->base.dev);
> -               }
> -               unmap_and_free_pdp(&ppgtt->pdp, ppgtt->base.dev);
> -       } else {
> -               BUG(); /* to be implemented later */
> +               if (WARN_ON(!ppgtt->pml4.pdps[i]))
> +                       continue;
> +
> +               pdp = ppgtt->pml4.pdps[i];
> +               if (!pdp->daddr)
> +                       pci_unmap_page(hwdev, pdp->daddr, PAGE_SIZE,
> +                                      PCI_DMA_BIDIRECTIONAL);
> +

For consistency & cleanup, the call to pci_unmap_page can be replaced
with i915_dma_unmap_single.
The same can be done inside the gen8_ppgtt_unmap_pages_3lvl function also.

> +               gen8_ppgtt_unmap_pages_3lvl(ppgtt->pml4.pdps[i],
> +                                           ppgtt->base.dev);
>         }
>  }
>
> +static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
> +{
> +       if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
> +               gen8_ppgtt_unmap_pages_3lvl(&ppgtt->pdp, ppgtt->base.dev);
> +       else
> +               gen8_ppgtt_unmap_pages_4lvl(ppgtt);
> +}
> +
> +static void gen8_ppgtt_free_3lvl(struct i915_page_directory_pointer_entry *pdp,
> +                                struct drm_device *dev)
> +{
> +       int i;
> +
> +       for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) {
> +               if (WARN_ON(!pdp->page_directory[i]))
> +                       continue;
> +
> +               gen8_free_page_tables(pdp->page_directory[i], dev);
> +               unmap_and_free_pd(pdp->page_directory[i], dev);
> +       }
> +
> +       unmap_and_free_pdp(pdp, dev);
> +}
> +
> +static void gen8_ppgtt_free_4lvl(struct i915_hw_ppgtt *ppgtt)
> +{
> +       int i;
> +
> +       for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) {
> +               if (WARN_ON(!ppgtt->pml4.pdps[i]))
> +                       continue;
> +
> +               gen8_ppgtt_free_3lvl(ppgtt->pml4.pdps[i], ppgtt->base.dev);
> +       }
> +
> +       pml4_fini(&ppgtt->pml4);
> +}
> +
> +static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
> +{
> +       if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
> +               gen8_ppgtt_free_3lvl(&ppgtt->pdp, ppgtt->base.dev);
> +       else
> +               gen8_ppgtt_free_4lvl(ppgtt);
> +}
> +
>  static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
>  {
>         struct i915_hw_ppgtt *ppgtt =

Is the call to 'gen8_ppgtt_unmap_pages' really needed from the
'gen8_ppgtt_cleanup' function,
considering that gen8_ppgtt_free will also do the unmap operation for
the pml4, pdp, pd & pt pages?

> @@ -1040,12 +1142,74 @@ err_out:
>         return ret;
>  }
>
> -static int __noreturn gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
> -                                              struct i915_pml4 *pml4,
> -                                              uint64_t start,
> -                                              uint64_t length)
> +static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
> +                                   struct i915_pml4 *pml4,
> +                                   uint64_t start,
> +                                   uint64_t length)
>  {
> -       BUG(); /* to be implemented later */
> +       DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4);
> +       struct i915_hw_ppgtt *ppgtt =
> +               container_of(vm, struct i915_hw_ppgtt, base);
> +       struct i915_page_directory_pointer_entry *pdp;
> +       const uint64_t orig_start = start;
> +       const uint64_t orig_length = length;
> +       uint64_t temp, pml4e;
> +       int ret = 0;
> +
> +       /* Do the pml4 allocations first, so we don't need to track the newly
> +        * allocated tables below the pdp */
> +       bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4);
> +
> +       /* The page_directoryectory and pagetable allocations are done in the shared 3
> +        * and 4 level code. Just allocate the pdps.
> +        */
> +       gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) {
> +               if (!pdp) {
> +                       WARN_ON(test_bit(pml4e, pml4->used_pml4es));
> +                       pdp = alloc_pdp_single(ppgtt, pml4);
> +                       if (IS_ERR(pdp))
> +                               goto err_alloc;
> +
> +                       pml4->pdps[pml4e] = pdp;
> +                       set_bit(pml4e, new_pdps);
> +                       trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base, pml4e,
> +                                                  pml4e << GEN8_PML4E_SHIFT,
> +                                                  GEN8_PML4E_SHIFT);
> +
> +               }
> +       }
> +
> +       WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2,
> +            "The allocation has spanned more than 512GB. "
> +            "It is highly likely this is incorrect.");
> +
> +       start = orig_start;
> +       length = orig_length;
> +
> +       gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) {
> +               BUG_ON(!pdp);
> +
> +               ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length);
> +               if (ret)
> +                       goto err_out;
> +       }
> +
> +       bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es,
> +                 GEN8_PML4ES_PER_PML4);
> +
> +       return 0;
> +
> +err_out:
> +       start = orig_start;
> +       length = orig_length;
> +       gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e)
> +               gen8_ppgtt_free_3lvl(pdp, vm->dev);
> +
> +err_alloc:
> +       for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
> +               unmap_and_free_pdp(pdp, vm->dev);
> +

If gen8_alloc_va_range_3lvl returns an error, then there can be 2
calls to unmap_and_free_pdp
for the same pdp value: gen8_ppgtt_free_3lvl will also call
unmap_and_free_pdp for the newly allocated
pdp.
gen8_alloc_va_range_3lvl already seems to handle errors
internally, i.e. it de-allocates the newly
allocated pages for the page directory & page tables if there is an
allocation failure somewhere.
So, similarly, the gen8_alloc_va_range_4lvl function can just do the
de-allocation for the newly allocated pdps ('new_pdps').

> +       return ret;
>  }
>
>  static int gen8_alloc_va_range(struct i915_address_space *vm,
> @@ -1054,16 +1218,19 @@ static int gen8_alloc_va_range(struct i915_address_space *vm,
>         struct i915_hw_ppgtt *ppgtt =
>                 container_of(vm, struct i915_hw_ppgtt, base);
>
> -       if (!USES_FULL_48BIT_PPGTT(vm->dev))
> -               return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
> -       else
> +       if (USES_FULL_48BIT_PPGTT(vm->dev))
>                 return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length);
> +       else
> +               return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
>  }
>
>  static void gen8_ppgtt_fini_common(struct i915_hw_ppgtt *ppgtt)
>  {
>         unmap_and_free_pt(ppgtt->scratch_pd, ppgtt->base.dev);
> -       unmap_and_free_pdp(&ppgtt->pdp, ppgtt->base.dev);
> +       if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
> +               pml4_fini(&ppgtt->pml4);
> +       else
> +               unmap_and_free_pdp(&ppgtt->pdp, ppgtt->base.dev);
>  }
>
>  /**
> @@ -1086,14 +1253,21 @@ static int gen8_ppgtt_init_common(struct i915_hw_ppgtt *ppgtt, uint64_t size)
>
>         ppgtt->switch_mm = gen8_mm_switch;
>
> -       if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
> +       if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
> +               int ret = pml4_init(ppgtt);
> +               if (ret) {
> +                       unmap_and_free_pt(ppgtt->scratch_pd, ppgtt->base.dev);
> +                       return ret;
> +               }
> +       } else {
>                 int ret = __pdp_init(&ppgtt->pdp, false);
>                 if (ret) {
>                         unmap_and_free_pt(ppgtt->scratch_pd, ppgtt->base.dev);
>                         return ret;
>                 }
> -       } else
> -               return -EPERM; /* Not yet implemented */
> +
> +               trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base, 0, 0, GEN8_PML4E_SHIFT);
> +       }
>
>         return 0;
>  }
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 144858e..1477f54 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -87,6 +87,7 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
>   */
>  #define GEN8_PML4ES_PER_PML4           512
>  #define GEN8_PML4E_SHIFT               39
> +#define GEN8_PML4E_MASK                        (GEN8_PML4ES_PER_PML4 - 1)
>  #define GEN8_PDPE_SHIFT                        30
>  /* NB: GEN8_PDPE_MASK is untrue for 32b platforms, but it has no impact on 32b page
>   * tables */
> @@ -427,6 +428,14 @@ static inline uint32_t gen6_pde_index(uint32_t addr)
>              temp = min(temp, length),                                  \
>              start += temp, length -= temp)
>
> +#define gen8_for_each_pml4e(pdp, pml4, start, length, temp, iter)      \
> +       for (iter = gen8_pml4e_index(start), pdp = (pml4)->pdps[iter];  \
> +            length > 0 && iter < GEN8_PML4ES_PER_PML4;                 \
> +            pdp = (pml4)->pdps[++iter],                                \
> +            temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT) - start,   \
> +            temp = min(temp, length),                                  \
> +            start += temp, length -= temp)
> +
>  #define gen8_for_each_pdpe(pd, pdp, start, length, temp, iter)         \
>         gen8_for_each_pdpe_e(pd, pdp, start, length, temp, iter, I915_PDPES_PER_PDP(dev))
>
> @@ -458,7 +467,7 @@ static inline uint32_t gen8_pdpe_index(uint64_t address)
>
>  static inline uint32_t gen8_pml4e_index(uint64_t address)
>  {
> -       BUG(); /* For 64B */
> +       return (address >> GEN8_PML4E_SHIFT) & GEN8_PML4E_MASK;
>  }
>
>  static inline size_t gen8_pte_count(uint64_t addr, uint64_t length)
> --
> 2.1.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Akash Goel March 4, 2015, 2:48 a.m. UTC | #2
On Fri, Feb 20, 2015 at 11:15 PM, Michel Thierry
<michel.thierry@intel.com> wrote:
> From: Ben Widawsky <benjamin.widawsky@intel.com>
>
> The code for 4lvl works just as one would expect, and nicely it is able
> to call into the existing 3lvl page table code to handle all of the
> lower levels.
>
> PML4 has no special attributes, and there will always be a PML4.
> So simply initialize it at creation, and destroy it at the end.
>
> v2: Return something at the end of gen8_alloc_va_range_4lvl to keep the
> compiler happy. And define ret only in one place.
> Updated gen8_ppgtt_unmap_pages and gen8_ppgtt_free to handle 4lvl.
>
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2)
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 240 +++++++++++++++++++++++++++++++-----
>  drivers/gpu/drm/i915/i915_gem_gtt.h |  11 +-
>  2 files changed, 217 insertions(+), 34 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 1edcc17..edada33 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -483,9 +483,12 @@ static void __pdp_fini(struct i915_page_directory_pointer_entry *pdp)
>  static void unmap_and_free_pdp(struct i915_page_directory_pointer_entry *pdp,
>                             struct drm_device *dev)
>  {
> -       __pdp_fini(pdp);
> -       if (USES_FULL_48BIT_PPGTT(dev))
> +       if (USES_FULL_48BIT_PPGTT(dev)) {
> +               __pdp_fini(pdp);
> +               i915_dma_unmap_single(pdp, dev);
> +               __free_page(pdp->page);
>                 kfree(pdp);
> +       }
>  }
>
>  static int __pdp_init(struct i915_page_directory_pointer_entry *pdp,
> @@ -511,6 +514,60 @@ static int __pdp_init(struct i915_page_directory_pointer_entry *pdp,
>         return 0;
>  }
>
> +static struct i915_page_directory_pointer_entry *alloc_pdp_single(struct i915_hw_ppgtt *ppgtt,
> +                                              struct i915_pml4 *pml4)
> +{
> +       struct drm_device *dev = ppgtt->base.dev;
> +       struct i915_page_directory_pointer_entry *pdp;
> +       int ret;
> +
> +       BUG_ON(!USES_FULL_48BIT_PPGTT(dev));
> +
> +       pdp = kmalloc(sizeof(*pdp), GFP_KERNEL);
> +       if (!pdp)
> +               return ERR_PTR(-ENOMEM);
> +
> +       pdp->page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO);
> +       if (!pdp->page) {
> +               kfree(pdp);
> +               return ERR_PTR(-ENOMEM);
> +       }
> +
> +       ret = __pdp_init(pdp, dev);
> +       if (ret) {
> +               __free_page(pdp->page);
> +               kfree(pdp);
> +               return ERR_PTR(ret);
> +       }
> +
> +       i915_dma_map_px_single(pdp, dev);
> +
> +       return pdp;
> +}
> +
> +static void pml4_fini(struct i915_pml4 *pml4)
> +{
> +       struct i915_hw_ppgtt *ppgtt =
> +               container_of(pml4, struct i915_hw_ppgtt, pml4);
> +       i915_dma_unmap_single(pml4, ppgtt->base.dev);
> +       __free_page(pml4->page);
> +       /* HACK */
> +       pml4->page = NULL;
> +}
> +
> +static int pml4_init(struct i915_hw_ppgtt *ppgtt)
> +{
> +       struct i915_pml4 *pml4 = &ppgtt->pml4;
> +
> +       pml4->page = alloc_page(GFP_KERNEL | __GFP_ZERO);
> +       if (!pml4->page)
> +               return -ENOMEM;
> +
> +       i915_dma_map_px_single(pml4, ppgtt->base.dev);
> +
> +       return 0;
> +}
> +
>  /* Broadwell Page Directory Pointer Descriptors */
>  static int gen8_write_pdp(struct intel_engine_cs *ring,
>                           unsigned entry,
> @@ -712,14 +769,13 @@ static void gen8_free_page_tables(struct i915_page_directory_entry *pd, struct d
>         }
>  }
>
> -static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
> +static void gen8_ppgtt_unmap_pages_3lvl(struct i915_page_directory_pointer_entry *pdp,
> +                                       struct drm_device *dev)
>  {
> -       struct pci_dev *hwdev = ppgtt->base.dev->pdev;
> -       struct i915_page_directory_pointer_entry *pdp = &ppgtt->pdp; /* FIXME: 48b */
> +       struct pci_dev *hwdev = dev->pdev;
>         int i, j;
>
> -       for_each_set_bit(i, pdp->used_pdpes,
> -                       I915_PDPES_PER_PDP(ppgtt->base.dev)) {
> +       for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) {
>                 struct i915_page_directory_entry *pd;
>
>                 if (WARN_ON(!pdp->page_directory[i]))
> @@ -747,27 +803,73 @@ static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
>         }
>  }
>
> -static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
> +static void gen8_ppgtt_unmap_pages_4lvl(struct i915_hw_ppgtt *ppgtt)
>  {
> +       struct pci_dev *hwdev = ppgtt->base.dev->pdev;
>         int i;
>
> -       if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
> -               for_each_set_bit(i, ppgtt->pdp.used_pdpes,
> -                                I915_PDPES_PER_PDP(ppgtt->base.dev)) {
> -                       if (WARN_ON(!ppgtt->pdp.page_directory[i]))
> -                               continue;
> +       for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) {
> +               struct i915_page_directory_pointer_entry *pdp;
>
> -                       gen8_free_page_tables(ppgtt->pdp.page_directory[i],
> -                                             ppgtt->base.dev);
> -                       unmap_and_free_pd(ppgtt->pdp.page_directory[i],
> -                                         ppgtt->base.dev);
> -               }
> -               unmap_and_free_pdp(&ppgtt->pdp, ppgtt->base.dev);
> -       } else {
> -               BUG(); /* to be implemented later */
> +               if (WARN_ON(!ppgtt->pml4.pdps[i]))
> +                       continue;
> +
> +               pdp = ppgtt->pml4.pdps[i];
> +               if (!pdp->daddr)

Should this 'if' condition be the other way round? i.e. 'if (pdp->daddr)'.
The same applies to gen8_ppgtt_unmap_pages_3lvl also, for un-mapping of the pd page.


> +                       pci_unmap_page(hwdev, pdp->daddr, PAGE_SIZE,
> +                                      PCI_DMA_BIDIRECTIONAL);
> +
> +               gen8_ppgtt_unmap_pages_3lvl(ppgtt->pml4.pdps[i],
> +                                           ppgtt->base.dev);
>         }
>  }
>
> +static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
> +{
> +       if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
> +               gen8_ppgtt_unmap_pages_3lvl(&ppgtt->pdp, ppgtt->base.dev);
> +       else
> +               gen8_ppgtt_unmap_pages_4lvl(ppgtt);
> +}
> +
> +static void gen8_ppgtt_free_3lvl(struct i915_page_directory_pointer_entry *pdp,
> +                                struct drm_device *dev)
> +{
> +       int i;
> +
> +       for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) {
> +               if (WARN_ON(!pdp->page_directory[i]))
> +                       continue;
> +
> +               gen8_free_page_tables(pdp->page_directory[i], dev);
> +               unmap_and_free_pd(pdp->page_directory[i], dev);
> +       }
> +
> +       unmap_and_free_pdp(pdp, dev);
> +}
> +
> +static void gen8_ppgtt_free_4lvl(struct i915_hw_ppgtt *ppgtt)
> +{
> +       int i;
> +
> +       for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) {
> +               if (WARN_ON(!ppgtt->pml4.pdps[i]))
> +                       continue;
> +
> +               gen8_ppgtt_free_3lvl(ppgtt->pml4.pdps[i], ppgtt->base.dev);
> +       }
> +
> +       pml4_fini(&ppgtt->pml4);
> +}
> +
> +static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
> +{
> +       if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
> +               gen8_ppgtt_free_3lvl(&ppgtt->pdp, ppgtt->base.dev);
> +       else
> +               gen8_ppgtt_free_4lvl(ppgtt);
> +}
> +
>  static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
>  {
>         struct i915_hw_ppgtt *ppgtt =
> @@ -1040,12 +1142,74 @@ err_out:
>         return ret;
>  }
>
> -static int __noreturn gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
> -                                              struct i915_pml4 *pml4,
> -                                              uint64_t start,
> -                                              uint64_t length)
> +static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
> +                                   struct i915_pml4 *pml4,
> +                                   uint64_t start,
> +                                   uint64_t length)
>  {
> -       BUG(); /* to be implemented later */
> +       DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4);
> +       struct i915_hw_ppgtt *ppgtt =
> +               container_of(vm, struct i915_hw_ppgtt, base);
> +       struct i915_page_directory_pointer_entry *pdp;
> +       const uint64_t orig_start = start;
> +       const uint64_t orig_length = length;
> +       uint64_t temp, pml4e;
> +       int ret = 0;
> +
> +       /* Do the pml4 allocations first, so we don't need to track the newly
> +        * allocated tables below the pdp */
> +       bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4);
> +
> +       /* The page_directoryectory and pagetable allocations are done in the shared 3
> +        * and 4 level code. Just allocate the pdps.
> +        */
> +       gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) {
> +               if (!pdp) {
> +                       WARN_ON(test_bit(pml4e, pml4->used_pml4es));
> +                       pdp = alloc_pdp_single(ppgtt, pml4);
> +                       if (IS_ERR(pdp))
> +                               goto err_alloc;
> +
> +                       pml4->pdps[pml4e] = pdp;
> +                       set_bit(pml4e, new_pdps);
> +                       trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base, pml4e,
> +                                                  pml4e << GEN8_PML4E_SHIFT,
> +                                                  GEN8_PML4E_SHIFT);
> +
> +               }
> +       }
> +
> +       WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2,
> +            "The allocation has spanned more than 512GB. "
> +            "It is highly likely this is incorrect.");
> +
> +       start = orig_start;
> +       length = orig_length;
> +
> +       gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) {
> +               BUG_ON(!pdp);
> +
> +               ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length);
> +               if (ret)
> +                       goto err_out;
> +       }
> +
> +       bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es,
> +                 GEN8_PML4ES_PER_PML4);
> +
> +       return 0;
> +
> +err_out:
> +       start = orig_start;
> +       length = orig_length;
> +       gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e)
> +               gen8_ppgtt_free_3lvl(pdp, vm->dev);
> +
> +err_alloc:
> +       for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
> +               unmap_and_free_pdp(pdp, vm->dev);
> +
> +       return ret;
>  }
>
>  static int gen8_alloc_va_range(struct i915_address_space *vm,
> @@ -1054,16 +1218,19 @@ static int gen8_alloc_va_range(struct i915_address_space *vm,
>         struct i915_hw_ppgtt *ppgtt =
>                 container_of(vm, struct i915_hw_ppgtt, base);
>
> -       if (!USES_FULL_48BIT_PPGTT(vm->dev))
> -               return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
> -       else
> +       if (USES_FULL_48BIT_PPGTT(vm->dev))
>                 return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length);
> +       else
> +               return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
>  }
>
>  static void gen8_ppgtt_fini_common(struct i915_hw_ppgtt *ppgtt)
>  {
>         unmap_and_free_pt(ppgtt->scratch_pd, ppgtt->base.dev);
> -       unmap_and_free_pdp(&ppgtt->pdp, ppgtt->base.dev);
> +       if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
> +               pml4_fini(&ppgtt->pml4);
> +       else
> +               unmap_and_free_pdp(&ppgtt->pdp, ppgtt->base.dev);
>  }
>
>  /**
> @@ -1086,14 +1253,21 @@ static int gen8_ppgtt_init_common(struct i915_hw_ppgtt *ppgtt, uint64_t size)
>
>         ppgtt->switch_mm = gen8_mm_switch;
>
> -       if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
> +       if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
> +               int ret = pml4_init(ppgtt);
> +               if (ret) {
> +                       unmap_and_free_pt(ppgtt->scratch_pd, ppgtt->base.dev);
> +                       return ret;
> +               }
> +       } else {
>                 int ret = __pdp_init(&ppgtt->pdp, false);
>                 if (ret) {
>                         unmap_and_free_pt(ppgtt->scratch_pd, ppgtt->base.dev);
>                         return ret;
>                 }
> -       } else
> -               return -EPERM; /* Not yet implemented */
> +
> +               trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base, 0, 0, GEN8_PML4E_SHIFT);
> +       }
>
>         return 0;
>  }
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 144858e..1477f54 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -87,6 +87,7 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
>   */
>  #define GEN8_PML4ES_PER_PML4           512
>  #define GEN8_PML4E_SHIFT               39
> +#define GEN8_PML4E_MASK                        (GEN8_PML4ES_PER_PML4 - 1)
>  #define GEN8_PDPE_SHIFT                        30
>  /* NB: GEN8_PDPE_MASK is untrue for 32b platforms, but it has no impact on 32b page
>   * tables */
> @@ -427,6 +428,14 @@ static inline uint32_t gen6_pde_index(uint32_t addr)
>              temp = min(temp, length),                                  \
>              start += temp, length -= temp)
>
> +#define gen8_for_each_pml4e(pdp, pml4, start, length, temp, iter)      \
> +       for (iter = gen8_pml4e_index(start), pdp = (pml4)->pdps[iter];  \
> +            length > 0 && iter < GEN8_PML4ES_PER_PML4;                 \
> +            pdp = (pml4)->pdps[++iter],                                \
> +            temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT) - start,   \
> +            temp = min(temp, length),                                  \
> +            start += temp, length -= temp)
> +
>  #define gen8_for_each_pdpe(pd, pdp, start, length, temp, iter)         \
>         gen8_for_each_pdpe_e(pd, pdp, start, length, temp, iter, I915_PDPES_PER_PDP(dev))
>
> @@ -458,7 +467,7 @@ static inline uint32_t gen8_pdpe_index(uint64_t address)
>
>  static inline uint32_t gen8_pml4e_index(uint64_t address)
>  {
> -       BUG(); /* For 64B */
> +       return (address >> GEN8_PML4E_SHIFT) & GEN8_PML4E_MASK;
>  }
>
>  static inline size_t gen8_pte_count(uint64_t addr, uint64_t length)
> --
> 2.1.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Daniel Vetter March 4, 2015, 1 p.m. UTC | #3
On Tue, Mar 03, 2015 at 06:25:27PM +0530, akash goel wrote:
> On Fri, Feb 20, 2015 at 11:15 PM, Michel Thierry
> > +               pdp = ppgtt->pml4.pdps[i];
> > +               if (!pdp->daddr)
> > +                       pci_unmap_page(hwdev, pdp->daddr, PAGE_SIZE,
> > +                                      PCI_DMA_BIDIRECTIONAL);
> > +
> 
> For consistency & cleanup,  the call to pci_unmap_page can be replaced
> with i915_dma_unmap_single.
> Same can be done inside the gen8_ppgtt_unmap_pages_3lvl function also.

Everything but the DMA API interfaces (dma_unmap_page) is deprecated. A
follow-up patch to go through all the i915 code and do these replacements
would be nice. After all this has landed, of course.
-Daniel
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 1edcc17..edada33 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -483,9 +483,12 @@  static void __pdp_fini(struct i915_page_directory_pointer_entry *pdp)
 static void unmap_and_free_pdp(struct i915_page_directory_pointer_entry *pdp,
 			    struct drm_device *dev)
 {
-	__pdp_fini(pdp);
-	if (USES_FULL_48BIT_PPGTT(dev))
+	if (USES_FULL_48BIT_PPGTT(dev)) {
+		__pdp_fini(pdp);
+		i915_dma_unmap_single(pdp, dev);
+		__free_page(pdp->page);
 		kfree(pdp);
+	}
 }
 
 static int __pdp_init(struct i915_page_directory_pointer_entry *pdp,
@@ -511,6 +514,60 @@  static int __pdp_init(struct i915_page_directory_pointer_entry *pdp,
 	return 0;
 }
 
+static struct i915_page_directory_pointer_entry *alloc_pdp_single(struct i915_hw_ppgtt *ppgtt,
+					       struct i915_pml4 *pml4)
+{
+	struct drm_device *dev = ppgtt->base.dev;
+	struct i915_page_directory_pointer_entry *pdp;
+	int ret;
+
+	BUG_ON(!USES_FULL_48BIT_PPGTT(dev));
+
+	pdp = kmalloc(sizeof(*pdp), GFP_KERNEL);
+	if (!pdp)
+		return ERR_PTR(-ENOMEM);
+
+	pdp->page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO);
+	if (!pdp->page) {
+		kfree(pdp);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	ret = __pdp_init(pdp, dev);
+	if (ret) {
+		__free_page(pdp->page);
+		kfree(pdp);
+		return ERR_PTR(ret);
+	}
+
+	i915_dma_map_px_single(pdp, dev);
+
+	return pdp;
+}
+
+static void pml4_fini(struct i915_pml4 *pml4)
+{
+	struct i915_hw_ppgtt *ppgtt =
+		container_of(pml4, struct i915_hw_ppgtt, pml4);
+	i915_dma_unmap_single(pml4, ppgtt->base.dev);
+	__free_page(pml4->page);
+	/* HACK */
+	pml4->page = NULL;
+}
+
+static int pml4_init(struct i915_hw_ppgtt *ppgtt)
+{
+	struct i915_pml4 *pml4 = &ppgtt->pml4;
+
+	pml4->page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (!pml4->page)
+		return -ENOMEM;
+
+	i915_dma_map_px_single(pml4, ppgtt->base.dev);
+
+	return 0;
+}
+
 /* Broadwell Page Directory Pointer Descriptors */
 static int gen8_write_pdp(struct intel_engine_cs *ring,
 			  unsigned entry,
@@ -712,14 +769,13 @@  static void gen8_free_page_tables(struct i915_page_directory_entry *pd, struct d
 	}
 }
 
-static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
+static void gen8_ppgtt_unmap_pages_3lvl(struct i915_page_directory_pointer_entry *pdp,
+					struct drm_device *dev)
 {
-	struct pci_dev *hwdev = ppgtt->base.dev->pdev;
-	struct i915_page_directory_pointer_entry *pdp = &ppgtt->pdp; /* FIXME: 48b */
+	struct pci_dev *hwdev = dev->pdev;
 	int i, j;
 
-	for_each_set_bit(i, pdp->used_pdpes,
-			I915_PDPES_PER_PDP(ppgtt->base.dev)) {
+	for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) {
 		struct i915_page_directory_entry *pd;
 
 		if (WARN_ON(!pdp->page_directory[i]))
@@ -747,27 +803,73 @@  static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
 	}
 }
 
-static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
+static void gen8_ppgtt_unmap_pages_4lvl(struct i915_hw_ppgtt *ppgtt)
 {
+	struct pci_dev *hwdev = ppgtt->base.dev->pdev;
 	int i;
 
-	if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
-		for_each_set_bit(i, ppgtt->pdp.used_pdpes,
-				 I915_PDPES_PER_PDP(ppgtt->base.dev)) {
-			if (WARN_ON(!ppgtt->pdp.page_directory[i]))
-				continue;
+	for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) {
+		struct i915_page_directory_pointer_entry *pdp;
 
-			gen8_free_page_tables(ppgtt->pdp.page_directory[i],
-					      ppgtt->base.dev);
-			unmap_and_free_pd(ppgtt->pdp.page_directory[i],
-					  ppgtt->base.dev);
-		}
-		unmap_and_free_pdp(&ppgtt->pdp, ppgtt->base.dev);
-	} else {
-		BUG(); /* to be implemented later */
+		if (WARN_ON(!ppgtt->pml4.pdps[i]))
+			continue;
+
+		pdp = ppgtt->pml4.pdps[i];
+		if (pdp->daddr)
+			pci_unmap_page(hwdev, pdp->daddr, PAGE_SIZE,
+				       PCI_DMA_BIDIRECTIONAL);
+
+		gen8_ppgtt_unmap_pages_3lvl(ppgtt->pml4.pdps[i],
+					    ppgtt->base.dev);
 	}
 }
 
+static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
+{
+	if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
+		gen8_ppgtt_unmap_pages_3lvl(&ppgtt->pdp, ppgtt->base.dev);
+	else
+		gen8_ppgtt_unmap_pages_4lvl(ppgtt);
+}
+
+static void gen8_ppgtt_free_3lvl(struct i915_page_directory_pointer_entry *pdp,
+				 struct drm_device *dev)
+{
+	int i;
+
+	for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) {
+		if (WARN_ON(!pdp->page_directory[i]))
+			continue;
+
+		gen8_free_page_tables(pdp->page_directory[i], dev);
+		unmap_and_free_pd(pdp->page_directory[i], dev);
+	}
+
+	unmap_and_free_pdp(pdp, dev);
+}
+
+static void gen8_ppgtt_free_4lvl(struct i915_hw_ppgtt *ppgtt)
+{
+	int i;
+
+	for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) {
+		if (WARN_ON(!ppgtt->pml4.pdps[i]))
+			continue;
+
+		gen8_ppgtt_free_3lvl(ppgtt->pml4.pdps[i], ppgtt->base.dev);
+	}
+
+	pml4_fini(&ppgtt->pml4);
+}
+
+static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
+{
+	if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
+		gen8_ppgtt_free_3lvl(&ppgtt->pdp, ppgtt->base.dev);
+	else
+		gen8_ppgtt_free_4lvl(ppgtt);
+}
+
 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
 {
 	struct i915_hw_ppgtt *ppgtt =
@@ -1040,12 +1142,74 @@  err_out:
 	return ret;
 }
 
-static int __noreturn gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
-					       struct i915_pml4 *pml4,
-					       uint64_t start,
-					       uint64_t length)
+static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
+				    struct i915_pml4 *pml4,
+				    uint64_t start,
+				    uint64_t length)
 {
-	BUG(); /* to be implemented later */
+	DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4);
+	struct i915_hw_ppgtt *ppgtt =
+		container_of(vm, struct i915_hw_ppgtt, base);
+	struct i915_page_directory_pointer_entry *pdp;
+	const uint64_t orig_start = start;
+	const uint64_t orig_length = length;
+	uint64_t temp, pml4e;
+	int ret = 0;
+
+	/* Do the pml4 allocations first, so we don't need to track the newly
+	 * allocated tables below the pdp */
+	bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4);
+
+	/* The page directory and page table allocations are done in the shared 3
+	 * and 4 level code. Just allocate the pdps.
+	 */
+	gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) {
+		if (!pdp) {
+			WARN_ON(test_bit(pml4e, pml4->used_pml4es));
+			pdp = alloc_pdp_single(ppgtt, pml4);
+			if (IS_ERR(pdp))
+				goto err_alloc;
+
+			pml4->pdps[pml4e] = pdp;
+			set_bit(pml4e, new_pdps);
+			trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base, pml4e,
+						   pml4e << GEN8_PML4E_SHIFT,
+						   GEN8_PML4E_SHIFT);
+
+		}
+	}
+
+	WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2,
+	     "The allocation has spanned more than 512GB. "
+	     "It is highly likely this is incorrect.");
+
+	start = orig_start;
+	length = orig_length;
+
+	gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) {
+		BUG_ON(!pdp);
+
+		ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length);
+		if (ret)
+			goto err_out;
+	}
+
+	bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es,
+		  GEN8_PML4ES_PER_PML4);
+
+	return 0;
+
+err_out:
+	start = orig_start;
+	length = orig_length;
+	gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e)
+		gen8_ppgtt_free_3lvl(pdp, vm->dev);
+
+err_alloc:
+	for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
+		unmap_and_free_pdp(pml4->pdps[pml4e], vm->dev);
+
+	return ret;
 }
 
 static int gen8_alloc_va_range(struct i915_address_space *vm,
@@ -1054,16 +1218,19 @@  static int gen8_alloc_va_range(struct i915_address_space *vm,
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
 
-	if (!USES_FULL_48BIT_PPGTT(vm->dev))
-		return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
-	else
+	if (USES_FULL_48BIT_PPGTT(vm->dev))
 		return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length);
+	else
+		return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
 }
 
 static void gen8_ppgtt_fini_common(struct i915_hw_ppgtt *ppgtt)
 {
 	unmap_and_free_pt(ppgtt->scratch_pd, ppgtt->base.dev);
-	unmap_and_free_pdp(&ppgtt->pdp, ppgtt->base.dev);
+	if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
+		pml4_fini(&ppgtt->pml4);
+	else
+		unmap_and_free_pdp(&ppgtt->pdp, ppgtt->base.dev);
 }
 
 /**
@@ -1086,14 +1253,21 @@  static int gen8_ppgtt_init_common(struct i915_hw_ppgtt *ppgtt, uint64_t size)
 
 	ppgtt->switch_mm = gen8_mm_switch;
 
-	if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
+	if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
+		int ret = pml4_init(ppgtt);
+		if (ret) {
+			unmap_and_free_pt(ppgtt->scratch_pd, ppgtt->base.dev);
+			return ret;
+		}
+	} else {
 		int ret = __pdp_init(&ppgtt->pdp, false);
 		if (ret) {
 			unmap_and_free_pt(ppgtt->scratch_pd, ppgtt->base.dev);
 			return ret;
 		}
-	} else
-		return -EPERM; /* Not yet implemented */
+
+		trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base, 0, 0, GEN8_PML4E_SHIFT);
+	}
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 144858e..1477f54 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -87,6 +87,7 @@  typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
  */
 #define GEN8_PML4ES_PER_PML4		512
 #define GEN8_PML4E_SHIFT		39
+#define GEN8_PML4E_MASK			(GEN8_PML4ES_PER_PML4 - 1)
 #define GEN8_PDPE_SHIFT			30
 /* NB: GEN8_PDPE_MASK is untrue for 32b platforms, but it has no impact on 32b page
  * tables */
@@ -427,6 +428,14 @@  static inline uint32_t gen6_pde_index(uint32_t addr)
 	     temp = min(temp, length),					\
 	     start += temp, length -= temp)
 
+#define gen8_for_each_pml4e(pdp, pml4, start, length, temp, iter)	\
+	for (iter = gen8_pml4e_index(start), pdp = (pml4)->pdps[iter];	\
+	     length > 0 && iter < GEN8_PML4ES_PER_PML4;			\
+	     pdp = (pml4)->pdps[++iter],				\
+	     temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT) - start,	\
+	     temp = min(temp, length),					\
+	     start += temp, length -= temp)
+
 #define gen8_for_each_pdpe(pd, pdp, start, length, temp, iter)		\
 	gen8_for_each_pdpe_e(pd, pdp, start, length, temp, iter, I915_PDPES_PER_PDP(dev))
 
@@ -458,7 +467,7 @@  static inline uint32_t gen8_pdpe_index(uint64_t address)
 
 static inline uint32_t gen8_pml4e_index(uint64_t address)
 {
-	BUG(); /* For 64B */
+	return (address >> GEN8_PML4E_SHIFT) & GEN8_PML4E_MASK;
 }
 
 static inline size_t gen8_pte_count(uint64_t addr, uint64_t length)