Message ID | 1424454366-19006-6-git-send-email-michel.thierry@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Fri, Feb 20, 2015 at 11:15 PM, Michel Thierry <michel.thierry@intel.com> wrote: > From: Ben Widawsky <benjamin.widawsky@intel.com> > > The code for 4lvl works just as one would expect, and nicely it is able > to call into the existing 3lvl page table code to handle all of the > lower levels. > > PML4 has no special attributes, and there will always be a PML4. > So simply initialize it at creation, and destroy it at the end. > > v2: Return something at the end of gen8_alloc_va_range_4lvl to keep the > compiler happy. And define ret only in one place. > Updated gen8_ppgtt_unmap_pages and gen8_ppgtt_free to handle 4lvl. > > Signed-off-by: Ben Widawsky <ben@bwidawsk.net> > Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2) > --- > drivers/gpu/drm/i915/i915_gem_gtt.c | 240 +++++++++++++++++++++++++++++++----- > drivers/gpu/drm/i915/i915_gem_gtt.h | 11 +- > 2 files changed, 217 insertions(+), 34 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c > index 1edcc17..edada33 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > @@ -483,9 +483,12 @@ static void __pdp_fini(struct i915_page_directory_pointer_entry *pdp) > static void unmap_and_free_pdp(struct i915_page_directory_pointer_entry *pdp, > struct drm_device *dev) > { > - __pdp_fini(pdp); > - if (USES_FULL_48BIT_PPGTT(dev)) > + if (USES_FULL_48BIT_PPGTT(dev)) { > + __pdp_fini(pdp); Call to __pdp_fini should be made for the 32 bit also. The 'used_pdpes' bitmap & 'page_directory' double pointer needs to be freed in 32 bit case also (allocated inside __pdp_init, called from gen8_ppgtt_init_common). 
> + i915_dma_unmap_single(pdp, dev); > + __free_page(pdp->page); > kfree(pdp); > + } > } > > static int __pdp_init(struct i915_page_directory_pointer_entry *pdp, > @@ -511,6 +514,60 @@ static int __pdp_init(struct i915_page_directory_pointer_entry *pdp, > return 0; > } > > +static struct i915_page_directory_pointer_entry *alloc_pdp_single(struct i915_hw_ppgtt *ppgtt, > + struct i915_pml4 *pml4) > +{ > + struct drm_device *dev = ppgtt->base.dev; > + struct i915_page_directory_pointer_entry *pdp; > + int ret; > + > + BUG_ON(!USES_FULL_48BIT_PPGTT(dev)); > + > + pdp = kmalloc(sizeof(*pdp), GFP_KERNEL); > + if (!pdp) > + return ERR_PTR(-ENOMEM); > + > + pdp->page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO); > + if (!pdp->page) { > + kfree(pdp); > + return ERR_PTR(-ENOMEM); > + } > + > + ret = __pdp_init(pdp, dev); > + if (ret) { > + __free_page(pdp->page); > + kfree(pdp); > + return ERR_PTR(ret); > + } > + > + i915_dma_map_px_single(pdp, dev); > + > + return pdp; > +} > + > +static void pml4_fini(struct i915_pml4 *pml4) > +{ > + struct i915_hw_ppgtt *ppgtt = > + container_of(pml4, struct i915_hw_ppgtt, pml4); > + i915_dma_unmap_single(pml4, ppgtt->base.dev); > + __free_page(pml4->page); > + /* HACK */ > + pml4->page = NULL; > +} > + > +static int pml4_init(struct i915_hw_ppgtt *ppgtt) > +{ > + struct i915_pml4 *pml4 = &ppgtt->pml4; > + > + pml4->page = alloc_page(GFP_KERNEL | __GFP_ZERO); > + if (!pml4->page) > + return -ENOMEM; > + > + i915_dma_map_px_single(pml4, ppgtt->base.dev); > + > + return 0; > +} > + > /* Broadwell Page Directory Pointer Descriptors */ > static int gen8_write_pdp(struct intel_engine_cs *ring, > unsigned entry, > @@ -712,14 +769,13 @@ static void gen8_free_page_tables(struct i915_page_directory_entry *pd, struct d > } > } > > -static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt) > +static void gen8_ppgtt_unmap_pages_3lvl(struct i915_page_directory_pointer_entry *pdp, > + struct drm_device *dev) > { > - struct pci_dev *hwdev = 
ppgtt->base.dev->pdev; > - struct i915_page_directory_pointer_entry *pdp = &ppgtt->pdp; /* FIXME: 48b */ > + struct pci_dev *hwdev = dev->pdev; > int i, j; > > - for_each_set_bit(i, pdp->used_pdpes, > - I915_PDPES_PER_PDP(ppgtt->base.dev)) { > + for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) { > struct i915_page_directory_entry *pd; > > if (WARN_ON(!pdp->page_directory[i])) > @@ -747,27 +803,73 @@ static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt) > } > } > > -static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt) > +static void gen8_ppgtt_unmap_pages_4lvl(struct i915_hw_ppgtt *ppgtt) > { > + struct pci_dev *hwdev = ppgtt->base.dev->pdev; > int i; > > - if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) { > - for_each_set_bit(i, ppgtt->pdp.used_pdpes, > - I915_PDPES_PER_PDP(ppgtt->base.dev)) { > - if (WARN_ON(!ppgtt->pdp.page_directory[i])) > - continue; > + for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) { > + struct i915_page_directory_pointer_entry *pdp; > > - gen8_free_page_tables(ppgtt->pdp.page_directory[i], > - ppgtt->base.dev); > - unmap_and_free_pd(ppgtt->pdp.page_directory[i], > - ppgtt->base.dev); > - } > - unmap_and_free_pdp(&ppgtt->pdp, ppgtt->base.dev); > - } else { > - BUG(); /* to be implemented later */ > + if (WARN_ON(!ppgtt->pml4.pdps[i])) > + continue; > + > + pdp = ppgtt->pml4.pdps[i]; > + if (!pdp->daddr) > + pci_unmap_page(hwdev, pdp->daddr, PAGE_SIZE, > + PCI_DMA_BIDIRECTIONAL); > + For consistency & cleanup, the call to pci_unmap_page can be replaced with i915_dma_unmap_single. Same can be done inside the gen8_ppgtt_unmap_pages_3lvl function also. 
> + gen8_ppgtt_unmap_pages_3lvl(ppgtt->pml4.pdps[i], > + ppgtt->base.dev); > } > } > > +static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt) > +{ > + if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) > + gen8_ppgtt_unmap_pages_3lvl(&ppgtt->pdp, ppgtt->base.dev); > + else > + gen8_ppgtt_unmap_pages_4lvl(ppgtt); > +} > + > +static void gen8_ppgtt_free_3lvl(struct i915_page_directory_pointer_entry *pdp, > + struct drm_device *dev) > +{ > + int i; > + > + for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) { > + if (WARN_ON(!pdp->page_directory[i])) > + continue; > + > + gen8_free_page_tables(pdp->page_directory[i], dev); > + unmap_and_free_pd(pdp->page_directory[i], dev); > + } > + > + unmap_and_free_pdp(pdp, dev); > +} > + > +static void gen8_ppgtt_free_4lvl(struct i915_hw_ppgtt *ppgtt) > +{ > + int i; > + > + for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) { > + if (WARN_ON(!ppgtt->pml4.pdps[i])) > + continue; > + > + gen8_ppgtt_free_3lvl(ppgtt->pml4.pdps[i], ppgtt->base.dev); > + } > + > + pml4_fini(&ppgtt->pml4); > +} > + > +static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt) > +{ > + if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) > + gen8_ppgtt_free_3lvl(&ppgtt->pdp, ppgtt->base.dev); > + else > + gen8_ppgtt_free_4lvl(ppgtt); > +} > + > static void gen8_ppgtt_cleanup(struct i915_address_space *vm) > { > struct i915_hw_ppgtt *ppgtt = Is the call to 'gen8_ppgtt_unmap_pages' really needed from 'gen8_ppgtt_cleanup' function, considering that gen8_ppgtt_free will do the unmap operation also, for the pml4, pdp, pd & pt pages. 
> @@ -1040,12 +1142,74 @@ err_out: > return ret; > } > > -static int __noreturn gen8_alloc_va_range_4lvl(struct i915_address_space *vm, > - struct i915_pml4 *pml4, > - uint64_t start, > - uint64_t length) > +static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm, > + struct i915_pml4 *pml4, > + uint64_t start, > + uint64_t length) > { > - BUG(); /* to be implemented later */ > + DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4); > + struct i915_hw_ppgtt *ppgtt = > + container_of(vm, struct i915_hw_ppgtt, base); > + struct i915_page_directory_pointer_entry *pdp; > + const uint64_t orig_start = start; > + const uint64_t orig_length = length; > + uint64_t temp, pml4e; > + int ret = 0; > + > + /* Do the pml4 allocations first, so we don't need to track the newly > + * allocated tables below the pdp */ > + bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4); > + > + /* The page_directoryectory and pagetable allocations are done in the shared 3 > + * and 4 level code. Just allocate the pdps. > + */ > + gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) { > + if (!pdp) { > + WARN_ON(test_bit(pml4e, pml4->used_pml4es)); > + pdp = alloc_pdp_single(ppgtt, pml4); > + if (IS_ERR(pdp)) > + goto err_alloc; > + > + pml4->pdps[pml4e] = pdp; > + set_bit(pml4e, new_pdps); > + trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base, pml4e, > + pml4e << GEN8_PML4E_SHIFT, > + GEN8_PML4E_SHIFT); > + > + } > + } > + > + WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2, > + "The allocation has spanned more than 512GB. 
" > + "It is highly likely this is incorrect."); > + > + start = orig_start; > + length = orig_length; > + > + gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) { > + BUG_ON(!pdp); > + > + ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length); > + if (ret) > + goto err_out; > + } > + > + bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es, > + GEN8_PML4ES_PER_PML4); > + > + return 0; > + > +err_out: > + start = orig_start; > + length = orig_length; > + gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) > + gen8_ppgtt_free_3lvl(pdp, vm->dev); > + > +err_alloc: > + for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) > + unmap_and_free_pdp(pdp, vm->dev); > + If __gen8_alloc_vma_range_3lvl returns an error, then there can be 2 calls to unmap_and_free_pdp for the same pdp value. The gen8_ppgtt_free_3lvl will also call the unmap_and_free_pdp for the newly allocated pdp. Already __gen8_alloc_vma_range_3lvl seems to be error handling internally, i.e. it is de-allocating the newly allocated pages for page directory & page tables, if there is an allocation failure somewhere. So similarly __gen8_alloc_vma_range_4lvl function can just do the de-allocation for the newly allocated pdps ('new_pdps'). 
> + return ret; > } > > static int gen8_alloc_va_range(struct i915_address_space *vm, > @@ -1054,16 +1218,19 @@ static int gen8_alloc_va_range(struct i915_address_space *vm, > struct i915_hw_ppgtt *ppgtt = > container_of(vm, struct i915_hw_ppgtt, base); > > - if (!USES_FULL_48BIT_PPGTT(vm->dev)) > - return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length); > - else > + if (USES_FULL_48BIT_PPGTT(vm->dev)) > return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length); > + else > + return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length); > } > > static void gen8_ppgtt_fini_common(struct i915_hw_ppgtt *ppgtt) > { > unmap_and_free_pt(ppgtt->scratch_pd, ppgtt->base.dev); > - unmap_and_free_pdp(&ppgtt->pdp, ppgtt->base.dev); > + if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) > + pml4_fini(&ppgtt->pml4); > + else > + unmap_and_free_pdp(&ppgtt->pdp, ppgtt->base.dev); > } > > /** > @@ -1086,14 +1253,21 @@ static int gen8_ppgtt_init_common(struct i915_hw_ppgtt *ppgtt, uint64_t size) > > ppgtt->switch_mm = gen8_mm_switch; > > - if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) { > + if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) { > + int ret = pml4_init(ppgtt); > + if (ret) { > + unmap_and_free_pt(ppgtt->scratch_pd, ppgtt->base.dev); > + return ret; > + } > + } else { > int ret = __pdp_init(&ppgtt->pdp, false); > if (ret) { > unmap_and_free_pt(ppgtt->scratch_pd, ppgtt->base.dev); > return ret; > } > - } else > - return -EPERM; /* Not yet implemented */ > + > + trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base, 0, 0, GEN8_PML4E_SHIFT); > + } > > return 0; > } > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h > index 144858e..1477f54 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.h > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h > @@ -87,6 +87,7 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t; > */ > #define GEN8_PML4ES_PER_PML4 512 > #define GEN8_PML4E_SHIFT 39 > +#define GEN8_PML4E_MASK (GEN8_PML4ES_PER_PML4 - 1) > #define 
GEN8_PDPE_SHIFT 30 > /* NB: GEN8_PDPE_MASK is untrue for 32b platforms, but it has no impact on 32b page > * tables */ > @@ -427,6 +428,14 @@ static inline uint32_t gen6_pde_index(uint32_t addr) > temp = min(temp, length), \ > start += temp, length -= temp) > > +#define gen8_for_each_pml4e(pdp, pml4, start, length, temp, iter) \ > + for (iter = gen8_pml4e_index(start), pdp = (pml4)->pdps[iter]; \ > + length > 0 && iter < GEN8_PML4ES_PER_PML4; \ > + pdp = (pml4)->pdps[++iter], \ > + temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT) - start, \ > + temp = min(temp, length), \ > + start += temp, length -= temp) > + > #define gen8_for_each_pdpe(pd, pdp, start, length, temp, iter) \ > gen8_for_each_pdpe_e(pd, pdp, start, length, temp, iter, I915_PDPES_PER_PDP(dev)) > > @@ -458,7 +467,7 @@ static inline uint32_t gen8_pdpe_index(uint64_t address) > > static inline uint32_t gen8_pml4e_index(uint64_t address) > { > - BUG(); /* For 64B */ > + return (address >> GEN8_PML4E_SHIFT) & GEN8_PML4E_MASK; > } > > static inline size_t gen8_pte_count(uint64_t addr, uint64_t length) > -- > 2.1.1 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
On Fri, Feb 20, 2015 at 11:15 PM, Michel Thierry <michel.thierry@intel.com> wrote: > From: Ben Widawsky <benjamin.widawsky@intel.com> > > The code for 4lvl works just as one would expect, and nicely it is able > to call into the existing 3lvl page table code to handle all of the > lower levels. > > PML4 has no special attributes, and there will always be a PML4. > So simply initialize it at creation, and destroy it at the end. > > v2: Return something at the end of gen8_alloc_va_range_4lvl to keep the > compiler happy. And define ret only in one place. > Updated gen8_ppgtt_unmap_pages and gen8_ppgtt_free to handle 4lvl. > > Signed-off-by: Ben Widawsky <ben@bwidawsk.net> > Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v2) > --- > drivers/gpu/drm/i915/i915_gem_gtt.c | 240 +++++++++++++++++++++++++++++++----- > drivers/gpu/drm/i915/i915_gem_gtt.h | 11 +- > 2 files changed, 217 insertions(+), 34 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c > index 1edcc17..edada33 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > @@ -483,9 +483,12 @@ static void __pdp_fini(struct i915_page_directory_pointer_entry *pdp) > static void unmap_and_free_pdp(struct i915_page_directory_pointer_entry *pdp, > struct drm_device *dev) > { > - __pdp_fini(pdp); > - if (USES_FULL_48BIT_PPGTT(dev)) > + if (USES_FULL_48BIT_PPGTT(dev)) { > + __pdp_fini(pdp); > + i915_dma_unmap_single(pdp, dev); > + __free_page(pdp->page); > kfree(pdp); > + } > } > > static int __pdp_init(struct i915_page_directory_pointer_entry *pdp, > @@ -511,6 +514,60 @@ static int __pdp_init(struct i915_page_directory_pointer_entry *pdp, > return 0; > } > > +static struct i915_page_directory_pointer_entry *alloc_pdp_single(struct i915_hw_ppgtt *ppgtt, > + struct i915_pml4 *pml4) > +{ > + struct drm_device *dev = ppgtt->base.dev; > + struct i915_page_directory_pointer_entry *pdp; > + int ret; > + > + 
BUG_ON(!USES_FULL_48BIT_PPGTT(dev)); > + > + pdp = kmalloc(sizeof(*pdp), GFP_KERNEL); > + if (!pdp) > + return ERR_PTR(-ENOMEM); > + > + pdp->page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO); > + if (!pdp->page) { > + kfree(pdp); > + return ERR_PTR(-ENOMEM); > + } > + > + ret = __pdp_init(pdp, dev); > + if (ret) { > + __free_page(pdp->page); > + kfree(pdp); > + return ERR_PTR(ret); > + } > + > + i915_dma_map_px_single(pdp, dev); > + > + return pdp; > +} > + > +static void pml4_fini(struct i915_pml4 *pml4) > +{ > + struct i915_hw_ppgtt *ppgtt = > + container_of(pml4, struct i915_hw_ppgtt, pml4); > + i915_dma_unmap_single(pml4, ppgtt->base.dev); > + __free_page(pml4->page); > + /* HACK */ > + pml4->page = NULL; > +} > + > +static int pml4_init(struct i915_hw_ppgtt *ppgtt) > +{ > + struct i915_pml4 *pml4 = &ppgtt->pml4; > + > + pml4->page = alloc_page(GFP_KERNEL | __GFP_ZERO); > + if (!pml4->page) > + return -ENOMEM; > + > + i915_dma_map_px_single(pml4, ppgtt->base.dev); > + > + return 0; > +} > + > /* Broadwell Page Directory Pointer Descriptors */ > static int gen8_write_pdp(struct intel_engine_cs *ring, > unsigned entry, > @@ -712,14 +769,13 @@ static void gen8_free_page_tables(struct i915_page_directory_entry *pd, struct d > } > } > > -static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt) > +static void gen8_ppgtt_unmap_pages_3lvl(struct i915_page_directory_pointer_entry *pdp, > + struct drm_device *dev) > { > - struct pci_dev *hwdev = ppgtt->base.dev->pdev; > - struct i915_page_directory_pointer_entry *pdp = &ppgtt->pdp; /* FIXME: 48b */ > + struct pci_dev *hwdev = dev->pdev; > int i, j; > > - for_each_set_bit(i, pdp->used_pdpes, > - I915_PDPES_PER_PDP(ppgtt->base.dev)) { > + for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) { > struct i915_page_directory_entry *pd; > > if (WARN_ON(!pdp->page_directory[i])) > @@ -747,27 +803,73 @@ static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt) > } > } > > -static void 
gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt) > +static void gen8_ppgtt_unmap_pages_4lvl(struct i915_hw_ppgtt *ppgtt) > { > + struct pci_dev *hwdev = ppgtt->base.dev->pdev; > int i; > > - if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) { > - for_each_set_bit(i, ppgtt->pdp.used_pdpes, > - I915_PDPES_PER_PDP(ppgtt->base.dev)) { > - if (WARN_ON(!ppgtt->pdp.page_directory[i])) > - continue; > + for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) { > + struct i915_page_directory_pointer_entry *pdp; > > - gen8_free_page_tables(ppgtt->pdp.page_directory[i], > - ppgtt->base.dev); > - unmap_and_free_pd(ppgtt->pdp.page_directory[i], > - ppgtt->base.dev); > - } > - unmap_and_free_pdp(&ppgtt->pdp, ppgtt->base.dev); > - } else { > - BUG(); /* to be implemented later */ > + if (WARN_ON(!ppgtt->pml4.pdps[i])) > + continue; > + > + pdp = ppgtt->pml4.pdps[i]; > + if (!pdp->daddr) Should this 'if' condition be the other way round? i.e. 'if (pdp->daddr)'? Same applies to gen8_ppgtt_unmap_pages_3lvl also for un-mapping of the pd page. 
> + pci_unmap_page(hwdev, pdp->daddr, PAGE_SIZE, > + PCI_DMA_BIDIRECTIONAL); > + > + gen8_ppgtt_unmap_pages_3lvl(ppgtt->pml4.pdps[i], > + ppgtt->base.dev); > } > } > > +static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt) > +{ > + if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) > + gen8_ppgtt_unmap_pages_3lvl(&ppgtt->pdp, ppgtt->base.dev); > + else > + gen8_ppgtt_unmap_pages_4lvl(ppgtt); > +} > + > +static void gen8_ppgtt_free_3lvl(struct i915_page_directory_pointer_entry *pdp, > + struct drm_device *dev) > +{ > + int i; > + > + for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) { > + if (WARN_ON(!pdp->page_directory[i])) > + continue; > + > + gen8_free_page_tables(pdp->page_directory[i], dev); > + unmap_and_free_pd(pdp->page_directory[i], dev); > + } > + > + unmap_and_free_pdp(pdp, dev); > +} > + > +static void gen8_ppgtt_free_4lvl(struct i915_hw_ppgtt *ppgtt) > +{ > + int i; > + > + for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) { > + if (WARN_ON(!ppgtt->pml4.pdps[i])) > + continue; > + > + gen8_ppgtt_free_3lvl(ppgtt->pml4.pdps[i], ppgtt->base.dev); > + } > + > + pml4_fini(&ppgtt->pml4); > +} > + > +static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt) > +{ > + if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) > + gen8_ppgtt_free_3lvl(&ppgtt->pdp, ppgtt->base.dev); > + else > + gen8_ppgtt_free_4lvl(ppgtt); > +} > + > static void gen8_ppgtt_cleanup(struct i915_address_space *vm) > { > struct i915_hw_ppgtt *ppgtt = > @@ -1040,12 +1142,74 @@ err_out: > return ret; > } > > -static int __noreturn gen8_alloc_va_range_4lvl(struct i915_address_space *vm, > - struct i915_pml4 *pml4, > - uint64_t start, > - uint64_t length) > +static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm, > + struct i915_pml4 *pml4, > + uint64_t start, > + uint64_t length) > { > - BUG(); /* to be implemented later */ > + DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4); > + struct i915_hw_ppgtt *ppgtt = > + container_of(vm, struct i915_hw_ppgtt, 
base); > + struct i915_page_directory_pointer_entry *pdp; > + const uint64_t orig_start = start; > + const uint64_t orig_length = length; > + uint64_t temp, pml4e; > + int ret = 0; > + > + /* Do the pml4 allocations first, so we don't need to track the newly > + * allocated tables below the pdp */ > + bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4); > + > + /* The page_directoryectory and pagetable allocations are done in the shared 3 > + * and 4 level code. Just allocate the pdps. > + */ > + gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) { > + if (!pdp) { > + WARN_ON(test_bit(pml4e, pml4->used_pml4es)); > + pdp = alloc_pdp_single(ppgtt, pml4); > + if (IS_ERR(pdp)) > + goto err_alloc; > + > + pml4->pdps[pml4e] = pdp; > + set_bit(pml4e, new_pdps); > + trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base, pml4e, > + pml4e << GEN8_PML4E_SHIFT, > + GEN8_PML4E_SHIFT); > + > + } > + } > + > + WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2, > + "The allocation has spanned more than 512GB. 
" > + "It is highly likely this is incorrect."); > + > + start = orig_start; > + length = orig_length; > + > + gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) { > + BUG_ON(!pdp); > + > + ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length); > + if (ret) > + goto err_out; > + } > + > + bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es, > + GEN8_PML4ES_PER_PML4); > + > + return 0; > + > +err_out: > + start = orig_start; > + length = orig_length; > + gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) > + gen8_ppgtt_free_3lvl(pdp, vm->dev); > + > +err_alloc: > + for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) > + unmap_and_free_pdp(pdp, vm->dev); > + > + return ret; > } > > static int gen8_alloc_va_range(struct i915_address_space *vm, > @@ -1054,16 +1218,19 @@ static int gen8_alloc_va_range(struct i915_address_space *vm, > struct i915_hw_ppgtt *ppgtt = > container_of(vm, struct i915_hw_ppgtt, base); > > - if (!USES_FULL_48BIT_PPGTT(vm->dev)) > - return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length); > - else > + if (USES_FULL_48BIT_PPGTT(vm->dev)) > return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length); > + else > + return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length); > } > > static void gen8_ppgtt_fini_common(struct i915_hw_ppgtt *ppgtt) > { > unmap_and_free_pt(ppgtt->scratch_pd, ppgtt->base.dev); > - unmap_and_free_pdp(&ppgtt->pdp, ppgtt->base.dev); > + if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) > + pml4_fini(&ppgtt->pml4); > + else > + unmap_and_free_pdp(&ppgtt->pdp, ppgtt->base.dev); > } > > /** > @@ -1086,14 +1253,21 @@ static int gen8_ppgtt_init_common(struct i915_hw_ppgtt *ppgtt, uint64_t size) > > ppgtt->switch_mm = gen8_mm_switch; > > - if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) { > + if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) { > + int ret = pml4_init(ppgtt); > + if (ret) { > + unmap_and_free_pt(ppgtt->scratch_pd, ppgtt->base.dev); > + return ret; > + } > + } else { > int ret = 
__pdp_init(&ppgtt->pdp, false); > if (ret) { > unmap_and_free_pt(ppgtt->scratch_pd, ppgtt->base.dev); > return ret; > } > - } else > - return -EPERM; /* Not yet implemented */ > + > + trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base, 0, 0, GEN8_PML4E_SHIFT); > + } > > return 0; > } > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h > index 144858e..1477f54 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.h > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h > @@ -87,6 +87,7 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t; > */ > #define GEN8_PML4ES_PER_PML4 512 > #define GEN8_PML4E_SHIFT 39 > +#define GEN8_PML4E_MASK (GEN8_PML4ES_PER_PML4 - 1) > #define GEN8_PDPE_SHIFT 30 > /* NB: GEN8_PDPE_MASK is untrue for 32b platforms, but it has no impact on 32b page > * tables */ > @@ -427,6 +428,14 @@ static inline uint32_t gen6_pde_index(uint32_t addr) > temp = min(temp, length), \ > start += temp, length -= temp) > > +#define gen8_for_each_pml4e(pdp, pml4, start, length, temp, iter) \ > + for (iter = gen8_pml4e_index(start), pdp = (pml4)->pdps[iter]; \ > + length > 0 && iter < GEN8_PML4ES_PER_PML4; \ > + pdp = (pml4)->pdps[++iter], \ > + temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT) - start, \ > + temp = min(temp, length), \ > + start += temp, length -= temp) > + > #define gen8_for_each_pdpe(pd, pdp, start, length, temp, iter) \ > gen8_for_each_pdpe_e(pd, pdp, start, length, temp, iter, I915_PDPES_PER_PDP(dev)) > > @@ -458,7 +467,7 @@ static inline uint32_t gen8_pdpe_index(uint64_t address) > > static inline uint32_t gen8_pml4e_index(uint64_t address) > { > - BUG(); /* For 64B */ > + return (address >> GEN8_PML4E_SHIFT) & GEN8_PML4E_MASK; > } > > static inline size_t gen8_pte_count(uint64_t addr, uint64_t length) > -- > 2.1.1 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
On Tue, Mar 03, 2015 at 06:25:27PM +0530, akash goel wrote: > On Fri, Feb 20, 2015 at 11:15 PM, Michel Thierry > > + pdp = ppgtt->pml4.pdps[i]; > > + if (!pdp->daddr) > > + pci_unmap_page(hwdev, pdp->daddr, PAGE_SIZE, > > + PCI_DMA_BIDIRECTIONAL); > > + > > For consistency & cleanup, the call to pci_unmap_page can be replaced > with i915_dma_unmap_single. > Same can be done inside the gen8_ppgtt_unmap_pages_3lvl function also. Everything but the dma api interfaces (dma_unmap_page) is deprecated. A follow-up patch to go through all the i915 code and do these replacements would be nice. After all this landed ofc. -Daniel
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 1edcc17..edada33 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -483,9 +483,12 @@ static void __pdp_fini(struct i915_page_directory_pointer_entry *pdp) static void unmap_and_free_pdp(struct i915_page_directory_pointer_entry *pdp, struct drm_device *dev) { - __pdp_fini(pdp); - if (USES_FULL_48BIT_PPGTT(dev)) + if (USES_FULL_48BIT_PPGTT(dev)) { + __pdp_fini(pdp); + i915_dma_unmap_single(pdp, dev); + __free_page(pdp->page); kfree(pdp); + } } static int __pdp_init(struct i915_page_directory_pointer_entry *pdp, @@ -511,6 +514,60 @@ static int __pdp_init(struct i915_page_directory_pointer_entry *pdp, return 0; } +static struct i915_page_directory_pointer_entry *alloc_pdp_single(struct i915_hw_ppgtt *ppgtt, + struct i915_pml4 *pml4) +{ + struct drm_device *dev = ppgtt->base.dev; + struct i915_page_directory_pointer_entry *pdp; + int ret; + + BUG_ON(!USES_FULL_48BIT_PPGTT(dev)); + + pdp = kmalloc(sizeof(*pdp), GFP_KERNEL); + if (!pdp) + return ERR_PTR(-ENOMEM); + + pdp->page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO); + if (!pdp->page) { + kfree(pdp); + return ERR_PTR(-ENOMEM); + } + + ret = __pdp_init(pdp, dev); + if (ret) { + __free_page(pdp->page); + kfree(pdp); + return ERR_PTR(ret); + } + + i915_dma_map_px_single(pdp, dev); + + return pdp; +} + +static void pml4_fini(struct i915_pml4 *pml4) +{ + struct i915_hw_ppgtt *ppgtt = + container_of(pml4, struct i915_hw_ppgtt, pml4); + i915_dma_unmap_single(pml4, ppgtt->base.dev); + __free_page(pml4->page); + /* HACK */ + pml4->page = NULL; +} + +static int pml4_init(struct i915_hw_ppgtt *ppgtt) +{ + struct i915_pml4 *pml4 = &ppgtt->pml4; + + pml4->page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!pml4->page) + return -ENOMEM; + + i915_dma_map_px_single(pml4, ppgtt->base.dev); + + return 0; +} + /* Broadwell Page Directory Pointer Descriptors */ static int gen8_write_pdp(struct 
intel_engine_cs *ring, unsigned entry, @@ -712,14 +769,13 @@ static void gen8_free_page_tables(struct i915_page_directory_entry *pd, struct d } } -static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt) +static void gen8_ppgtt_unmap_pages_3lvl(struct i915_page_directory_pointer_entry *pdp, + struct drm_device *dev) { - struct pci_dev *hwdev = ppgtt->base.dev->pdev; - struct i915_page_directory_pointer_entry *pdp = &ppgtt->pdp; /* FIXME: 48b */ + struct pci_dev *hwdev = dev->pdev; int i, j; - for_each_set_bit(i, pdp->used_pdpes, - I915_PDPES_PER_PDP(ppgtt->base.dev)) { + for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) { struct i915_page_directory_entry *pd; if (WARN_ON(!pdp->page_directory[i])) @@ -747,27 +803,73 @@ static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt) } } -static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt) +static void gen8_ppgtt_unmap_pages_4lvl(struct i915_hw_ppgtt *ppgtt) { + struct pci_dev *hwdev = ppgtt->base.dev->pdev; int i; - if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) { - for_each_set_bit(i, ppgtt->pdp.used_pdpes, - I915_PDPES_PER_PDP(ppgtt->base.dev)) { - if (WARN_ON(!ppgtt->pdp.page_directory[i])) - continue; + for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) { + struct i915_page_directory_pointer_entry *pdp; - gen8_free_page_tables(ppgtt->pdp.page_directory[i], - ppgtt->base.dev); - unmap_and_free_pd(ppgtt->pdp.page_directory[i], - ppgtt->base.dev); - } - unmap_and_free_pdp(&ppgtt->pdp, ppgtt->base.dev); - } else { - BUG(); /* to be implemented later */ + if (WARN_ON(!ppgtt->pml4.pdps[i])) + continue; + + pdp = ppgtt->pml4.pdps[i]; + if (!pdp->daddr) + pci_unmap_page(hwdev, pdp->daddr, PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL); + + gen8_ppgtt_unmap_pages_3lvl(ppgtt->pml4.pdps[i], + ppgtt->base.dev); } } +static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt) +{ + if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) + gen8_ppgtt_unmap_pages_3lvl(&ppgtt->pdp, ppgtt->base.dev); + else + 
gen8_ppgtt_unmap_pages_4lvl(ppgtt); +} + +static void gen8_ppgtt_free_3lvl(struct i915_page_directory_pointer_entry *pdp, + struct drm_device *dev) +{ + int i; + + for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) { + if (WARN_ON(!pdp->page_directory[i])) + continue; + + gen8_free_page_tables(pdp->page_directory[i], dev); + unmap_and_free_pd(pdp->page_directory[i], dev); + } + + unmap_and_free_pdp(pdp, dev); +} + +static void gen8_ppgtt_free_4lvl(struct i915_hw_ppgtt *ppgtt) +{ + int i; + + for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) { + if (WARN_ON(!ppgtt->pml4.pdps[i])) + continue; + + gen8_ppgtt_free_3lvl(ppgtt->pml4.pdps[i], ppgtt->base.dev); + } + + pml4_fini(&ppgtt->pml4); +} + +static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt) +{ + if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) + gen8_ppgtt_free_3lvl(&ppgtt->pdp, ppgtt->base.dev); + else + gen8_ppgtt_free_4lvl(ppgtt); +} + static void gen8_ppgtt_cleanup(struct i915_address_space *vm) { struct i915_hw_ppgtt *ppgtt = @@ -1040,12 +1142,74 @@ err_out: return ret; } -static int __noreturn gen8_alloc_va_range_4lvl(struct i915_address_space *vm, - struct i915_pml4 *pml4, - uint64_t start, - uint64_t length) +static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm, + struct i915_pml4 *pml4, + uint64_t start, + uint64_t length) { - BUG(); /* to be implemented later */ + DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4); + struct i915_hw_ppgtt *ppgtt = + container_of(vm, struct i915_hw_ppgtt, base); + struct i915_page_directory_pointer_entry *pdp; + const uint64_t orig_start = start; + const uint64_t orig_length = length; + uint64_t temp, pml4e; + int ret = 0; + + /* Do the pml4 allocations first, so we don't need to track the newly + * allocated tables below the pdp */ + bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4); + + /* The page_directoryectory and pagetable allocations are done in the shared 3 + * and 4 level code. Just allocate the pdps. 
+ */ + gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) { + if (!pdp) { + WARN_ON(test_bit(pml4e, pml4->used_pml4es)); + pdp = alloc_pdp_single(ppgtt, pml4); + if (IS_ERR(pdp)) + goto err_alloc; + + pml4->pdps[pml4e] = pdp; + set_bit(pml4e, new_pdps); + trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base, pml4e, + pml4e << GEN8_PML4E_SHIFT, + GEN8_PML4E_SHIFT); + + } + } + + WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2, + "The allocation has spanned more than 512GB. " + "It is highly likely this is incorrect."); + + start = orig_start; + length = orig_length; + + gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) { + BUG_ON(!pdp); + + ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length); + if (ret) + goto err_out; + } + + bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es, + GEN8_PML4ES_PER_PML4); + + return 0; + +err_out: + start = orig_start; + length = orig_length; + gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) + gen8_ppgtt_free_3lvl(pdp, vm->dev); + +err_alloc: + for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) + unmap_and_free_pdp(pdp, vm->dev); + + return ret; } static int gen8_alloc_va_range(struct i915_address_space *vm, @@ -1054,16 +1218,19 @@ static int gen8_alloc_va_range(struct i915_address_space *vm, struct i915_hw_ppgtt *ppgtt = container_of(vm, struct i915_hw_ppgtt, base); - if (!USES_FULL_48BIT_PPGTT(vm->dev)) - return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length); - else + if (USES_FULL_48BIT_PPGTT(vm->dev)) return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length); + else + return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length); } static void gen8_ppgtt_fini_common(struct i915_hw_ppgtt *ppgtt) { unmap_and_free_pt(ppgtt->scratch_pd, ppgtt->base.dev); - unmap_and_free_pdp(&ppgtt->pdp, ppgtt->base.dev); + if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) + pml4_fini(&ppgtt->pml4); + else + unmap_and_free_pdp(&ppgtt->pdp, ppgtt->base.dev); } /** @@ -1086,14 +1253,21 
@@ static int gen8_ppgtt_init_common(struct i915_hw_ppgtt *ppgtt, uint64_t size) ppgtt->switch_mm = gen8_mm_switch; - if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) { + if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) { + int ret = pml4_init(ppgtt); + if (ret) { + unmap_and_free_pt(ppgtt->scratch_pd, ppgtt->base.dev); + return ret; + } + } else { int ret = __pdp_init(&ppgtt->pdp, false); if (ret) { unmap_and_free_pt(ppgtt->scratch_pd, ppgtt->base.dev); return ret; } - } else - return -EPERM; /* Not yet implemented */ + + trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base, 0, 0, GEN8_PML4E_SHIFT); + } return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 144858e..1477f54 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -87,6 +87,7 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t; */ #define GEN8_PML4ES_PER_PML4 512 #define GEN8_PML4E_SHIFT 39 +#define GEN8_PML4E_MASK (GEN8_PML4ES_PER_PML4 - 1) #define GEN8_PDPE_SHIFT 30 /* NB: GEN8_PDPE_MASK is untrue for 32b platforms, but it has no impact on 32b page * tables */ @@ -427,6 +428,14 @@ static inline uint32_t gen6_pde_index(uint32_t addr) temp = min(temp, length), \ start += temp, length -= temp) +#define gen8_for_each_pml4e(pdp, pml4, start, length, temp, iter) \ + for (iter = gen8_pml4e_index(start), pdp = (pml4)->pdps[iter]; \ + length > 0 && iter < GEN8_PML4ES_PER_PML4; \ + pdp = (pml4)->pdps[++iter], \ + temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT) - start, \ + temp = min(temp, length), \ + start += temp, length -= temp) + #define gen8_for_each_pdpe(pd, pdp, start, length, temp, iter) \ gen8_for_each_pdpe_e(pd, pdp, start, length, temp, iter, I915_PDPES_PER_PDP(dev)) @@ -458,7 +467,7 @@ static inline uint32_t gen8_pdpe_index(uint64_t address) static inline uint32_t gen8_pml4e_index(uint64_t address) { - BUG(); /* For 64B */ + return (address >> GEN8_PML4E_SHIFT) & GEN8_PML4E_MASK; } static inline size_t 
gen8_pte_count(uint64_t addr, uint64_t length)