Message ID | 1383451680-11173-24-git-send-email-benjamin.widawsky@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Sat, 2013-11-02 at 21:07 -0700, Ben Widawsky wrote: > Aside from the potential size increase of the PPGTT, the primary > difference from previous hardware is the Page Directories are no longer > carved out of the Global GTT. > > Note that the PDE allocation is done as a 8MB contiguous allocation, > this needs to be eventually fixed (since driver reloading will be a > pain otherwise). Also, this will be a no-go for real PPGTT support. > > v2: Move vtable initialization > > v3: Resolve conflicts due to patch series reordering. > > v4: Rebase on top of the address space refactoring of the PPGTT > support. Drop Imre's r-b tag for v2, too outdated by now. > > Signed-off-by: Ben Widawsky <ben@bwidawsk.net> (v2) > Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> > --- > drivers/gpu/drm/i915/i915_drv.h | 19 ++++-- > drivers/gpu/drm/i915/i915_gem_gtt.c | 123 +++++++++++++++++++++++++++++++++++- > 2 files changed, 137 insertions(+), 5 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 83d016c..97b0905 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -572,10 +572,21 @@ struct i915_gtt { > struct i915_hw_ppgtt { > struct i915_address_space base; > unsigned num_pd_entries; > - struct page **pt_pages; > - uint32_t pd_offset; > - dma_addr_t *pt_dma_addr; > - > + union { > + struct page **pt_pages; > + struct page *gen8_pt_pages; > + }; > + struct page *pd_pages; > + int num_pd_pages; > + int num_pt_pages; > + union { > + uint32_t pd_offset; > + dma_addr_t pd_dma_addr[4]; > + }; > + union { > + dma_addr_t *pt_dma_addr; > + dma_addr_t *gen8_pt_dma_addr[4]; > + }; > int (*enable)(struct drm_device *dev); > }; > > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c > index 02de12d..4a11f51 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > @@ -31,6 +31,7 @@ > #define GEN6_PPGTT_PD_ENTRIES 512 > #define 
I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t)) > typedef uint64_t gen8_gtt_pte_t; > +typedef gen8_gtt_pte_t gen8_ppgtt_pde_t; > > /* PPGTT stuff */ > #define GEN6_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0xff0)) > @@ -58,6 +59,9 @@ typedef uint64_t gen8_gtt_pte_t; > #define HSW_WB_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0xb) > #define HSW_WT_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x6) > > +#define GEN8_PDES_PER_PAGE (PAGE_SIZE / sizeof(gen8_ppgtt_pde_t)) > +#define GEN8_LEGACY_PDPS 4 > + > #define PPAT_UNCACHED_INDEX (_PAGE_PWT | _PAGE_PCD) > #define PPAT_CACHED_PDE_INDEX 0 /* WB LLC */ > #define PPAT_CACHED_INDEX _PAGE_PAT /* WB LLCeLLC */ > @@ -177,6 +181,123 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr, > return pte; > } > > +static void gen8_ppgtt_cleanup(struct i915_address_space *vm) > +{ > + struct i915_hw_ppgtt *ppgtt = > + container_of(vm, struct i915_hw_ppgtt, base); > + int i, j; > + > + for (i = 0; i < ppgtt->num_pd_pages ; i++) { > + if (ppgtt->pd_dma_addr[i]) { > + pci_unmap_page(ppgtt->base.dev->pdev, > + ppgtt->pd_dma_addr[i], > + PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); > + > + for (j = 0; j < GEN8_PDES_PER_PAGE; j++) { > + dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j]; > + if (addr) > + pci_unmap_page(ppgtt->base.dev->pdev, > + addr, > + PAGE_SIZE, > + PCI_DMA_BIDIRECTIONAL); > + > + } > + } > + kfree(ppgtt->gen8_pt_dma_addr[i]); > + } > + > + __free_pages(ppgtt->gen8_pt_pages, get_order(ppgtt->num_pt_pages)); > + __free_pages(ppgtt->pd_pages, get_order(ppgtt->num_pd_pages)); get_order takes size not a page count. With that fixed: Reviewed-by: Imre Deak <imre.deak@intel.com> > +} > + > +/** > + * GEN8 legacy ppgtt programming is accomplished through 4 PDP registers with a > + * net effect resembling a 2-level page table in normal x86 terms. Each PDP > + * represents 1GB of memory > + * 4 * 512 * 512 * 4096 = 4GB legacy 32b address space. 
> + * > + * TODO: Do something with the size parameter > + **/ > +static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size) > +{ > + struct page *pt_pages; > + int i, j, ret = -ENOMEM; > + const int max_pdp = DIV_ROUND_UP(size, 1 << 30); > + const int num_pt_pages = GEN8_PDES_PER_PAGE * max_pdp; > + > + if (size % (1<<30)) > + DRM_INFO("Pages will be wasted unless GTT size (%llu) is divisible by 1GB\n", size); > + > + /* FIXME: split allocation into smaller pieces. For now we only ever do > + * this once, but with full PPGTT, the multiple contiguous allocations > + * will be bad. > + */ > + ppgtt->pd_pages = alloc_pages(GFP_KERNEL, get_order(max_pdp << PAGE_SHIFT)); > + if (!ppgtt->pd_pages) > + return -ENOMEM; > + > + pt_pages = alloc_pages(GFP_KERNEL, get_order(num_pt_pages << PAGE_SHIFT)); > + if (!pt_pages) { > + __free_pages(ppgtt->pd_pages, get_order(max_pdp << PAGE_SHIFT)); > + return -ENOMEM; > + } > + > + ppgtt->gen8_pt_pages = pt_pages; > + ppgtt->num_pd_pages = 1 << get_order(max_pdp << PAGE_SHIFT); > + ppgtt->num_pt_pages = 1 << get_order(num_pt_pages << PAGE_SHIFT); > + ppgtt->num_pd_entries = max_pdp * GEN8_PDES_PER_PAGE; > + ppgtt->base.clear_range = NULL; > + ppgtt->base.insert_entries = NULL; > + ppgtt->base.cleanup = gen8_ppgtt_cleanup; > + > + BUG_ON(ppgtt->num_pd_pages > GEN8_LEGACY_PDPS); > + > + /* > + * - Create a mapping for the page directories. > + * - For each page directory: > + * allocate space for page table mappings. 
> + * map each page table > + */ > + for (i = 0; i < max_pdp; i++) { > + dma_addr_t temp; > + temp = pci_map_page(ppgtt->base.dev->pdev, > + &ppgtt->pd_pages[i], 0, > + PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); > + if (pci_dma_mapping_error(ppgtt->base.dev->pdev, temp)) > + goto err_out; > + > + ppgtt->pd_dma_addr[i] = temp; > + > + ppgtt->gen8_pt_dma_addr[i] = kmalloc(sizeof(dma_addr_t) * GEN8_PDES_PER_PAGE, GFP_KERNEL); > + if (!ppgtt->gen8_pt_dma_addr[i]) > + goto err_out; > + > + for (j = 0; j < GEN8_PDES_PER_PAGE; j++) { > + struct page *p = &pt_pages[i * GEN8_PDES_PER_PAGE + j]; > + temp = pci_map_page(ppgtt->base.dev->pdev, > + p, 0, PAGE_SIZE, > + PCI_DMA_BIDIRECTIONAL); > + > + if (pci_dma_mapping_error(ppgtt->base.dev->pdev, temp)) > + goto err_out; > + > + ppgtt->gen8_pt_dma_addr[i][j] = temp; > + } > + } > + > + DRM_DEBUG_DRIVER("Allocated %d pages for page directories (%d wasted)\n", > + ppgtt->num_pd_pages, ppgtt->num_pd_pages - max_pdp); > + DRM_DEBUG_DRIVER("Allocated %d pages for page tables (%lld wasted)\n", > + ppgtt->num_pt_pages, > + (ppgtt->num_pt_pages - num_pt_pages) + > + size % (1<<30)); > + return -ENOSYS; /* Not ready yet */ > + > +err_out: > + ppgtt->base.cleanup(&ppgtt->base); > + return ret; > +} > + > static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt) > { > struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private; > @@ -430,7 +551,7 @@ static int i915_gem_init_aliasing_ppgtt(struct drm_device *dev) > if (INTEL_INFO(dev)->gen < 8) > ret = gen6_ppgtt_init(ppgtt); > else if (IS_GEN8(dev)) > - ret = -ENXIO; > + ret = gen8_ppgtt_init(ppgtt, dev_priv->gtt.base.total); > else > BUG(); >
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 83d016c..97b0905 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -572,10 +572,21 @@ struct i915_gtt {
 struct i915_hw_ppgtt {
 	struct i915_address_space base;
 	unsigned num_pd_entries;
-	struct page **pt_pages;
-	uint32_t pd_offset;
-	dma_addr_t *pt_dma_addr;
-
+	union {
+		struct page **pt_pages;
+		struct page *gen8_pt_pages;
+	};
+	struct page *pd_pages;
+	int num_pd_pages;
+	int num_pt_pages;
+	union {
+		uint32_t pd_offset;
+		dma_addr_t pd_dma_addr[4];
+	};
+	union {
+		dma_addr_t *pt_dma_addr;
+		dma_addr_t *gen8_pt_dma_addr[4];
+	};
 	int (*enable)(struct drm_device *dev);
 };
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 02de12d..4a11f51 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -31,6 +31,7 @@
 #define GEN6_PPGTT_PD_ENTRIES 512
 #define I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
 typedef uint64_t gen8_gtt_pte_t;
+typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
 
 /* PPGTT stuff */
 #define GEN6_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0xff0))
@@ -58,6 +59,9 @@ typedef uint64_t gen8_gtt_pte_t;
 #define HSW_WB_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0xb)
 #define HSW_WT_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x6)
 
+#define GEN8_PDES_PER_PAGE (PAGE_SIZE / sizeof(gen8_ppgtt_pde_t))
+#define GEN8_LEGACY_PDPS 4
+
 #define PPAT_UNCACHED_INDEX (_PAGE_PWT | _PAGE_PCD)
 #define PPAT_CACHED_PDE_INDEX 0 /* WB LLC */
 #define PPAT_CACHED_INDEX _PAGE_PAT /* WB LLCeLLC */
@@ -177,6 +181,145 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
 	return pte;
 }
 
+/*
+ * Tear down a GEN8 PPGTT: unmap every page directory and page table that was
+ * DMA-mapped, free the per-directory DMA address arrays and release the two
+ * contiguous page allocations.
+ *
+ * gen8_ppgtt_init() also calls this on its error path, so a partially built
+ * ppgtt must be tolerated: a zero DMA address means "never mapped" and a NULL
+ * gen8_pt_dma_addr[i] means "never allocated".
+ * NOTE(review): this relies on the caller handing in a zeroed ppgtt - confirm.
+ */
+static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
+{
+	struct i915_hw_ppgtt *ppgtt =
+		container_of(vm, struct i915_hw_ppgtt, base);
+	int i, j;
+
+	for (i = 0; i < ppgtt->num_pd_pages; i++) {
+		dma_addr_t *pt_dma = ppgtt->gen8_pt_dma_addr[i];
+
+		if (ppgtt->pd_dma_addr[i])
+			pci_unmap_page(ppgtt->base.dev->pdev,
+				       ppgtt->pd_dma_addr[i],
+				       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+
+		/* The array allocation failed or was never reached. */
+		if (!pt_dma)
+			continue;
+
+		for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
+			if (pt_dma[j])
+				pci_unmap_page(ppgtt->base.dev->pdev,
+					       pt_dma[j], PAGE_SIZE,
+					       PCI_DMA_BIDIRECTIONAL);
+		}
+		kfree(pt_dma);
+	}
+
+	/* get_order() takes a size in bytes, not a page count. */
+	__free_pages(ppgtt->gen8_pt_pages,
+		     get_order(ppgtt->num_pt_pages << PAGE_SHIFT));
+	__free_pages(ppgtt->pd_pages,
+		     get_order(ppgtt->num_pd_pages << PAGE_SHIFT));
+}
+
+/**
+ * GEN8 legacy ppgtt programming is accomplished through 4 PDP registers with a
+ * net effect resembling a 2-level page table in normal x86 terms. Each PDP
+ * represents 1GB of memory
+ * 4 * 512 * 512 * 4096 = 4GB legacy 32b address space.
+ *
+ * @size is rounded up to whole 1GB page directories. Returns -ENOMEM when an
+ * allocation or DMA mapping fails (any partial setup is undone first), and
+ * -ENOSYS otherwise because the PPGTT is not usable yet.
+ *
+ * TODO: Do something with the size parameter
+ **/
+static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
+{
+	struct page *pt_pages;
+	int i, j, ret = -ENOMEM;
+	const int max_pdp = DIV_ROUND_UP(size, 1 << 30);
+	const int num_pt_pages = GEN8_PDES_PER_PAGE * max_pdp;
+
+	if (size % (1<<30))
+		DRM_INFO("Pages will be wasted unless GTT size (%llu) is divisible by 1GB\n", size);
+
+	/* FIXME: split allocation into smaller pieces. For now we only ever do
+	 * this once, but with full PPGTT, the multiple contiguous allocations
+	 * will be bad.
+	 */
+	ppgtt->pd_pages = alloc_pages(GFP_KERNEL, get_order(max_pdp << PAGE_SHIFT));
+	if (!ppgtt->pd_pages)
+		return -ENOMEM;
+
+	pt_pages = alloc_pages(GFP_KERNEL, get_order(num_pt_pages << PAGE_SHIFT));
+	if (!pt_pages) {
+		__free_pages(ppgtt->pd_pages, get_order(max_pdp << PAGE_SHIFT));
+		return -ENOMEM;
+	}
+
+	ppgtt->gen8_pt_pages = pt_pages;
+	ppgtt->num_pd_pages = 1 << get_order(max_pdp << PAGE_SHIFT);
+	ppgtt->num_pt_pages = 1 << get_order(num_pt_pages << PAGE_SHIFT);
+	ppgtt->num_pd_entries = max_pdp * GEN8_PDES_PER_PAGE;
+	ppgtt->base.clear_range = NULL;
+	ppgtt->base.insert_entries = NULL;
+	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
+
+	BUG_ON(ppgtt->num_pd_pages > GEN8_LEGACY_PDPS);
+
+	/*
+	 * - Create a mapping for the page directories.
+	 * - For each page directory:
+	 *      allocate space for page table mappings.
+	 *      map each page table
+	 */
+	for (i = 0; i < max_pdp; i++) {
+		dma_addr_t temp;
+		temp = pci_map_page(ppgtt->base.dev->pdev,
+				    &ppgtt->pd_pages[i], 0,
+				    PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+		if (pci_dma_mapping_error(ppgtt->base.dev->pdev, temp))
+			goto err_out;
+
+		ppgtt->pd_dma_addr[i] = temp;
+
+		/* Zeroed so that cleanup can tell which entries were mapped. */
+		ppgtt->gen8_pt_dma_addr[i] = kcalloc(GEN8_PDES_PER_PAGE,
+						     sizeof(dma_addr_t),
+						     GFP_KERNEL);
+		if (!ppgtt->gen8_pt_dma_addr[i])
+			goto err_out;
+
+		for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
+			struct page *p = &pt_pages[i * GEN8_PDES_PER_PAGE + j];
+			temp = pci_map_page(ppgtt->base.dev->pdev,
+					    p, 0, PAGE_SIZE,
+					    PCI_DMA_BIDIRECTIONAL);
+
+			if (pci_dma_mapping_error(ppgtt->base.dev->pdev, temp))
+				goto err_out;
+
+			ppgtt->gen8_pt_dma_addr[i][j] = temp;
+		}
+	}
+
+	DRM_DEBUG_DRIVER("Allocated %d pages for page directories (%d wasted)\n",
+			 ppgtt->num_pd_pages, ppgtt->num_pd_pages - max_pdp);
+	DRM_DEBUG_DRIVER("Allocated %d pages for page tables (%lld wasted)\n",
+			 ppgtt->num_pt_pages,
+			 (ppgtt->num_pt_pages - num_pt_pages) +
+			 size % (1<<30));
+	return -ENOSYS; /* Not ready yet */
+
+err_out:
+	ppgtt->base.cleanup(&ppgtt->base);
+	return ret;
+}
+
 static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt)
 {
 	struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
@@ -430,7 +573,7 @@ static int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
 	if (INTEL_INFO(dev)->gen < 8)
 		ret = gen6_ppgtt_init(ppgtt);
 	else if (IS_GEN8(dev))
-		ret = -ENXIO;
+		ret = gen8_ppgtt_init(ppgtt, dev_priv->gtt.base.total);
 	else
 		BUG();
 