Message ID | 1424794985-14441-4-git-send-email-michel.thierry@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Michel Thierry <michel.thierry@intel.com> writes: > From: Ben Widawsky <benjamin.widawsky@intel.com> > > As we move toward dynamic page table allocation, it becomes much easier > to manage our data structures if we do things less coarsely by > breaking up all of our actions into individual tasks. This makes the > code easier to write, read, and verify. > > Aside from the dissection of the allocation functions, the patch > statically allocates the page table structures without a page directory. > This remains the same for all platforms. > > The patch itself should not have much functional difference. The primary > noticeable difference is the fact that page tables are no longer > allocated, but rather statically declared as part of the page directory. > This has non-zero overhead, but things gain additional complexity as a > result. > > This patch exists for a few reasons: > 1. Splitting out the functions allows easily combining GEN6 and GEN8 > code. Page tables have no difference based on GEN8. As we'll see in a > future patch when we add the DMA mappings to the allocations, it > requires only one small change to make it work, and error handling should > just fall into place. > > 2. Unless we always want to allocate all page tables under a given PDE, > we'll have to eventually break this up into an array of pointers (or > pointer to pointer). > > 3. Having the discrete functions is easier to review, and understand. > All allocations and frees now take place in just a couple of locations. > Reviewing, and catching leaks should be easy. > > 4. Less important: the GFP flags are confined to one location, which > makes playing around with such things trivial. > > v2: Updated commit message to explain why this patch exists > > v3: For lrc, s/pdp.page_directory[i].daddr/pdp.page_directory[i]->daddr/ > > v4: Renamed free_pt/pd_single functions to unmap_and_free_pt/pd (Daniel) > > v5: Added additional safety checks in gen8 clear/free/unmap. 
> > v6: Use WARN_ON and return -EINVAL in alloc_pt_range (Mika). > > v7: Make err_out loop symmetrical to the way we allocate in > alloc_pt_range. Also s/page_tables/page_table and correct commit > message (Mika) > > Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> > Signed-off-by: Ben Widawsky <ben@bwidawsk.net> > Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v3+) Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com> > --- > drivers/gpu/drm/i915/i915_gem_gtt.c | 254 ++++++++++++++++++++++++------------ > drivers/gpu/drm/i915/i915_gem_gtt.h | 4 +- > drivers/gpu/drm/i915/intel_lrc.c | 16 +-- > 3 files changed, 178 insertions(+), 96 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c > index ab6f1d4..81c1dba 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > @@ -279,6 +279,98 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr, > return pte; > } > > +static void unmap_and_free_pt(struct i915_page_table_entry *pt) > +{ > + if (WARN_ON(!pt->page)) > + return; > + __free_page(pt->page); > + kfree(pt); > +} > + > +static struct i915_page_table_entry *alloc_pt_single(void) > +{ > + struct i915_page_table_entry *pt; > + > + pt = kzalloc(sizeof(*pt), GFP_KERNEL); > + if (!pt) > + return ERR_PTR(-ENOMEM); > + > + pt->page = alloc_page(GFP_KERNEL | __GFP_ZERO); > + if (!pt->page) { > + kfree(pt); > + return ERR_PTR(-ENOMEM); > + } > + > + return pt; > +} > + > +/** > + * alloc_pt_range() - Allocate a multiple page tables > + * @pd: The page directory which will have at least @count entries > + * available to point to the allocated page tables. > + * @pde: First page directory entry for which we are allocating. > + * @count: Number of pages to allocate. > + * > + * Allocates multiple page table pages and sets the appropriate entries in the > + * page table structure within the page directory. Function cleans up after > + * itself on any failures. 
> + * > + * Return: 0 if allocation succeeded. > + */ > +static int alloc_pt_range(struct i915_page_directory_entry *pd, uint16_t pde, size_t count) > +{ > + int i, ret; > + > + /* 512 is the max page tables per page_directory on any platform. */ > + if (WARN_ON(pde + count > GEN6_PPGTT_PD_ENTRIES)) > + return -EINVAL; > + > + for (i = pde; i < pde + count; i++) { > + struct i915_page_table_entry *pt = alloc_pt_single(); > + > + if (IS_ERR(pt)) { > + ret = PTR_ERR(pt); > + goto err_out; > + } > + WARN(pd->page_table[i], > + "Leaking page directory entry %d (%pa)\n", > + i, pd->page_table[i]); > + pd->page_table[i] = pt; > + } > + > + return 0; > + > +err_out: > + while (i-- > pde) > + unmap_and_free_pt(pd->page_table[i]); > + return ret; > +} > + > +static void unmap_and_free_pd(struct i915_page_directory_entry *pd) > +{ > + if (pd->page) { > + __free_page(pd->page); > + kfree(pd); > + } > +} > + > +static struct i915_page_directory_entry *alloc_pd_single(void) > +{ > + struct i915_page_directory_entry *pd; > + > + pd = kzalloc(sizeof(*pd), GFP_KERNEL); > + if (!pd) > + return ERR_PTR(-ENOMEM); > + > + pd->page = alloc_page(GFP_KERNEL | __GFP_ZERO); > + if (!pd->page) { > + kfree(pd); > + return ERR_PTR(-ENOMEM); > + } > + > + return pd; > +} > + > /* Broadwell Page Directory Pointer Descriptors */ > static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry, > uint64_t val) > @@ -311,7 +403,7 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt, > int used_pd = ppgtt->num_pd_entries / GEN8_PDES_PER_PAGE; > > for (i = used_pd - 1; i >= 0; i--) { > - dma_addr_t addr = ppgtt->pdp.page_directory[i].daddr; > + dma_addr_t addr = ppgtt->pdp.page_directory[i]->daddr; > ret = gen8_write_pdp(ring, i, addr); > if (ret) > return ret; > @@ -338,8 +430,24 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm, > I915_CACHE_LLC, use_scratch); > > while (num_entries) { > - struct i915_page_directory_entry *pd = &ppgtt->pdp.page_directory[pdpe]; > - 
struct page *page_table = pd->page_table[pde].page; > + struct i915_page_directory_entry *pd; > + struct i915_page_table_entry *pt; > + struct page *page_table; > + > + if (WARN_ON(!ppgtt->pdp.page_directory[pdpe])) > + continue; > + > + pd = ppgtt->pdp.page_directory[pdpe]; > + > + if (WARN_ON(!pd->page_table[pde])) > + continue; > + > + pt = pd->page_table[pde]; > + > + if (WARN_ON(!pt->page)) > + continue; > + > + page_table = pt->page; > > last_pte = pte + num_entries; > if (last_pte > GEN8_PTES_PER_PAGE) > @@ -384,8 +492,9 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, > break; > > if (pt_vaddr == NULL) { > - struct i915_page_directory_entry *pd = &ppgtt->pdp.page_directory[pdpe]; > - struct page *page_table = pd->page_table[pde].page; > + struct i915_page_directory_entry *pd = ppgtt->pdp.page_directory[pdpe]; > + struct i915_page_table_entry *pt = pd->page_table[pde]; > + struct page *page_table = pt->page; > > pt_vaddr = kmap_atomic(page_table); > } > @@ -416,19 +525,16 @@ static void gen8_free_page_tables(struct i915_page_directory_entry *pd) > { > int i; > > - if (pd->page_table == NULL) > + if (!pd->page) > return; > > - for (i = 0; i < GEN8_PDES_PER_PAGE; i++) > - if (pd->page_table[i].page) > - __free_page(pd->page_table[i].page); > -} > + for (i = 0; i < GEN8_PDES_PER_PAGE; i++) { > + if (WARN_ON(!pd->page_table[i])) > + continue; > > -static void gen8_free_page_directory(struct i915_page_directory_entry *pd) > -{ > - gen8_free_page_tables(pd); > - kfree(pd->page_table); > - __free_page(pd->page); > + unmap_and_free_pt(pd->page_table[i]); > + pd->page_table[i] = NULL; > + } > } > > static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt) > @@ -436,7 +542,11 @@ static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt) > int i; > > for (i = 0; i < ppgtt->num_pd_pages; i++) { > - gen8_free_page_directory(&ppgtt->pdp.page_directory[i]); > + if (WARN_ON(!ppgtt->pdp.page_directory[i])) > + continue; > + > + 
gen8_free_page_tables(ppgtt->pdp.page_directory[i]); > + unmap_and_free_pd(ppgtt->pdp.page_directory[i]); > } > } > > @@ -448,14 +558,23 @@ static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt) > for (i = 0; i < ppgtt->num_pd_pages; i++) { > /* TODO: In the future we'll support sparse mappings, so this > * will have to change. */ > - if (!ppgtt->pdp.page_directory[i].daddr) > + if (!ppgtt->pdp.page_directory[i]->daddr) > continue; > > - pci_unmap_page(hwdev, ppgtt->pdp.page_directory[i].daddr, PAGE_SIZE, > + pci_unmap_page(hwdev, ppgtt->pdp.page_directory[i]->daddr, PAGE_SIZE, > PCI_DMA_BIDIRECTIONAL); > > for (j = 0; j < GEN8_PDES_PER_PAGE; j++) { > - dma_addr_t addr = ppgtt->pdp.page_directory[i].page_table[j].daddr; > + struct i915_page_directory_entry *pd = ppgtt->pdp.page_directory[i]; > + struct i915_page_table_entry *pt; > + dma_addr_t addr; > + > + if (WARN_ON(!pd->page_table[j])) > + continue; > + > + pt = pd->page_table[j]; > + addr = pt->daddr; > + > if (addr) > pci_unmap_page(hwdev, addr, PAGE_SIZE, > PCI_DMA_BIDIRECTIONAL); > @@ -474,25 +593,20 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) > > static int gen8_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt) > { > - int i, j; > + int i, ret; > > for (i = 0; i < ppgtt->num_pd_pages; i++) { > - struct i915_page_directory_entry *pd = &ppgtt->pdp.page_directory[i]; > - for (j = 0; j < GEN8_PDES_PER_PAGE; j++) { > - struct i915_page_table_entry *pt = &pd->page_table[j]; > - > - pt->page = alloc_page(GFP_KERNEL | __GFP_ZERO); > - if (!pt->page) > - goto unwind_out; > - > - } > + ret = alloc_pt_range(ppgtt->pdp.page_directory[i], > + 0, GEN8_PDES_PER_PAGE); > + if (ret) > + goto unwind_out; > } > > return 0; > > unwind_out: > while (i--) > - gen8_free_page_tables(&ppgtt->pdp.page_directory[i]); > + gen8_free_page_tables(ppgtt->pdp.page_directory[i]); > > return -ENOMEM; > } > @@ -503,19 +617,9 @@ static int gen8_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt, > 
int i; > > for (i = 0; i < max_pdp; i++) { > - struct i915_page_table_entry *pt; > - > - pt = kcalloc(GEN8_PDES_PER_PAGE, sizeof(*pt), GFP_KERNEL); > - if (!pt) > - goto unwind_out; > - > - ppgtt->pdp.page_directory[i].page = alloc_page(GFP_KERNEL); > - if (!ppgtt->pdp.page_directory[i].page) { > - kfree(pt); > + ppgtt->pdp.page_directory[i] = alloc_pd_single(); > + if (IS_ERR(ppgtt->pdp.page_directory[i])) > goto unwind_out; > - } > - > - ppgtt->pdp.page_directory[i].page_table = pt; > } > > ppgtt->num_pd_pages = max_pdp; > @@ -524,10 +628,8 @@ static int gen8_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt, > return 0; > > unwind_out: > - while (i--) { > - kfree(ppgtt->pdp.page_directory[i].page_table); > - __free_page(ppgtt->pdp.page_directory[i].page); > - } > + while (i--) > + unmap_and_free_pd(ppgtt->pdp.page_directory[i]); > > return -ENOMEM; > } > @@ -561,14 +663,14 @@ static int gen8_ppgtt_setup_page_directories(struct i915_hw_ppgtt *ppgtt, > int ret; > > pd_addr = pci_map_page(ppgtt->base.dev->pdev, > - ppgtt->pdp.page_directory[pd].page, 0, > + ppgtt->pdp.page_directory[pd]->page, 0, > PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); > > ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pd_addr); > if (ret) > return ret; > > - ppgtt->pdp.page_directory[pd].daddr = pd_addr; > + ppgtt->pdp.page_directory[pd]->daddr = pd_addr; > > return 0; > } > @@ -578,8 +680,8 @@ static int gen8_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt, > const int pt) > { > dma_addr_t pt_addr; > - struct i915_page_directory_entry *pdir = &ppgtt->pdp.page_directory[pd]; > - struct i915_page_table_entry *ptab = &pdir->page_table[pt]; > + struct i915_page_directory_entry *pdir = ppgtt->pdp.page_directory[pd]; > + struct i915_page_table_entry *ptab = pdir->page_table[pt]; > struct page *p = ptab->page; > int ret; > > @@ -642,10 +744,12 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size) > * will never need to touch the PDEs again. 
> */ > for (i = 0; i < max_pdp; i++) { > + struct i915_page_directory_entry *pd = ppgtt->pdp.page_directory[i]; > gen8_ppgtt_pde_t *pd_vaddr; > - pd_vaddr = kmap_atomic(ppgtt->pdp.page_directory[i].page); > + pd_vaddr = kmap_atomic(ppgtt->pdp.page_directory[i]->page); > for (j = 0; j < GEN8_PDES_PER_PAGE; j++) { > - dma_addr_t addr = ppgtt->pdp.page_directory[i].page_table[j].daddr; > + struct i915_page_table_entry *pt = pd->page_table[j]; > + dma_addr_t addr = pt->daddr; > pd_vaddr[j] = gen8_pde_encode(ppgtt->base.dev, addr, > I915_CACHE_LLC); > } > @@ -696,7 +800,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) > for (pde = 0; pde < ppgtt->num_pd_entries; pde++) { > u32 expected; > gen6_gtt_pte_t *pt_vaddr; > - dma_addr_t pt_addr = ppgtt->pd.page_table[pde].daddr; > + dma_addr_t pt_addr = ppgtt->pd.page_table[pde]->daddr; > pd_entry = readl(pd_addr + pde); > expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID); > > @@ -707,7 +811,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) > expected); > seq_printf(m, "\tPDE: %x\n", pd_entry); > > - pt_vaddr = kmap_atomic(ppgtt->pd.page_table[pde].page); > + pt_vaddr = kmap_atomic(ppgtt->pd.page_table[pde]->page); > for (pte = 0; pte < I915_PPGTT_PT_ENTRIES; pte+=4) { > unsigned long va = > (pde * PAGE_SIZE * I915_PPGTT_PT_ENTRIES) + > @@ -746,7 +850,7 @@ static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt) > for (i = 0; i < ppgtt->num_pd_entries; i++) { > dma_addr_t pt_addr; > > - pt_addr = ppgtt->pd.page_table[i].daddr; > + pt_addr = ppgtt->pd.page_table[i]->daddr; > pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr); > pd_entry |= GEN6_PDE_VALID; > > @@ -922,7 +1026,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, > if (last_pte > I915_PPGTT_PT_ENTRIES) > last_pte = I915_PPGTT_PT_ENTRIES; > > - pt_vaddr = kmap_atomic(ppgtt->pd.page_table[act_pt].page); > + pt_vaddr = kmap_atomic(ppgtt->pd.page_table[act_pt]->page); > > for (i = 
first_pte; i < last_pte; i++) > pt_vaddr[i] = scratch_pte; > @@ -951,7 +1055,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, > pt_vaddr = NULL; > for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) { > if (pt_vaddr == NULL) > - pt_vaddr = kmap_atomic(ppgtt->pd.page_table[act_pt].page); > + pt_vaddr = kmap_atomic(ppgtt->pd.page_table[act_pt]->page); > > pt_vaddr[act_pte] = > vm->pte_encode(sg_page_iter_dma_address(&sg_iter), > @@ -974,7 +1078,7 @@ static void gen6_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt) > > for (i = 0; i < ppgtt->num_pd_entries; i++) > pci_unmap_page(ppgtt->base.dev->pdev, > - ppgtt->pd.page_table[i].daddr, > + ppgtt->pd.page_table[i]->daddr, > 4096, PCI_DMA_BIDIRECTIONAL); > } > > @@ -983,9 +1087,9 @@ static void gen6_ppgtt_free(struct i915_hw_ppgtt *ppgtt) > int i; > > for (i = 0; i < ppgtt->num_pd_entries; i++) > - if (ppgtt->pd.page_table[i].page) > - __free_page(ppgtt->pd.page_table[i].page); > - kfree(ppgtt->pd.page_table); > + unmap_and_free_pt(ppgtt->pd.page_table[i]); > + > + unmap_and_free_pd(&ppgtt->pd); > } > > static void gen6_ppgtt_cleanup(struct i915_address_space *vm) > @@ -1040,28 +1144,6 @@ alloc: > return 0; > } > > -static int gen6_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt) > -{ > - struct i915_page_table_entry *pt; > - int i; > - > - pt = kcalloc(ppgtt->num_pd_entries, sizeof(*pt), GFP_KERNEL); > - if (!pt) > - return -ENOMEM; > - > - ppgtt->pd.page_table = pt; > - > - for (i = 0; i < ppgtt->num_pd_entries; i++) { > - pt[i].page = alloc_page(GFP_KERNEL); > - if (!pt->page) { > - gen6_ppgtt_free(ppgtt); > - return -ENOMEM; > - } > - } > - > - return 0; > -} > - > static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt) > { > int ret; > @@ -1070,7 +1152,7 @@ static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt) > if (ret) > return ret; > > - ret = gen6_ppgtt_allocate_page_tables(ppgtt); > + ret = alloc_pt_range(&ppgtt->pd, 0, ppgtt->num_pd_entries); > if (ret) { > 
drm_mm_remove_node(&ppgtt->node); > return ret; > @@ -1088,7 +1170,7 @@ static int gen6_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt) > struct page *page; > dma_addr_t pt_addr; > > - page = ppgtt->pd.page_table[i].page; > + page = ppgtt->pd.page_table[i]->page; > pt_addr = pci_map_page(dev->pdev, page, 0, 4096, > PCI_DMA_BIDIRECTIONAL); > > @@ -1097,7 +1179,7 @@ static int gen6_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt) > return -EIO; > } > > - ppgtt->pd.page_table[i].daddr = pt_addr; > + ppgtt->pd.page_table[i]->daddr = pt_addr; > } > > return 0; > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h > index 1144b709..c9e93f5 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.h > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h > @@ -199,12 +199,12 @@ struct i915_page_directory_entry { > dma_addr_t daddr; > }; > > - struct i915_page_table_entry *page_table; > + struct i915_page_table_entry *page_table[GEN6_PPGTT_PD_ENTRIES]; /* PDEs */ > }; > > struct i915_page_directory_pointer_entry { > /* struct page *page; */ > - struct i915_page_directory_entry page_directory[GEN8_LEGACY_PDPES]; > + struct i915_page_directory_entry *page_directory[GEN8_LEGACY_PDPES]; > }; > > struct i915_address_space { > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index 9e71992..bc9c7c3 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -1735,14 +1735,14 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o > reg_state[CTX_PDP1_LDW] = GEN8_RING_PDP_LDW(ring, 1); > reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0); > reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0); > - reg_state[CTX_PDP3_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[3].daddr); > - reg_state[CTX_PDP3_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[3].daddr); > - reg_state[CTX_PDP2_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[2].daddr); > - 
reg_state[CTX_PDP2_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[2].daddr); > - reg_state[CTX_PDP1_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[1].daddr); > - reg_state[CTX_PDP1_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[1].daddr); > - reg_state[CTX_PDP0_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[0].daddr); > - reg_state[CTX_PDP0_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[0].daddr); > + reg_state[CTX_PDP3_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[3]->daddr); > + reg_state[CTX_PDP3_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[3]->daddr); > + reg_state[CTX_PDP2_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[2]->daddr); > + reg_state[CTX_PDP2_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[2]->daddr); > + reg_state[CTX_PDP1_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[1]->daddr); > + reg_state[CTX_PDP1_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[1]->daddr); > + reg_state[CTX_PDP0_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[0]->daddr); > + reg_state[CTX_PDP0_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[0]->daddr); > if (ring->id == RCS) { > reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); > reg_state[CTX_R_PWR_CLK_STATE] = 0x20c8; > -- > 2.1.1 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
2015-02-25 10:34 GMT-03:00 Mika Kuoppala <mika.kuoppala@linux.intel.com>: > Michel Thierry <michel.thierry@intel.com> writes: > >> From: Ben Widawsky <benjamin.widawsky@intel.com> >> >> As we move toward dynamic page table allocation, it becomes much easier >> to manage our data structures if we do things less coarsely by >> breaking up all of our actions into individual tasks. This makes the >> code easier to write, read, and verify.

QA reported a regression caused by this patch. BSW doesn't boot anymore. https://bugs.freedesktop.org/show_bug.cgi?id=89350

>> >> Aside from the dissection of the allocation functions, the patch >> statically allocates the page table structures without a page directory. >> This remains the same for all platforms. >> >> The patch itself should not have much functional difference. The primary >> noticeable difference is the fact that page tables are no longer >> allocated, but rather statically declared as part of the page directory. >> This has non-zero overhead, but things gain additional complexity as a >> result. >> >> This patch exists for a few reasons: >> 1. Splitting out the functions allows easily combining GEN6 and GEN8 >> code. Page tables have no difference based on GEN8. As we'll see in a >> future patch when we add the DMA mappings to the allocations, it >> requires only one small change to make it work, and error handling should >> just fall into place. >> >> 2. Unless we always want to allocate all page tables under a given PDE, >> we'll have to eventually break this up into an array of pointers (or >> pointer to pointer). >> >> 3. Having the discrete functions is easier to review, and understand. >> All allocations and frees now take place in just a couple of locations. >> Reviewing, and catching leaks should be easy. >> >> 4. Less important: the GFP flags are confined to one location, which >> makes playing around with such things trivial. 
>> >> v2: Updated commit message to explain why this patch exists >> >> v3: For lrc, s/pdp.page_directory[i].daddr/pdp.page_directory[i]->daddr/ >> >> v4: Renamed free_pt/pd_single functions to unmap_and_free_pt/pd (Daniel) >> >> v5: Added additional safety checks in gen8 clear/free/unmap. >> >> v6: Use WARN_ON and return -EINVAL in alloc_pt_range (Mika). >> >> v7: Make err_out loop symmetrical to the way we allocate in >> alloc_pt_range. Also s/page_tables/page_table and correct commit >> message (Mika) >> >> Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com> >> Signed-off-by: Ben Widawsky <ben@bwidawsk.net> >> Signed-off-by: Michel Thierry <michel.thierry@intel.com> (v3+) > > Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com> > >> --- >> drivers/gpu/drm/i915/i915_gem_gtt.c | 254 ++++++++++++++++++++++++------------ >> drivers/gpu/drm/i915/i915_gem_gtt.h | 4 +- >> drivers/gpu/drm/i915/intel_lrc.c | 16 +-- >> 3 files changed, 178 insertions(+), 96 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c >> index ab6f1d4..81c1dba 100644 >> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c >> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c >> @@ -279,6 +279,98 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr, >> return pte; >> } >> >> +static void unmap_and_free_pt(struct i915_page_table_entry *pt) >> +{ >> + if (WARN_ON(!pt->page)) >> + return; >> + __free_page(pt->page); >> + kfree(pt); >> +} >> + >> +static struct i915_page_table_entry *alloc_pt_single(void) >> +{ >> + struct i915_page_table_entry *pt; >> + >> + pt = kzalloc(sizeof(*pt), GFP_KERNEL); >> + if (!pt) >> + return ERR_PTR(-ENOMEM); >> + >> + pt->page = alloc_page(GFP_KERNEL | __GFP_ZERO); >> + if (!pt->page) { >> + kfree(pt); >> + return ERR_PTR(-ENOMEM); >> + } >> + >> + return pt; >> +} >> + >> +/** >> + * alloc_pt_range() - Allocate a multiple page tables >> + * @pd: The page directory which will have at least @count entries >> + * available to point to 
the allocated page tables. >> + * @pde: First page directory entry for which we are allocating. >> + * @count: Number of pages to allocate. >> + * >> + * Allocates multiple page table pages and sets the appropriate entries in the >> + * page table structure within the page directory. Function cleans up after >> + * itself on any failures. >> + * >> + * Return: 0 if allocation succeeded. >> + */ >> +static int alloc_pt_range(struct i915_page_directory_entry *pd, uint16_t pde, size_t count) >> +{ >> + int i, ret; >> + >> + /* 512 is the max page tables per page_directory on any platform. */ >> + if (WARN_ON(pde + count > GEN6_PPGTT_PD_ENTRIES)) >> + return -EINVAL; >> + >> + for (i = pde; i < pde + count; i++) { >> + struct i915_page_table_entry *pt = alloc_pt_single(); >> + >> + if (IS_ERR(pt)) { >> + ret = PTR_ERR(pt); >> + goto err_out; >> + } >> + WARN(pd->page_table[i], >> + "Leaking page directory entry %d (%pa)\n", >> + i, pd->page_table[i]); >> + pd->page_table[i] = pt; >> + } >> + >> + return 0; >> + >> +err_out: >> + while (i-- > pde) >> + unmap_and_free_pt(pd->page_table[i]); >> + return ret; >> +} >> + >> +static void unmap_and_free_pd(struct i915_page_directory_entry *pd) >> +{ >> + if (pd->page) { >> + __free_page(pd->page); >> + kfree(pd); >> + } >> +} >> + >> +static struct i915_page_directory_entry *alloc_pd_single(void) >> +{ >> + struct i915_page_directory_entry *pd; >> + >> + pd = kzalloc(sizeof(*pd), GFP_KERNEL); >> + if (!pd) >> + return ERR_PTR(-ENOMEM); >> + >> + pd->page = alloc_page(GFP_KERNEL | __GFP_ZERO); >> + if (!pd->page) { >> + kfree(pd); >> + return ERR_PTR(-ENOMEM); >> + } >> + >> + return pd; >> +} >> + >> /* Broadwell Page Directory Pointer Descriptors */ >> static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry, >> uint64_t val) >> @@ -311,7 +403,7 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt, >> int used_pd = ppgtt->num_pd_entries / GEN8_PDES_PER_PAGE; >> >> for (i = used_pd - 1; i >= 0; i--) { >> - 
dma_addr_t addr = ppgtt->pdp.page_directory[i].daddr; >> + dma_addr_t addr = ppgtt->pdp.page_directory[i]->daddr; >> ret = gen8_write_pdp(ring, i, addr); >> if (ret) >> return ret; >> @@ -338,8 +430,24 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm, >> I915_CACHE_LLC, use_scratch); >> >> while (num_entries) { >> - struct i915_page_directory_entry *pd = &ppgtt->pdp.page_directory[pdpe]; >> - struct page *page_table = pd->page_table[pde].page; >> + struct i915_page_directory_entry *pd; >> + struct i915_page_table_entry *pt; >> + struct page *page_table; >> + >> + if (WARN_ON(!ppgtt->pdp.page_directory[pdpe])) >> + continue; >> + >> + pd = ppgtt->pdp.page_directory[pdpe]; >> + >> + if (WARN_ON(!pd->page_table[pde])) >> + continue; >> + >> + pt = pd->page_table[pde]; >> + >> + if (WARN_ON(!pt->page)) >> + continue; >> + >> + page_table = pt->page; >> >> last_pte = pte + num_entries; >> if (last_pte > GEN8_PTES_PER_PAGE) >> @@ -384,8 +492,9 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, >> break; >> >> if (pt_vaddr == NULL) { >> - struct i915_page_directory_entry *pd = &ppgtt->pdp.page_directory[pdpe]; >> - struct page *page_table = pd->page_table[pde].page; >> + struct i915_page_directory_entry *pd = ppgtt->pdp.page_directory[pdpe]; >> + struct i915_page_table_entry *pt = pd->page_table[pde]; >> + struct page *page_table = pt->page; >> >> pt_vaddr = kmap_atomic(page_table); >> } >> @@ -416,19 +525,16 @@ static void gen8_free_page_tables(struct i915_page_directory_entry *pd) >> { >> int i; >> >> - if (pd->page_table == NULL) >> + if (!pd->page) >> return; >> >> - for (i = 0; i < GEN8_PDES_PER_PAGE; i++) >> - if (pd->page_table[i].page) >> - __free_page(pd->page_table[i].page); >> -} >> + for (i = 0; i < GEN8_PDES_PER_PAGE; i++) { >> + if (WARN_ON(!pd->page_table[i])) >> + continue; >> >> -static void gen8_free_page_directory(struct i915_page_directory_entry *pd) >> -{ >> - gen8_free_page_tables(pd); >> - 
kfree(pd->page_table); >> - __free_page(pd->page); >> + unmap_and_free_pt(pd->page_table[i]); >> + pd->page_table[i] = NULL; >> + } >> } >> >> static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt) >> @@ -436,7 +542,11 @@ static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt) >> int i; >> >> for (i = 0; i < ppgtt->num_pd_pages; i++) { >> - gen8_free_page_directory(&ppgtt->pdp.page_directory[i]); >> + if (WARN_ON(!ppgtt->pdp.page_directory[i])) >> + continue; >> + >> + gen8_free_page_tables(ppgtt->pdp.page_directory[i]); >> + unmap_and_free_pd(ppgtt->pdp.page_directory[i]); >> } >> } >> >> @@ -448,14 +558,23 @@ static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt) >> for (i = 0; i < ppgtt->num_pd_pages; i++) { >> /* TODO: In the future we'll support sparse mappings, so this >> * will have to change. */ >> - if (!ppgtt->pdp.page_directory[i].daddr) >> + if (!ppgtt->pdp.page_directory[i]->daddr) >> continue; >> >> - pci_unmap_page(hwdev, ppgtt->pdp.page_directory[i].daddr, PAGE_SIZE, >> + pci_unmap_page(hwdev, ppgtt->pdp.page_directory[i]->daddr, PAGE_SIZE, >> PCI_DMA_BIDIRECTIONAL); >> >> for (j = 0; j < GEN8_PDES_PER_PAGE; j++) { >> - dma_addr_t addr = ppgtt->pdp.page_directory[i].page_table[j].daddr; >> + struct i915_page_directory_entry *pd = ppgtt->pdp.page_directory[i]; >> + struct i915_page_table_entry *pt; >> + dma_addr_t addr; >> + >> + if (WARN_ON(!pd->page_table[j])) >> + continue; >> + >> + pt = pd->page_table[j]; >> + addr = pt->daddr; >> + >> if (addr) >> pci_unmap_page(hwdev, addr, PAGE_SIZE, >> PCI_DMA_BIDIRECTIONAL); >> @@ -474,25 +593,20 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) >> >> static int gen8_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt) >> { >> - int i, j; >> + int i, ret; >> >> for (i = 0; i < ppgtt->num_pd_pages; i++) { >> - struct i915_page_directory_entry *pd = &ppgtt->pdp.page_directory[i]; >> - for (j = 0; j < GEN8_PDES_PER_PAGE; j++) { >> - struct i915_page_table_entry *pt = 
&pd->page_table[j]; >> - >> - pt->page = alloc_page(GFP_KERNEL | __GFP_ZERO); >> - if (!pt->page) >> - goto unwind_out; >> - >> - } >> + ret = alloc_pt_range(ppgtt->pdp.page_directory[i], >> + 0, GEN8_PDES_PER_PAGE); >> + if (ret) >> + goto unwind_out; >> } >> >> return 0; >> >> unwind_out: >> while (i--) >> - gen8_free_page_tables(&ppgtt->pdp.page_directory[i]); >> + gen8_free_page_tables(ppgtt->pdp.page_directory[i]); >> >> return -ENOMEM; >> } >> @@ -503,19 +617,9 @@ static int gen8_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt, >> int i; >> >> for (i = 0; i < max_pdp; i++) { >> - struct i915_page_table_entry *pt; >> - >> - pt = kcalloc(GEN8_PDES_PER_PAGE, sizeof(*pt), GFP_KERNEL); >> - if (!pt) >> - goto unwind_out; >> - >> - ppgtt->pdp.page_directory[i].page = alloc_page(GFP_KERNEL); >> - if (!ppgtt->pdp.page_directory[i].page) { >> - kfree(pt); >> + ppgtt->pdp.page_directory[i] = alloc_pd_single(); >> + if (IS_ERR(ppgtt->pdp.page_directory[i])) >> goto unwind_out; >> - } >> - >> - ppgtt->pdp.page_directory[i].page_table = pt; >> } >> >> ppgtt->num_pd_pages = max_pdp; >> @@ -524,10 +628,8 @@ static int gen8_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt, >> return 0; >> >> unwind_out: >> - while (i--) { >> - kfree(ppgtt->pdp.page_directory[i].page_table); >> - __free_page(ppgtt->pdp.page_directory[i].page); >> - } >> + while (i--) >> + unmap_and_free_pd(ppgtt->pdp.page_directory[i]); >> >> return -ENOMEM; >> } >> @@ -561,14 +663,14 @@ static int gen8_ppgtt_setup_page_directories(struct i915_hw_ppgtt *ppgtt, >> int ret; >> >> pd_addr = pci_map_page(ppgtt->base.dev->pdev, >> - ppgtt->pdp.page_directory[pd].page, 0, >> + ppgtt->pdp.page_directory[pd]->page, 0, >> PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); >> >> ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pd_addr); >> if (ret) >> return ret; >> >> - ppgtt->pdp.page_directory[pd].daddr = pd_addr; >> + ppgtt->pdp.page_directory[pd]->daddr = pd_addr; >> >> return 0; >> } >> @@ -578,8 +680,8 
@@ static int gen8_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt, >> const int pt) >> { >> dma_addr_t pt_addr; >> - struct i915_page_directory_entry *pdir = &ppgtt->pdp.page_directory[pd]; >> - struct i915_page_table_entry *ptab = &pdir->page_table[pt]; >> + struct i915_page_directory_entry *pdir = ppgtt->pdp.page_directory[pd]; >> + struct i915_page_table_entry *ptab = pdir->page_table[pt]; >> struct page *p = ptab->page; >> int ret; >> >> @@ -642,10 +744,12 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size) >> * will never need to touch the PDEs again. >> */ >> for (i = 0; i < max_pdp; i++) { >> + struct i915_page_directory_entry *pd = ppgtt->pdp.page_directory[i]; >> gen8_ppgtt_pde_t *pd_vaddr; >> - pd_vaddr = kmap_atomic(ppgtt->pdp.page_directory[i].page); >> + pd_vaddr = kmap_atomic(ppgtt->pdp.page_directory[i]->page); >> for (j = 0; j < GEN8_PDES_PER_PAGE; j++) { >> - dma_addr_t addr = ppgtt->pdp.page_directory[i].page_table[j].daddr; >> + struct i915_page_table_entry *pt = pd->page_table[j]; >> + dma_addr_t addr = pt->daddr; >> pd_vaddr[j] = gen8_pde_encode(ppgtt->base.dev, addr, >> I915_CACHE_LLC); >> } >> @@ -696,7 +800,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) >> for (pde = 0; pde < ppgtt->num_pd_entries; pde++) { >> u32 expected; >> gen6_gtt_pte_t *pt_vaddr; >> - dma_addr_t pt_addr = ppgtt->pd.page_table[pde].daddr; >> + dma_addr_t pt_addr = ppgtt->pd.page_table[pde]->daddr; >> pd_entry = readl(pd_addr + pde); >> expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID); >> >> @@ -707,7 +811,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) >> expected); >> seq_printf(m, "\tPDE: %x\n", pd_entry); >> >> - pt_vaddr = kmap_atomic(ppgtt->pd.page_table[pde].page); >> + pt_vaddr = kmap_atomic(ppgtt->pd.page_table[pde]->page); >> for (pte = 0; pte < I915_PPGTT_PT_ENTRIES; pte+=4) { >> unsigned long va = >> (pde * PAGE_SIZE * I915_PPGTT_PT_ENTRIES) + >> @@ 
-746,7 +850,7 @@ static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt) >> for (i = 0; i < ppgtt->num_pd_entries; i++) { >> dma_addr_t pt_addr; >> >> - pt_addr = ppgtt->pd.page_table[i].daddr; >> + pt_addr = ppgtt->pd.page_table[i]->daddr; >> pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr); >> pd_entry |= GEN6_PDE_VALID; >> >> @@ -922,7 +1026,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, >> if (last_pte > I915_PPGTT_PT_ENTRIES) >> last_pte = I915_PPGTT_PT_ENTRIES; >> >> - pt_vaddr = kmap_atomic(ppgtt->pd.page_table[act_pt].page); >> + pt_vaddr = kmap_atomic(ppgtt->pd.page_table[act_pt]->page); >> >> for (i = first_pte; i < last_pte; i++) >> pt_vaddr[i] = scratch_pte; >> @@ -951,7 +1055,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, >> pt_vaddr = NULL; >> for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) { >> if (pt_vaddr == NULL) >> - pt_vaddr = kmap_atomic(ppgtt->pd.page_table[act_pt].page); >> + pt_vaddr = kmap_atomic(ppgtt->pd.page_table[act_pt]->page); >> >> pt_vaddr[act_pte] = >> vm->pte_encode(sg_page_iter_dma_address(&sg_iter), >> @@ -974,7 +1078,7 @@ static void gen6_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt) >> >> for (i = 0; i < ppgtt->num_pd_entries; i++) >> pci_unmap_page(ppgtt->base.dev->pdev, >> - ppgtt->pd.page_table[i].daddr, >> + ppgtt->pd.page_table[i]->daddr, >> 4096, PCI_DMA_BIDIRECTIONAL); >> } >> >> @@ -983,9 +1087,9 @@ static void gen6_ppgtt_free(struct i915_hw_ppgtt *ppgtt) >> int i; >> >> for (i = 0; i < ppgtt->num_pd_entries; i++) >> - if (ppgtt->pd.page_table[i].page) >> - __free_page(ppgtt->pd.page_table[i].page); >> - kfree(ppgtt->pd.page_table); >> + unmap_and_free_pt(ppgtt->pd.page_table[i]); >> + >> + unmap_and_free_pd(&ppgtt->pd); >> } >> >> static void gen6_ppgtt_cleanup(struct i915_address_space *vm) >> @@ -1040,28 +1144,6 @@ alloc: >> return 0; >> } >> >> -static int gen6_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt) >> -{ >> - struct i915_page_table_entry *pt; >> 
- int i; >> - >> - pt = kcalloc(ppgtt->num_pd_entries, sizeof(*pt), GFP_KERNEL); >> - if (!pt) >> - return -ENOMEM; >> - >> - ppgtt->pd.page_table = pt; >> - >> - for (i = 0; i < ppgtt->num_pd_entries; i++) { >> - pt[i].page = alloc_page(GFP_KERNEL); >> - if (!pt->page) { >> - gen6_ppgtt_free(ppgtt); >> - return -ENOMEM; >> - } >> - } >> - >> - return 0; >> -} >> - >> static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt) >> { >> int ret; >> @@ -1070,7 +1152,7 @@ static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt) >> if (ret) >> return ret; >> >> - ret = gen6_ppgtt_allocate_page_tables(ppgtt); >> + ret = alloc_pt_range(&ppgtt->pd, 0, ppgtt->num_pd_entries); >> if (ret) { >> drm_mm_remove_node(&ppgtt->node); >> return ret; >> @@ -1088,7 +1170,7 @@ static int gen6_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt) >> struct page *page; >> dma_addr_t pt_addr; >> >> - page = ppgtt->pd.page_table[i].page; >> + page = ppgtt->pd.page_table[i]->page; >> pt_addr = pci_map_page(dev->pdev, page, 0, 4096, >> PCI_DMA_BIDIRECTIONAL); >> >> @@ -1097,7 +1179,7 @@ static int gen6_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt) >> return -EIO; >> } >> >> - ppgtt->pd.page_table[i].daddr = pt_addr; >> + ppgtt->pd.page_table[i]->daddr = pt_addr; >> } >> >> return 0; >> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h >> index 1144b709..c9e93f5 100644 >> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h >> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h >> @@ -199,12 +199,12 @@ struct i915_page_directory_entry { >> dma_addr_t daddr; >> }; >> >> - struct i915_page_table_entry *page_table; >> + struct i915_page_table_entry *page_table[GEN6_PPGTT_PD_ENTRIES]; /* PDEs */ >> }; >> >> struct i915_page_directory_pointer_entry { >> /* struct page *page; */ >> - struct i915_page_directory_entry page_directory[GEN8_LEGACY_PDPES]; >> + struct i915_page_directory_entry *page_directory[GEN8_LEGACY_PDPES]; >> }; >> >> struct i915_address_space { >> diff --git 
a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c >> index 9e71992..bc9c7c3 100644 >> --- a/drivers/gpu/drm/i915/intel_lrc.c >> +++ b/drivers/gpu/drm/i915/intel_lrc.c >> @@ -1735,14 +1735,14 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o >> reg_state[CTX_PDP1_LDW] = GEN8_RING_PDP_LDW(ring, 1); >> reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0); >> reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0); >> - reg_state[CTX_PDP3_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[3].daddr); >> - reg_state[CTX_PDP3_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[3].daddr); >> - reg_state[CTX_PDP2_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[2].daddr); >> - reg_state[CTX_PDP2_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[2].daddr); >> - reg_state[CTX_PDP1_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[1].daddr); >> - reg_state[CTX_PDP1_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[1].daddr); >> - reg_state[CTX_PDP0_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[0].daddr); >> - reg_state[CTX_PDP0_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[0].daddr); >> + reg_state[CTX_PDP3_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[3]->daddr); >> + reg_state[CTX_PDP3_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[3]->daddr); >> + reg_state[CTX_PDP2_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[2]->daddr); >> + reg_state[CTX_PDP2_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[2]->daddr); >> + reg_state[CTX_PDP1_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[1]->daddr); >> + reg_state[CTX_PDP1_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[1]->daddr); >> + reg_state[CTX_PDP0_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[0]->daddr); >> + reg_state[CTX_PDP0_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[0]->daddr); >> if (ring->id == RCS) { >> reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); >> reg_state[CTX_R_PWR_CLK_STATE] = 0x20c8; >> -- >> 2.1.1 >> >> 
_______________________________________________ >> Intel-gfx mailing list >> Intel-gfx@lists.freedesktop.org >> http://lists.freedesktop.org/mailman/listinfo/intel-gfx > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index ab6f1d4..81c1dba 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -279,6 +279,98 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr, return pte; } +static void unmap_and_free_pt(struct i915_page_table_entry *pt) +{ + if (WARN_ON(!pt->page)) + return; + __free_page(pt->page); + kfree(pt); +} + +static struct i915_page_table_entry *alloc_pt_single(void) +{ + struct i915_page_table_entry *pt; + + pt = kzalloc(sizeof(*pt), GFP_KERNEL); + if (!pt) + return ERR_PTR(-ENOMEM); + + pt->page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!pt->page) { + kfree(pt); + return ERR_PTR(-ENOMEM); + } + + return pt; +} + +/** + * alloc_pt_range() - Allocate multiple page tables + * @pd: The page directory which will have at least @count entries + * available to point to the allocated page tables. + * @pde: First page directory entry for which we are allocating. + * @count: Number of pages to allocate. + * + * Allocates multiple page table pages and sets the appropriate entries in the + * page table structure within the page directory. Function cleans up after + * itself on any failures. + * + * Return: 0 if allocation succeeded. + */ +static int alloc_pt_range(struct i915_page_directory_entry *pd, uint16_t pde, size_t count) +{ + int i, ret; + + /* 512 is the max page tables per page_directory on any platform. 
*/ + if (WARN_ON(pde + count > GEN6_PPGTT_PD_ENTRIES)) + return -EINVAL; + + for (i = pde; i < pde + count; i++) { + struct i915_page_table_entry *pt = alloc_pt_single(); + + if (IS_ERR(pt)) { + ret = PTR_ERR(pt); + goto err_out; + } + WARN(pd->page_table[i], + "Leaking page directory entry %d (%pa)\n", + i, pd->page_table[i]); + pd->page_table[i] = pt; + } + + return 0; + +err_out: + while (i-- > pde) + unmap_and_free_pt(pd->page_table[i]); + return ret; +} + +static void unmap_and_free_pd(struct i915_page_directory_entry *pd) +{ + if (pd->page) { + __free_page(pd->page); + kfree(pd); + } +} + +static struct i915_page_directory_entry *alloc_pd_single(void) +{ + struct i915_page_directory_entry *pd; + + pd = kzalloc(sizeof(*pd), GFP_KERNEL); + if (!pd) + return ERR_PTR(-ENOMEM); + + pd->page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!pd->page) { + kfree(pd); + return ERR_PTR(-ENOMEM); + } + + return pd; +} + /* Broadwell Page Directory Pointer Descriptors */ static int gen8_write_pdp(struct intel_engine_cs *ring, unsigned entry, uint64_t val) @@ -311,7 +403,7 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt, int used_pd = ppgtt->num_pd_entries / GEN8_PDES_PER_PAGE; for (i = used_pd - 1; i >= 0; i--) { - dma_addr_t addr = ppgtt->pdp.page_directory[i].daddr; + dma_addr_t addr = ppgtt->pdp.page_directory[i]->daddr; ret = gen8_write_pdp(ring, i, addr); if (ret) return ret; @@ -338,8 +430,24 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm, I915_CACHE_LLC, use_scratch); while (num_entries) { - struct i915_page_directory_entry *pd = &ppgtt->pdp.page_directory[pdpe]; - struct page *page_table = pd->page_table[pde].page; + struct i915_page_directory_entry *pd; + struct i915_page_table_entry *pt; + struct page *page_table; + + if (WARN_ON(!ppgtt->pdp.page_directory[pdpe])) + continue; + + pd = ppgtt->pdp.page_directory[pdpe]; + + if (WARN_ON(!pd->page_table[pde])) + continue; + + pt = pd->page_table[pde]; + + if (WARN_ON(!pt->page)) + continue; 
+ + page_table = pt->page; last_pte = pte + num_entries; if (last_pte > GEN8_PTES_PER_PAGE) @@ -384,8 +492,9 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, break; if (pt_vaddr == NULL) { - struct i915_page_directory_entry *pd = &ppgtt->pdp.page_directory[pdpe]; - struct page *page_table = pd->page_table[pde].page; + struct i915_page_directory_entry *pd = ppgtt->pdp.page_directory[pdpe]; + struct i915_page_table_entry *pt = pd->page_table[pde]; + struct page *page_table = pt->page; pt_vaddr = kmap_atomic(page_table); } @@ -416,19 +525,16 @@ static void gen8_free_page_tables(struct i915_page_directory_entry *pd) { int i; - if (pd->page_table == NULL) + if (!pd->page) return; - for (i = 0; i < GEN8_PDES_PER_PAGE; i++) - if (pd->page_table[i].page) - __free_page(pd->page_table[i].page); -} + for (i = 0; i < GEN8_PDES_PER_PAGE; i++) { + if (WARN_ON(!pd->page_table[i])) + continue; -static void gen8_free_page_directory(struct i915_page_directory_entry *pd) -{ - gen8_free_page_tables(pd); - kfree(pd->page_table); - __free_page(pd->page); + unmap_and_free_pt(pd->page_table[i]); + pd->page_table[i] = NULL; + } } static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt) @@ -436,7 +542,11 @@ static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt) int i; for (i = 0; i < ppgtt->num_pd_pages; i++) { - gen8_free_page_directory(&ppgtt->pdp.page_directory[i]); + if (WARN_ON(!ppgtt->pdp.page_directory[i])) + continue; + + gen8_free_page_tables(ppgtt->pdp.page_directory[i]); + unmap_and_free_pd(ppgtt->pdp.page_directory[i]); } } @@ -448,14 +558,23 @@ static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt) for (i = 0; i < ppgtt->num_pd_pages; i++) { /* TODO: In the future we'll support sparse mappings, so this * will have to change. 
*/ - if (!ppgtt->pdp.page_directory[i].daddr) + if (!ppgtt->pdp.page_directory[i]->daddr) continue; - pci_unmap_page(hwdev, ppgtt->pdp.page_directory[i].daddr, PAGE_SIZE, + pci_unmap_page(hwdev, ppgtt->pdp.page_directory[i]->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); for (j = 0; j < GEN8_PDES_PER_PAGE; j++) { - dma_addr_t addr = ppgtt->pdp.page_directory[i].page_table[j].daddr; + struct i915_page_directory_entry *pd = ppgtt->pdp.page_directory[i]; + struct i915_page_table_entry *pt; + dma_addr_t addr; + + if (WARN_ON(!pd->page_table[j])) + continue; + + pt = pd->page_table[j]; + addr = pt->daddr; + if (addr) pci_unmap_page(hwdev, addr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); @@ -474,25 +593,20 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) static int gen8_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt) { - int i, j; + int i, ret; for (i = 0; i < ppgtt->num_pd_pages; i++) { - struct i915_page_directory_entry *pd = &ppgtt->pdp.page_directory[i]; - for (j = 0; j < GEN8_PDES_PER_PAGE; j++) { - struct i915_page_table_entry *pt = &pd->page_table[j]; - - pt->page = alloc_page(GFP_KERNEL | __GFP_ZERO); - if (!pt->page) - goto unwind_out; - - } + ret = alloc_pt_range(ppgtt->pdp.page_directory[i], + 0, GEN8_PDES_PER_PAGE); + if (ret) + goto unwind_out; } return 0; unwind_out: while (i--) - gen8_free_page_tables(&ppgtt->pdp.page_directory[i]); + gen8_free_page_tables(ppgtt->pdp.page_directory[i]); return -ENOMEM; } @@ -503,19 +617,9 @@ static int gen8_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt, int i; for (i = 0; i < max_pdp; i++) { - struct i915_page_table_entry *pt; - - pt = kcalloc(GEN8_PDES_PER_PAGE, sizeof(*pt), GFP_KERNEL); - if (!pt) - goto unwind_out; - - ppgtt->pdp.page_directory[i].page = alloc_page(GFP_KERNEL); - if (!ppgtt->pdp.page_directory[i].page) { - kfree(pt); + ppgtt->pdp.page_directory[i] = alloc_pd_single(); + if (IS_ERR(ppgtt->pdp.page_directory[i])) goto unwind_out; - } - - ppgtt->pdp.page_directory[i].page_table = 
pt; } ppgtt->num_pd_pages = max_pdp; @@ -524,10 +628,8 @@ static int gen8_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt, return 0; unwind_out: - while (i--) { - kfree(ppgtt->pdp.page_directory[i].page_table); - __free_page(ppgtt->pdp.page_directory[i].page); - } + while (i--) + unmap_and_free_pd(ppgtt->pdp.page_directory[i]); return -ENOMEM; } @@ -561,14 +663,14 @@ static int gen8_ppgtt_setup_page_directories(struct i915_hw_ppgtt *ppgtt, int ret; pd_addr = pci_map_page(ppgtt->base.dev->pdev, - ppgtt->pdp.page_directory[pd].page, 0, + ppgtt->pdp.page_directory[pd]->page, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pd_addr); if (ret) return ret; - ppgtt->pdp.page_directory[pd].daddr = pd_addr; + ppgtt->pdp.page_directory[pd]->daddr = pd_addr; return 0; } @@ -578,8 +680,8 @@ static int gen8_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt, const int pt) { dma_addr_t pt_addr; - struct i915_page_directory_entry *pdir = &ppgtt->pdp.page_directory[pd]; - struct i915_page_table_entry *ptab = &pdir->page_table[pt]; + struct i915_page_directory_entry *pdir = ppgtt->pdp.page_directory[pd]; + struct i915_page_table_entry *ptab = pdir->page_table[pt]; struct page *p = ptab->page; int ret; @@ -642,10 +744,12 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size) * will never need to touch the PDEs again. 
*/ for (i = 0; i < max_pdp; i++) { + struct i915_page_directory_entry *pd = ppgtt->pdp.page_directory[i]; gen8_ppgtt_pde_t *pd_vaddr; - pd_vaddr = kmap_atomic(ppgtt->pdp.page_directory[i].page); + pd_vaddr = kmap_atomic(ppgtt->pdp.page_directory[i]->page); for (j = 0; j < GEN8_PDES_PER_PAGE; j++) { - dma_addr_t addr = ppgtt->pdp.page_directory[i].page_table[j].daddr; + struct i915_page_table_entry *pt = pd->page_table[j]; + dma_addr_t addr = pt->daddr; pd_vaddr[j] = gen8_pde_encode(ppgtt->base.dev, addr, I915_CACHE_LLC); } @@ -696,7 +800,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) for (pde = 0; pde < ppgtt->num_pd_entries; pde++) { u32 expected; gen6_gtt_pte_t *pt_vaddr; - dma_addr_t pt_addr = ppgtt->pd.page_table[pde].daddr; + dma_addr_t pt_addr = ppgtt->pd.page_table[pde]->daddr; pd_entry = readl(pd_addr + pde); expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID); @@ -707,7 +811,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) expected); seq_printf(m, "\tPDE: %x\n", pd_entry); - pt_vaddr = kmap_atomic(ppgtt->pd.page_table[pde].page); + pt_vaddr = kmap_atomic(ppgtt->pd.page_table[pde]->page); for (pte = 0; pte < I915_PPGTT_PT_ENTRIES; pte+=4) { unsigned long va = (pde * PAGE_SIZE * I915_PPGTT_PT_ENTRIES) + @@ -746,7 +850,7 @@ static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt) for (i = 0; i < ppgtt->num_pd_entries; i++) { dma_addr_t pt_addr; - pt_addr = ppgtt->pd.page_table[i].daddr; + pt_addr = ppgtt->pd.page_table[i]->daddr; pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr); pd_entry |= GEN6_PDE_VALID; @@ -922,7 +1026,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, if (last_pte > I915_PPGTT_PT_ENTRIES) last_pte = I915_PPGTT_PT_ENTRIES; - pt_vaddr = kmap_atomic(ppgtt->pd.page_table[act_pt].page); + pt_vaddr = kmap_atomic(ppgtt->pd.page_table[act_pt]->page); for (i = first_pte; i < last_pte; i++) pt_vaddr[i] = scratch_pte; @@ -951,7 +1055,7 @@ static void 
gen6_ppgtt_insert_entries(struct i915_address_space *vm, pt_vaddr = NULL; for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) { if (pt_vaddr == NULL) - pt_vaddr = kmap_atomic(ppgtt->pd.page_table[act_pt].page); + pt_vaddr = kmap_atomic(ppgtt->pd.page_table[act_pt]->page); pt_vaddr[act_pte] = vm->pte_encode(sg_page_iter_dma_address(&sg_iter), @@ -974,7 +1078,7 @@ static void gen6_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt) for (i = 0; i < ppgtt->num_pd_entries; i++) pci_unmap_page(ppgtt->base.dev->pdev, - ppgtt->pd.page_table[i].daddr, + ppgtt->pd.page_table[i]->daddr, 4096, PCI_DMA_BIDIRECTIONAL); } @@ -983,9 +1087,9 @@ static void gen6_ppgtt_free(struct i915_hw_ppgtt *ppgtt) int i; for (i = 0; i < ppgtt->num_pd_entries; i++) - if (ppgtt->pd.page_table[i].page) - __free_page(ppgtt->pd.page_table[i].page); - kfree(ppgtt->pd.page_table); + unmap_and_free_pt(ppgtt->pd.page_table[i]); + + unmap_and_free_pd(&ppgtt->pd); } static void gen6_ppgtt_cleanup(struct i915_address_space *vm) @@ -1040,28 +1144,6 @@ alloc: return 0; } -static int gen6_ppgtt_allocate_page_tables(struct i915_hw_ppgtt *ppgtt) -{ - struct i915_page_table_entry *pt; - int i; - - pt = kcalloc(ppgtt->num_pd_entries, sizeof(*pt), GFP_KERNEL); - if (!pt) - return -ENOMEM; - - ppgtt->pd.page_table = pt; - - for (i = 0; i < ppgtt->num_pd_entries; i++) { - pt[i].page = alloc_page(GFP_KERNEL); - if (!pt->page) { - gen6_ppgtt_free(ppgtt); - return -ENOMEM; - } - } - - return 0; -} - static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt) { int ret; @@ -1070,7 +1152,7 @@ static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt) if (ret) return ret; - ret = gen6_ppgtt_allocate_page_tables(ppgtt); + ret = alloc_pt_range(&ppgtt->pd, 0, ppgtt->num_pd_entries); if (ret) { drm_mm_remove_node(&ppgtt->node); return ret; @@ -1088,7 +1170,7 @@ static int gen6_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt) struct page *page; dma_addr_t pt_addr; - page = ppgtt->pd.page_table[i].page; + page = 
ppgtt->pd.page_table[i]->page; pt_addr = pci_map_page(dev->pdev, page, 0, 4096, PCI_DMA_BIDIRECTIONAL); @@ -1097,7 +1179,7 @@ static int gen6_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt) return -EIO; } - ppgtt->pd.page_table[i].daddr = pt_addr; + ppgtt->pd.page_table[i]->daddr = pt_addr; } return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 1144b709..c9e93f5 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -199,12 +199,12 @@ struct i915_page_directory_entry { dma_addr_t daddr; }; - struct i915_page_table_entry *page_table; + struct i915_page_table_entry *page_table[GEN6_PPGTT_PD_ENTRIES]; /* PDEs */ }; struct i915_page_directory_pointer_entry { /* struct page *page; */ - struct i915_page_directory_entry page_directory[GEN8_LEGACY_PDPES]; + struct i915_page_directory_entry *page_directory[GEN8_LEGACY_PDPES]; }; struct i915_address_space { diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 9e71992..bc9c7c3 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1735,14 +1735,14 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o reg_state[CTX_PDP1_LDW] = GEN8_RING_PDP_LDW(ring, 1); reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0); reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0); - reg_state[CTX_PDP3_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[3].daddr); - reg_state[CTX_PDP3_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[3].daddr); - reg_state[CTX_PDP2_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[2].daddr); - reg_state[CTX_PDP2_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[2].daddr); - reg_state[CTX_PDP1_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[1].daddr); - reg_state[CTX_PDP1_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[1].daddr); - reg_state[CTX_PDP0_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[0].daddr); - 
reg_state[CTX_PDP0_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[0].daddr); + reg_state[CTX_PDP3_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[3]->daddr); + reg_state[CTX_PDP3_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[3]->daddr); + reg_state[CTX_PDP2_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[2]->daddr); + reg_state[CTX_PDP2_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[2]->daddr); + reg_state[CTX_PDP1_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[1]->daddr); + reg_state[CTX_PDP1_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[1]->daddr); + reg_state[CTX_PDP0_UDW+1] = upper_32_bits(ppgtt->pdp.page_directory[0]->daddr); + reg_state[CTX_PDP0_LDW+1] = lower_32_bits(ppgtt->pdp.page_directory[0]->daddr); if (ring->id == RCS) { reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); reg_state[CTX_R_PWR_CLK_STATE] = 0x20c8;