Message ID | 1392840388-6531-1-git-send-email-benjamin.widawsky@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Wed, 2014-02-19 at 12:06 -0800, Ben Widawsky wrote: > Create 3 clear stages in PPGTT init. This will help with upcoming > changes be more readable. The 3 stages are, allocation, dma mapping, and > writing the P[DT]Es > > One nice benefit to the patches is that it makes 2 very clear error > points, allocation, and mapping, and avoids having to do any handling > after writing PTEs (something which was likely buggy before). This > simplified error handling I suspect will be helpful when we move to > deferred/dynamic page table allocation and mapping. > > The patches also attempts to break up some of the steps into more > logical reviewable chunks, particularly when we free. > > v2: Don't call cleanup on the error path since that takes down the > drm_mm and list entry, which aren't setup at this point. > > v3: Fixes addressing Imre's comments from: > <1392821989.19792.13.camel@intelbox> > > Don't do dynamic allocation for the page table DMA addresses. I can't > remember why I did it in the first place. This addresses one of Imre's > other issues. > > Fix error path leak of page tables. > > Signed-off-by: Ben Widawsky <ben@bwidawsk.net> > --- > drivers/gpu/drm/i915/i915_gem_gtt.c | 114 ++++++++++++++++++++---------------- > 1 file changed, 64 insertions(+), 50 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c > index e414d7e..03f586aa 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > @@ -333,6 +333,7 @@ static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt) > > static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt) > { > + struct pci_dev *hwdev = ppgtt->base.dev->pdev; > int i, j; > > for (i = 0; i < ppgtt->num_pd_pages; i++) { > @@ -341,18 +342,14 @@ static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt) > if (!ppgtt->pd_dma_addr[i]) > continue; > > - pci_unmap_page(ppgtt->base.dev->pdev, > - ppgtt->pd_dma_addr[i], > - PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); > + pci_unmap_page(hwdev, ppgtt->pd_dma_addr[i], PAGE_SIZE, > + PCI_DMA_BIDIRECTIONAL); > > for (j = 0; j < GEN8_PDES_PER_PAGE; j++) { > dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j]; > if (addr) > - pci_unmap_page(ppgtt->base.dev->pdev, > - addr, > - PAGE_SIZE, > - PCI_DMA_BIDIRECTIONAL); > - > + pci_unmap_page(hwdev, addr, PAGE_SIZE, > + PCI_DMA_BIDIRECTIONAL); > } > } > } > @@ -370,27 +367,27 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) > } > > /** > - * GEN8 legacy ppgtt programming is accomplished through 4 PDP registers with a > - * net effect resembling a 2-level page table in normal x86 terms. Each PDP > - * represents 1GB of memory > - * 4 * 512 * 512 * 4096 = 4GB legacy 32b address space. > + * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers > + * with a net effect resembling a 2-level page table in normal x86 terms. Each > + * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address > + * space. > * > + * FIXME: split allocation into smaller pieces. For now we only ever do this > + * once, but with full PPGTT, the multiple contiguous allocations will be bad. > * TODO: Do something with the size parameter > - **/ > + */ > static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size) > { > struct page *pt_pages; > - int i, j, ret = -ENOMEM; > const int max_pdp = DIV_ROUND_UP(size, 1 << 30); > const int num_pt_pages = GEN8_PDES_PER_PAGE * max_pdp; > + struct pci_dev *hwdev = ppgtt->base.dev->pdev; > + int i, j, ret; > > if (size % (1<<30)) > DRM_INFO("Pages will be wasted unless GTT size (%llu) is divisible by 1GB\n", size); > > - /* FIXME: split allocation into smaller pieces. For now we only ever do > - * this once, but with full PPGTT, the multiple contiguous allocations > - * will be bad. > - */ > + /* 1. Do all our allocations for page directories and page tables */ > ppgtt->pd_pages = alloc_pages(GFP_KERNEL, get_order(max_pdp << PAGE_SHIFT)); > if (!ppgtt->pd_pages) > return -ENOMEM; > @@ -405,52 +402,60 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size) > ppgtt->num_pd_pages = 1 << get_order(max_pdp << PAGE_SHIFT); > ppgtt->num_pt_pages = 1 << get_order(num_pt_pages << PAGE_SHIFT); > ppgtt->num_pd_entries = max_pdp * GEN8_PDES_PER_PAGE; > - ppgtt->enable = gen8_ppgtt_enable; > - ppgtt->switch_mm = gen8_mm_switch; > - ppgtt->base.clear_range = gen8_ppgtt_clear_range; > - ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; > - ppgtt->base.cleanup = gen8_ppgtt_cleanup; > - ppgtt->base.start = 0; > - ppgtt->base.total = ppgtt->num_pt_pages * GEN8_PTES_PER_PAGE * PAGE_SIZE; > - > BUG_ON(ppgtt->num_pd_pages > GEN8_LEGACY_PDPS); > > + for (i = 0; i < max_pdp; i++) { > + ppgtt->gen8_pt_dma_addr[i] = kcalloc(GEN8_PDES_PER_PAGE, > + sizeof(dma_addr_t), > + GFP_KERNEL); > + if (!ppgtt->gen8_pt_dma_addr[i]) { > + ret = -ENOMEM; > + goto bail; > + } > + } > + > /* > - * - Create a mapping for the page directories. > - * - For each page directory: > - * allocate space for page table mappings. > - * map each page table > + * 2. Create all the DMA mappings for the page directories and page > + * tables > */ > for (i = 0; i < max_pdp; i++) { > - dma_addr_t temp; > - temp = pci_map_page(ppgtt->base.dev->pdev, > - &ppgtt->pd_pages[i], 0, > - PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); > - if (pci_dma_mapping_error(ppgtt->base.dev->pdev, temp)) > - goto err_out; > + dma_addr_t pd_addr, pt_addr; > > - ppgtt->pd_dma_addr[i] = temp; > + /* And the page directory mappings */ > + pd_addr = pci_map_page(hwdev, &ppgtt->pd_pages[i], 0, > + PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); > + ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pd_addr); > + if (ret) > + goto bail; > > - ppgtt->gen8_pt_dma_addr[i] = kmalloc(sizeof(dma_addr_t) * GEN8_PDES_PER_PAGE, GFP_KERNEL); > - if (!ppgtt->gen8_pt_dma_addr[i]) > - goto err_out; > + ppgtt->pd_dma_addr[i] = pd_addr; > > + /* Get the page table mappings per page directory */ > for (j = 0; j < GEN8_PDES_PER_PAGE; j++) { > struct page *p = &pt_pages[i * GEN8_PDES_PER_PAGE + j]; > - temp = pci_map_page(ppgtt->base.dev->pdev, > - p, 0, PAGE_SIZE, > - PCI_DMA_BIDIRECTIONAL); > > - if (pci_dma_mapping_error(ppgtt->base.dev->pdev, temp)) > - goto err_out; > + pt_addr = pci_map_page(hwdev, p, 0, PAGE_SIZE, > + PCI_DMA_BIDIRECTIONAL); > + ret = pci_dma_mapping_error(hwdev, pt_addr); > + if (ret) { > + ppgtt->pd_dma_addr[i] = 0; > + pci_unmap_page(hwdev, pd_addr, PAGE_SIZE, > + PCI_DMA_BIDIRECTIONAL); > + goto bail; I think this would still leave the ppgtt->gen8_pt_dma_addr[i][0 .. j-1] mapped on error. Simply doing if (ret) goto bail; would be ok imo. With that fixed this patch is: Reviewed-by: Imre Deak <imre.deak@intel.com> > + } > > - ppgtt->gen8_pt_dma_addr[i][j] = temp; > + ppgtt->gen8_pt_dma_addr[i][j] = pt_addr; > } > } > > - /* For now, the PPGTT helper functions all require that the PDEs are > + /* > + * 3. Map all the page directory entires to point to the page tables > + * we've allocated. > + * > + * For now, the PPGTT helper functions all require that the PDEs are > * plugged in correctly. So we do that now/here. For aliasing PPGTT, we > - * will never need to touch the PDEs again */ > + * will never need to touch the PDEs again. > + */ > for (i = 0; i < max_pdp; i++) { > gen8_ppgtt_pde_t *pd_vaddr; > pd_vaddr = kmap_atomic(&ppgtt->pd_pages[i]); > @@ -462,6 +467,14 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size) > kunmap_atomic(pd_vaddr); > } > > + ppgtt->enable = gen8_ppgtt_enable; > + ppgtt->switch_mm = gen8_mm_switch; > + ppgtt->base.clear_range = gen8_ppgtt_clear_range; > + ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; > + ppgtt->base.cleanup = gen8_ppgtt_cleanup; > + ppgtt->base.start = 0; > + ppgtt->base.total = ppgtt->num_pt_pages * GEN8_PTES_PER_PAGE * PAGE_SIZE; > + > ppgtt->base.clear_range(&ppgtt->base, 0, > ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE, > true); > @@ -474,8 +487,9 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size) > size % (1<<30)); > return 0; > > -err_out: > - ppgtt->base.cleanup(&ppgtt->base); > +bail: > + gen8_ppgtt_unmap_pages(ppgtt); > + gen8_ppgtt_free(ppgtt); > return ret; > } >
On Wed, Feb 19, 2014 at 11:00:17PM +0200, Imre Deak wrote: > On Wed, 2014-02-19 at 12:06 -0800, Ben Widawsky wrote: > > Create 3 clear stages in PPGTT init. This will help with upcoming > > changes be more readable. The 3 stages are, allocation, dma mapping, and > > writing the P[DT]Es > > > > One nice benefit to the patches is that it makes 2 very clear error > > points, allocation, and mapping, and avoids having to do any handling > > after writing PTEs (something which was likely buggy before). This > > simplified error handling I suspect will be helpful when we move to > > deferred/dynamic page table allocation and mapping. > > > > The patches also attempts to break up some of the steps into more > > logical reviewable chunks, particularly when we free. > > > > v2: Don't call cleanup on the error path since that takes down the > > drm_mm and list entry, which aren't setup at this point. > > > > v3: Fixes addressing Imre's comments from: > > <1392821989.19792.13.camel@intelbox> > > > > Don't do dynamic allocation for the page table DMA addresses. I can't > > remember why I did it in the first place. This addresses one of Imre's > > other issues. > > > > Fix error path leak of page tables. > > > > Signed-off-by: Ben Widawsky <ben@bwidawsk.net> > > --- > > drivers/gpu/drm/i915/i915_gem_gtt.c | 114 ++++++++++++++++++++---------------- > > 1 file changed, 64 insertions(+), 50 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c > > index e414d7e..03f586aa 100644 > > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > > @@ -333,6 +333,7 @@ static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt) > > > > static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt) > > { > > + struct pci_dev *hwdev = ppgtt->base.dev->pdev; > > int i, j; > > > > for (i = 0; i < ppgtt->num_pd_pages; i++) { > > @@ -341,18 +342,14 @@ static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt) > > if (!ppgtt->pd_dma_addr[i]) > > continue; > > > > - pci_unmap_page(ppgtt->base.dev->pdev, > > - ppgtt->pd_dma_addr[i], > > - PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); > > + pci_unmap_page(hwdev, ppgtt->pd_dma_addr[i], PAGE_SIZE, > > + PCI_DMA_BIDIRECTIONAL); > > > > for (j = 0; j < GEN8_PDES_PER_PAGE; j++) { > > dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j]; > > if (addr) > > - pci_unmap_page(ppgtt->base.dev->pdev, > > - addr, > > - PAGE_SIZE, > > - PCI_DMA_BIDIRECTIONAL); > > - > > + pci_unmap_page(hwdev, addr, PAGE_SIZE, > > + PCI_DMA_BIDIRECTIONAL); > > } > > } > > } > > @@ -370,27 +367,27 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) > > } > > > > /** > > - * GEN8 legacy ppgtt programming is accomplished through 4 PDP registers with a > > - * net effect resembling a 2-level page table in normal x86 terms. Each PDP > > - * represents 1GB of memory > > - * 4 * 512 * 512 * 4096 = 4GB legacy 32b address space. > > + * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers > > + * with a net effect resembling a 2-level page table in normal x86 terms. Each > > + * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address > > + * space. > > * > > + * FIXME: split allocation into smaller pieces. For now we only ever do this > > + * once, but with full PPGTT, the multiple contiguous allocations will be bad. > > * TODO: Do something with the size parameter > > - **/ > > + */ > > static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size) > > { > > struct page *pt_pages; > > - int i, j, ret = -ENOMEM; > > const int max_pdp = DIV_ROUND_UP(size, 1 << 30); > > const int num_pt_pages = GEN8_PDES_PER_PAGE * max_pdp; > > + struct pci_dev *hwdev = ppgtt->base.dev->pdev; > > + int i, j, ret; > > > > if (size % (1<<30)) > > DRM_INFO("Pages will be wasted unless GTT size (%llu) is divisible by 1GB\n", size); > > > > - /* FIXME: split allocation into smaller pieces. For now we only ever do > > - * this once, but with full PPGTT, the multiple contiguous allocations > > - * will be bad. > > - */ > > + /* 1. Do all our allocations for page directories and page tables */ > > ppgtt->pd_pages = alloc_pages(GFP_KERNEL, get_order(max_pdp << PAGE_SHIFT)); > > if (!ppgtt->pd_pages) > > return -ENOMEM; > > @@ -405,52 +402,60 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size) > > ppgtt->num_pd_pages = 1 << get_order(max_pdp << PAGE_SHIFT); > > ppgtt->num_pt_pages = 1 << get_order(num_pt_pages << PAGE_SHIFT); > > ppgtt->num_pd_entries = max_pdp * GEN8_PDES_PER_PAGE; > > - ppgtt->enable = gen8_ppgtt_enable; > > - ppgtt->switch_mm = gen8_mm_switch; > > - ppgtt->base.clear_range = gen8_ppgtt_clear_range; > > - ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; > > - ppgtt->base.cleanup = gen8_ppgtt_cleanup; > > - ppgtt->base.start = 0; > > - ppgtt->base.total = ppgtt->num_pt_pages * GEN8_PTES_PER_PAGE * PAGE_SIZE; > > - > > BUG_ON(ppgtt->num_pd_pages > GEN8_LEGACY_PDPS); > > > > + for (i = 0; i < max_pdp; i++) { > > + ppgtt->gen8_pt_dma_addr[i] = kcalloc(GEN8_PDES_PER_PAGE, > > + sizeof(dma_addr_t), > > + GFP_KERNEL); > > + if (!ppgtt->gen8_pt_dma_addr[i]) { > > + ret = -ENOMEM; > > + goto bail; > > + } > > + } > > + > > /* > > - * - Create a mapping for the page directories. > > - * - For each page directory: > > - * allocate space for page table mappings. > > - * map each page table > > + * 2. Create all the DMA mappings for the page directories and page > > + * tables > > */ > > for (i = 0; i < max_pdp; i++) { > > - dma_addr_t temp; > > - temp = pci_map_page(ppgtt->base.dev->pdev, > > - &ppgtt->pd_pages[i], 0, > > - PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); > > - if (pci_dma_mapping_error(ppgtt->base.dev->pdev, temp)) > > - goto err_out; > > + dma_addr_t pd_addr, pt_addr; > > > > - ppgtt->pd_dma_addr[i] = temp; > > + /* And the page directory mappings */ > > + pd_addr = pci_map_page(hwdev, &ppgtt->pd_pages[i], 0, > > + PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); > > + ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pd_addr); > > + if (ret) > > + goto bail; > > > > - ppgtt->gen8_pt_dma_addr[i] = kmalloc(sizeof(dma_addr_t) * GEN8_PDES_PER_PAGE, GFP_KERNEL); > > - if (!ppgtt->gen8_pt_dma_addr[i]) > > - goto err_out; > > + ppgtt->pd_dma_addr[i] = pd_addr; > > > > + /* Get the page table mappings per page directory */ > > for (j = 0; j < GEN8_PDES_PER_PAGE; j++) { > > struct page *p = &pt_pages[i * GEN8_PDES_PER_PAGE + j]; > > - temp = pci_map_page(ppgtt->base.dev->pdev, > > - p, 0, PAGE_SIZE, > > - PCI_DMA_BIDIRECTIONAL); > > > > - if (pci_dma_mapping_error(ppgtt->base.dev->pdev, temp)) > > - goto err_out; > > + pt_addr = pci_map_page(hwdev, p, 0, PAGE_SIZE, > > + PCI_DMA_BIDIRECTIONAL); > > + ret = pci_dma_mapping_error(hwdev, pt_addr); > > + if (ret) { > > + ppgtt->pd_dma_addr[i] = 0; > > + pci_unmap_page(hwdev, pd_addr, PAGE_SIZE, > > + PCI_DMA_BIDIRECTIONAL); > > + goto bail; > > I think this would still leave the ppgtt->gen8_pt_dma_addr[i][0 .. j-1] > mapped on error. Simply doing if (ret) goto bail; would be ok imo. With > that fixed this patch is: > > Reviewed-by: Imre Deak <imre.deak@intel.com> > Yep, I was just testing you :D. My fix traded one leak for another (it'd skip unmapping J page tables). > > + } > > > > - ppgtt->gen8_pt_dma_addr[i][j] = temp; > > + ppgtt->gen8_pt_dma_addr[i][j] = pt_addr; > > } > > } > > > > - /* For now, the PPGTT helper functions all require that the PDEs are > > + /* > > + * 3. Map all the page directory entires to point to the page tables > > + * we've allocated. > > + * > > + * For now, the PPGTT helper functions all require that the PDEs are > > * plugged in correctly. So we do that now/here. For aliasing PPGTT, we > > - * will never need to touch the PDEs again */ > > + * will never need to touch the PDEs again. > > + */ > > for (i = 0; i < max_pdp; i++) { > > gen8_ppgtt_pde_t *pd_vaddr; > > pd_vaddr = kmap_atomic(&ppgtt->pd_pages[i]); > > @@ -462,6 +467,14 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size) > > kunmap_atomic(pd_vaddr); > > } > > > > + ppgtt->enable = gen8_ppgtt_enable; > > + ppgtt->switch_mm = gen8_mm_switch; > > + ppgtt->base.clear_range = gen8_ppgtt_clear_range; > > + ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; > > + ppgtt->base.cleanup = gen8_ppgtt_cleanup; > > + ppgtt->base.start = 0; > > + ppgtt->base.total = ppgtt->num_pt_pages * GEN8_PTES_PER_PAGE * PAGE_SIZE; > > + > > ppgtt->base.clear_range(&ppgtt->base, 0, > > ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE, > > true); > > @@ -474,8 +487,9 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size) > > size % (1<<30)); > > return 0; > > > > -err_out: > > - ppgtt->base.cleanup(&ppgtt->base); > > +bail: > > + gen8_ppgtt_unmap_pages(ppgtt); > > + gen8_ppgtt_free(ppgtt); > > return ret; > > } > > > >
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index e414d7e..03f586aa 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -333,6 +333,7 @@ static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt) static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt) { + struct pci_dev *hwdev = ppgtt->base.dev->pdev; int i, j; for (i = 0; i < ppgtt->num_pd_pages; i++) { @@ -341,18 +342,14 @@ static void gen8_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt) if (!ppgtt->pd_dma_addr[i]) continue; - pci_unmap_page(ppgtt->base.dev->pdev, - ppgtt->pd_dma_addr[i], - PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + pci_unmap_page(hwdev, ppgtt->pd_dma_addr[i], PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL); for (j = 0; j < GEN8_PDES_PER_PAGE; j++) { dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j]; if (addr) - pci_unmap_page(ppgtt->base.dev->pdev, - addr, - PAGE_SIZE, - PCI_DMA_BIDIRECTIONAL); - + pci_unmap_page(hwdev, addr, PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL); } } } @@ -370,27 +367,27 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) } /** - * GEN8 legacy ppgtt programming is accomplished through 4 PDP registers with a - * net effect resembling a 2-level page table in normal x86 terms. Each PDP - * represents 1GB of memory - * 4 * 512 * 512 * 4096 = 4GB legacy 32b address space. + * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers + * with a net effect resembling a 2-level page table in normal x86 terms. Each + * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address + * space. * + * FIXME: split allocation into smaller pieces. For now we only ever do this + * once, but with full PPGTT, the multiple contiguous allocations will be bad. * TODO: Do something with the size parameter - **/ + */ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size) { struct page *pt_pages; - int i, j, ret = -ENOMEM; const int max_pdp = DIV_ROUND_UP(size, 1 << 30); const int num_pt_pages = GEN8_PDES_PER_PAGE * max_pdp; + struct pci_dev *hwdev = ppgtt->base.dev->pdev; + int i, j, ret; if (size % (1<<30)) DRM_INFO("Pages will be wasted unless GTT size (%llu) is divisible by 1GB\n", size); - /* FIXME: split allocation into smaller pieces. For now we only ever do - * this once, but with full PPGTT, the multiple contiguous allocations - * will be bad. - */ + /* 1. Do all our allocations for page directories and page tables */ ppgtt->pd_pages = alloc_pages(GFP_KERNEL, get_order(max_pdp << PAGE_SHIFT)); if (!ppgtt->pd_pages) return -ENOMEM; @@ -405,52 +402,60 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size) ppgtt->num_pd_pages = 1 << get_order(max_pdp << PAGE_SHIFT); ppgtt->num_pt_pages = 1 << get_order(num_pt_pages << PAGE_SHIFT); ppgtt->num_pd_entries = max_pdp * GEN8_PDES_PER_PAGE; - ppgtt->enable = gen8_ppgtt_enable; - ppgtt->switch_mm = gen8_mm_switch; - ppgtt->base.clear_range = gen8_ppgtt_clear_range; - ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; - ppgtt->base.cleanup = gen8_ppgtt_cleanup; - ppgtt->base.start = 0; - ppgtt->base.total = ppgtt->num_pt_pages * GEN8_PTES_PER_PAGE * PAGE_SIZE; - BUG_ON(ppgtt->num_pd_pages > GEN8_LEGACY_PDPS); + for (i = 0; i < max_pdp; i++) { + ppgtt->gen8_pt_dma_addr[i] = kcalloc(GEN8_PDES_PER_PAGE, + sizeof(dma_addr_t), + GFP_KERNEL); + if (!ppgtt->gen8_pt_dma_addr[i]) { + ret = -ENOMEM; + goto bail; + } + } + /* - * - Create a mapping for the page directories. - * - For each page directory: - * allocate space for page table mappings. - * map each page table + * 2. Create all the DMA mappings for the page directories and page + * tables */ for (i = 0; i < max_pdp; i++) { - dma_addr_t temp; - temp = pci_map_page(ppgtt->base.dev->pdev, - &ppgtt->pd_pages[i], 0, - PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - if (pci_dma_mapping_error(ppgtt->base.dev->pdev, temp)) - goto err_out; + dma_addr_t pd_addr, pt_addr; - ppgtt->pd_dma_addr[i] = temp; + /* And the page directory mappings */ + pd_addr = pci_map_page(hwdev, &ppgtt->pd_pages[i], 0, + PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + ret = pci_dma_mapping_error(ppgtt->base.dev->pdev, pd_addr); + if (ret) + goto bail; - ppgtt->gen8_pt_dma_addr[i] = kmalloc(sizeof(dma_addr_t) * GEN8_PDES_PER_PAGE, GFP_KERNEL); - if (!ppgtt->gen8_pt_dma_addr[i]) - goto err_out; + ppgtt->pd_dma_addr[i] = pd_addr; + /* Get the page table mappings per page directory */ for (j = 0; j < GEN8_PDES_PER_PAGE; j++) { struct page *p = &pt_pages[i * GEN8_PDES_PER_PAGE + j]; - temp = pci_map_page(ppgtt->base.dev->pdev, - p, 0, PAGE_SIZE, - PCI_DMA_BIDIRECTIONAL); - if (pci_dma_mapping_error(ppgtt->base.dev->pdev, temp)) - goto err_out; + pt_addr = pci_map_page(hwdev, p, 0, PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL); + ret = pci_dma_mapping_error(hwdev, pt_addr); + if (ret) { + ppgtt->pd_dma_addr[i] = 0; + pci_unmap_page(hwdev, pd_addr, PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL); + goto bail; + } - ppgtt->gen8_pt_dma_addr[i][j] = temp; + ppgtt->gen8_pt_dma_addr[i][j] = pt_addr; } } - /* For now, the PPGTT helper functions all require that the PDEs are + /* + * 3. Map all the page directory entires to point to the page tables + * we've allocated. + * + * For now, the PPGTT helper functions all require that the PDEs are * plugged in correctly. So we do that now/here. For aliasing PPGTT, we - * will never need to touch the PDEs again */ + * will never need to touch the PDEs again. + */ for (i = 0; i < max_pdp; i++) { gen8_ppgtt_pde_t *pd_vaddr; pd_vaddr = kmap_atomic(&ppgtt->pd_pages[i]); @@ -462,6 +467,14 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size) kunmap_atomic(pd_vaddr); } + ppgtt->enable = gen8_ppgtt_enable; + ppgtt->switch_mm = gen8_mm_switch; + ppgtt->base.clear_range = gen8_ppgtt_clear_range; + ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; + ppgtt->base.cleanup = gen8_ppgtt_cleanup; + ppgtt->base.start = 0; + ppgtt->base.total = ppgtt->num_pt_pages * GEN8_PTES_PER_PAGE * PAGE_SIZE; + ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE, true); @@ -474,8 +487,9 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size) size % (1<<30)); return 0; -err_out: - ppgtt->base.cleanup(&ppgtt->base); +bail: + gen8_ppgtt_unmap_pages(ppgtt); + gen8_ppgtt_free(ppgtt); return ret; }
Create 3 clear stages in PPGTT init. This will help with upcoming changes be more readable. The 3 stages are, allocation, dma mapping, and writing the P[DT]Es One nice benefit to the patches is that it makes 2 very clear error points, allocation, and mapping, and avoids having to do any handling after writing PTEs (something which was likely buggy before). This simplified error handling I suspect will be helpful when we move to deferred/dynamic page table allocation and mapping. The patches also attempts to break up some of the steps into more logical reviewable chunks, particularly when we free. v2: Don't call cleanup on the error path since that takes down the drm_mm and list entry, which aren't setup at this point. v3: Fixes addressing Imre's comments from: <1392821989.19792.13.camel@intelbox> Don't do dynamic allocation for the page table DMA addresses. I can't remember why I did it in the first place. This addresses one of Imre's other issues. Fix error path leak of page tables. Signed-off-by: Ben Widawsky <ben@bwidawsk.net> --- drivers/gpu/drm/i915/i915_gem_gtt.c | 114 ++++++++++++++++++++---------------- 1 file changed, 64 insertions(+), 50 deletions(-)