
[13/22] drm/i915: add support for 64K scratch page

Message ID 20170815181215.18310-14-matthew.auld@intel.com (mailing list archive)
State New, archived

Commit Message

Matthew Auld Aug. 15, 2017, 6:12 p.m. UTC
Before we can fully enable 64K pages, we first need to support a 64K
scratch page if we intend to handle objects with sizes < 2M, since any
scratch PTE must also point to a 64K region. Without this, our 64K
usage is limited to objects which completely fill the page-table and
therefore don't need any scratch.

Reported-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 61 +++++++++++++++++++++++++++++++++++--
 drivers/gpu/drm/i915/i915_gem_gtt.h |  1 +
 2 files changed, 60 insertions(+), 2 deletions(-)
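
To make the constraint in the commit message concrete: when a page-table
operates in 64K mode the hardware reads every 16th PTE, and each such
entry, scratch included, must point at a 64K-aligned 64K region. A
minimal sketch of that alignment test (plain C, illustrative only;
usable_as_64k_scratch is a made-up name, not a driver function):

    #include <stdbool.h>
    #include <stdint.h>

    #define SZ_64K (64u << 10)

    /* Mirrors the IS_ALIGNED(addr, I915_GTT_PAGE_SIZE_64K) check in
     * the patch: a DMA address can only back a 64K scratch PTE if it
     * sits on a 64K boundary. */
    static bool usable_as_64k_scratch(uint64_t dma_addr)
    {
            return (dma_addr & (SZ_64K - 1)) == 0;
    }

Since dma_map_page() need not return a 64K-aligned address, the patch
below applies exactly this test to the freshly mapped 64K allocation and
falls back to a single 4K scratch page when it fails.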

Comments

Chris Wilson Aug. 15, 2017, 6:58 p.m. UTC | #1
Quoting Matthew Auld (2017-08-15 19:12:06)
> Before we can fully enable 64K pages, we first need to support a 64K
> scratch page if we intend to handle objects with sizes < 2M, since any
> scratch PTE must also point to a 64K region. Without this, our 64K
> usage is limited to objects which completely fill the page-table and
> therefore don't need any scratch.
> 
> Reported-by: Chris Wilson <chris@chris-wilson.co.uk>
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 61 +++++++++++++++++++++++++++++++++++--
>  drivers/gpu/drm/i915/i915_gem_gtt.h |  1 +
>  2 files changed, 60 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index aeef20a9ea0a..a2178c33586c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -475,12 +475,69 @@ static void fill_page_dma_32(struct i915_address_space *vm,
>  static int
>  setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
>  {
> -       return __setup_page_dma(vm, &vm->scratch_page, gfp | __GFP_ZERO);
> +       struct page *page = NULL;
> +       dma_addr_t addr;
> +       int order;
> +
> +       /* In order to utilize 64K pages for an object with a size < 2M, we will
> +        * need to support a 64K scratch page, given that every 16th entry for a
> +        * page-table operating in 64K mode must point to a properly aligned 64K
> +        * region, including any PTEs which happen to point to scratch.

Needs a quick reminder here about why only for 48b vm, with a
cross-reference to i915_vma_insert(). I guarantee that one day we will
change one without the other -- I hope our tests are good enough to
catch that mistake!
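
One possible wording for such a reminder, purely as a sketch (the exact
i915_vma_insert() behaviour it refers to is assumed from this review,
not quoted from the tree):

    /* NB: 64K scratch is only allocated for 48b VMs, matching the
     * huge-page size/alignment rounding done in i915_vma_insert();
     * if one changes, the other must follow.
     */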

> +        *
> +        * XXX: should we rather make the scratch global, does it have to be
> +        * per-vm?

Depends on the level of process isolation you seek. Currently,
everything is per-process to avoid such information leaks from stray
writes (but bug-free drivers!). Various mutterings have been made about
different levels of sharing, but there's never been enough
justification; it is just one of many costs we bear.

> +        */
> +       if (i915_vm_is_48bit(vm) &&
> +           HAS_PAGE_SIZE(vm->i915, I915_GTT_PAGE_SIZE_64K)) {
> +               order = get_order(I915_GTT_PAGE_SIZE_64K);
> +               page = alloc_pages(gfp | __GFP_ZERO, order);
> +               if (page) {
> +                       addr = dma_map_page(vm->dma, page, 0,
> +                                           I915_GTT_PAGE_SIZE_64K,
> +                                           PCI_DMA_BIDIRECTIONAL);

Aside, has anyone noticed that dma_map_page does a clflush! *mutters
-Chris
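
A note on the bookkeeping the patch introduces: i915_page_dma gains an
order field so that cleanup_scratch_page() can recover the allocation
size as BIT(order) << PAGE_SHIFT. A standalone sketch of that arithmetic
(plain C, illustrative only, assuming a 4K PAGE_SIZE build):

    #include <stdio.h>

    #define PAGE_SHIFT 12               /* 4K pages */
    #define BIT(n) (1UL << (n))

    int main(void)
    {
            /* order 0: the 4K fallback scratch page;
             * order 4: sixteen 4K pages, i.e. the 64K scratch. */
            for (int order = 0; order <= 4; order += 4)
                    printf("order %d -> %lu bytes\n",
                           order, BIT(order) << PAGE_SHIFT);
            return 0;
    }

With get_order(I915_GTT_PAGE_SIZE_64K) evaluating to 4 here, both
allocation paths store an order that cleanup can hand straight to
dma_unmap_page() and __free_pages().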

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index aeef20a9ea0a..a2178c33586c 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -475,12 +475,69 @@ static void fill_page_dma_32(struct i915_address_space *vm,
 static int
 setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
 {
-	return __setup_page_dma(vm, &vm->scratch_page, gfp | __GFP_ZERO);
+	struct page *page = NULL;
+	dma_addr_t addr;
+	int order;
+
+	/* In order to utilize 64K pages for an object with a size < 2M, we will
+	 * need to support a 64K scratch page, given that every 16th entry for a
+	 * page-table operating in 64K mode must point to a properly aligned 64K
+	 * region, including any PTEs which happen to point to scratch.
+	 *
+	 * XXX: should we rather make the scratch global, does it have to be
+	 * per-vm?
+	 */
+	if (i915_vm_is_48bit(vm) &&
+	    HAS_PAGE_SIZE(vm->i915, I915_GTT_PAGE_SIZE_64K)) {
+		order = get_order(I915_GTT_PAGE_SIZE_64K);
+		page = alloc_pages(gfp | __GFP_ZERO, order);
+		if (page) {
+			addr = dma_map_page(vm->dma, page, 0,
+					    I915_GTT_PAGE_SIZE_64K,
+					    PCI_DMA_BIDIRECTIONAL);
+			if (unlikely(dma_mapping_error(vm->dma, addr))) {
+				__free_pages(page, order);
+				page = NULL;
+			}
+
+			if (page && !IS_ALIGNED(addr, I915_GTT_PAGE_SIZE_64K)) {
+				dma_unmap_page(vm->dma, addr,
+					       I915_GTT_PAGE_SIZE_64K,
+					       PCI_DMA_BIDIRECTIONAL);
+				__free_pages(page, order);
+				page = NULL;
+			}
+		}
+	}
+
+	if (!page) {
+		order = 0;
+		page = alloc_page(gfp | __GFP_ZERO);
+		if (unlikely(!page))
+			return -ENOMEM;
+
+		addr = dma_map_page(vm->dma, page, 0, PAGE_SIZE,
+				    PCI_DMA_BIDIRECTIONAL);
+		if (unlikely(dma_mapping_error(vm->dma, addr))) {
+			__free_page(page);
+			return -ENOMEM;
+		}
+	}
+
+	vm->scratch_page.page = page;
+	vm->scratch_page.daddr = addr;
+	vm->scratch_page.order = order;
+
+	return 0;
 }
 
 static void cleanup_scratch_page(struct i915_address_space *vm)
 {
-	cleanup_page_dma(vm, &vm->scratch_page);
+	struct i915_page_dma *p = &vm->scratch_page;
+
+	dma_unmap_page(vm->dma, p->daddr, BIT(p->order) << PAGE_SHIFT,
+		       PCI_DMA_BIDIRECTIONAL);
+	__free_pages(p->page, p->order);
 }
 
 static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index aa4488637fc9..356fec26e8c9 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -212,6 +212,7 @@ struct i915_vma;
 
 struct i915_page_dma {
 	struct page *page;
+	int order;
 	union {
 		dma_addr_t daddr;