| Message ID | 20170621203345.26320-11-matthew.auld@intel.com |
|---|---|
| State | New, archived |
Quoting Matthew Auld (2017-06-21 21:33:36)
> Support inserting 1G gtt pages into the 48b PPGTT.
>
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem_gtt.c | 72 ++++++++++++++++++++++++++++++++++---
>  drivers/gpu/drm/i915/i915_gem_gtt.h |  2 ++
>  2 files changed, 70 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index de67084d5fcf..6fe10ee7dca8 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -922,6 +922,65 @@ static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
>                                     cache_level);
>  }
>
> +static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
> +                                           struct i915_page_directory_pointer **pdps,
> +                                           struct sgt_dma *iter,
> +                                           enum i915_cache_level cache_level)
> +{
> +       const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level);
> +       u64 start = vma->node.start;
> +
> +       do {
> +               struct gen8_insert_pte idx = gen8_insert_pte(start);
> +               struct i915_page_directory_pointer *pdp = pdps[idx.pml4e];
> +               struct i915_page_directory *pd = pdp->page_directory[idx.pdpe];
> +               struct i915_page_table *pt = pd->page_table[idx.pde];
> +               dma_addr_t rem = iter->max - iter->dma;
> +               unsigned int page_size;
> +               gen8_pte_t encode = pte_encode;
> +               gen8_pte_t *vaddr;
> +               u16 index, max;
> +
> +               if (unlikely(vma->page_sizes.sg & I915_GTT_PAGE_SIZE_1G) &&
> +                   IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_1G) &&
> +                   rem >= I915_GTT_PAGE_SIZE_1G && !(idx.pte | idx.pde)) {
> +                       vaddr = kmap_atomic_px(pdp);
> +                       index = idx.pdpe;
> +                       max = GEN8_PML4ES_PER_PML4;
> +                       page_size = I915_GTT_PAGE_SIZE_1G;
> +                       encode |= GEN8_PDPE_PS_1G;
> +               } else {
> +                       vaddr = kmap_atomic_px(pt);
> +                       index = idx.pte;
> +                       max = GEN8_PTES;
> +                       page_size = I915_GTT_PAGE_SIZE;
> +               }
> +
> +               do {
> +                       vaddr[index++] = encode | iter->dma;
> +
> +                       start += page_size;
> +                       iter->dma += page_size;
> +                       if (iter->dma >= iter->max) {
> +                               iter->sg = __sg_next(iter->sg);
> +                               if (!iter->sg)
> +                                       break;
> +
> +                               iter->dma = sg_dma_address(iter->sg);
> +                               iter->max = iter->dma + iter->sg->length;
> +
> +                               if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
> +                                       break;
> +                       }
> +                       rem = iter->max - iter->dma;
> +
> +               } while (rem >= page_size && index < max);

Where does idx advance?

> +
> +               kunmap_atomic(vaddr);
> +
> +       } while (iter->sg);
> +}
Quoting Chris Wilson (2017-06-21 22:49:07)
> Quoting Matthew Auld (2017-06-21 21:33:36)
> > Support inserting 1G gtt pages into the 48b PPGTT.
[snip]
> > +               do {
> > +                       vaddr[index++] = encode | iter->dma;
> > +
> > +                       start += page_size;
> > +                       iter->dma += page_size;
> > +                       if (iter->dma >= iter->max) {
> > +                               iter->sg = __sg_next(iter->sg);
> > +                               if (!iter->sg)
> > +                                       break;
> > GEM_BUG_ON(iter->sg->length < page_size);
> > +                               iter->dma = sg_dma_address(iter->sg);
> > +                               iter->max = iter->dma + iter->sg->length;
> > +
> > +                               if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
> > +                                       break;
> > +                       }
> > +                       rem = iter->max - iter->dma;
> > +
> > +               } while (rem >= page_size && index < max);
>
> Where does idx advance?

via start.
-Chris
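(Editor's note: idx never advances explicitly; each pass of the outer do/while recomputes it from start, which the inner loop has meanwhile bumped by page_size per entry written. Below is a minimal standalone sketch of that recomputation, assuming the usual gen8 48b layout of 9 index bits per level above a 4K page offset. The struct and function names here are invented for illustration and are not the driver's gen8_insert_pte().)

```c
/* Standalone sketch only: models how the outer loop re-derives its table
 * indices from the advancing GTT offset.  Names are hypothetical. */
#include <stdint.h>
#include <stdio.h>

#define GEN8_INDEX_MASK 0x1ffULL /* 9 index bits per page-table level */

struct insert_pte_sketch {
	uint16_t pml4e, pdpe, pde, pte;
};

/* Analogous in spirit to gen8_insert_pte(start) at the top of the loop. */
static struct insert_pte_sketch indices_from_start(uint64_t start)
{
	return (struct insert_pte_sketch) {
		.pml4e = (start >> 39) & GEN8_INDEX_MASK,
		.pdpe  = (start >> 30) & GEN8_INDEX_MASK,
		.pde   = (start >> 21) & GEN8_INDEX_MASK,
		.pte   = (start >> 12) & GEN8_INDEX_MASK,
	};
}

int main(void)
{
	uint64_t start = 1ULL << 30; /* a 1G-aligned GTT offset */
	struct insert_pte_sketch idx = indices_from_start(start);

	/* After the inner loop advances start, the next outer pass lands on
	 * the right pml4e/pdpe/pde/pte without idx being incremented. */
	printf("pml4e=%u pdpe=%u pde=%u pte=%u\n",
	       (unsigned)idx.pml4e, (unsigned)idx.pdpe,
	       (unsigned)idx.pde, (unsigned)idx.pte);
	return 0;
}
```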
On 21 June 2017 at 23:51, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> Quoting Chris Wilson (2017-06-21 22:49:07)
>> Quoting Matthew Auld (2017-06-21 21:33:36)
>> > Support inserting 1G gtt pages into the 48b PPGTT.
[snip]
>> > +                       if (iter->dma >= iter->max) {
>> > +                               iter->sg = __sg_next(iter->sg);
>> > +                               if (!iter->sg)
>> > +                                       break;
>> > +
> > GEM_BUG_ON(iter->sg->length < page_size);

That should be expected behaviour, in that we need to downgrade to a
smaller page size on the next iteration.

>> > +                               iter->dma = sg_dma_address(iter->sg);
>> > +                               iter->max = iter->dma + iter->sg->length;
>> > +
>> > +                               if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
>> > +                                       break;
>> > +                       }
>> > +                       rem = iter->max - iter->dma;
>> > +
>> > +               } while (rem >= page_size && index < max);
>>
>> Where does idx advance?
>
> via start.
> -Chris
Quoting Matthew Auld (2017-06-22 12:07:55)
> On 21 June 2017 at 23:51, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > Quoting Chris Wilson (2017-06-21 22:49:07)
> >> Quoting Matthew Auld (2017-06-21 21:33:36)
> >> > Support inserting 1G gtt pages into the 48b PPGTT.
[snip]
> >> > +               do {
> >> > +                       vaddr[index++] = encode | iter->dma;
> >> > +
> >> > +                       start += page_size;
> >> > +                       iter->dma += page_size;
> >> > +                       if (iter->dma >= iter->max) {
> >> > +                               iter->sg = __sg_next(iter->sg);
> >> > +                               if (!iter->sg)
> >> > +                                       break;
> >> > +
> > > > GEM_BUG_ON(iter->sg->length < page_size);
>
> That should be expected behaviour, in that we need to downgrade to a
> smaller page size on the next iteration.

It still applies to just above where we set vaddr[index]. It fails here
because we have yet to decide on our course of action. I still think
there is merit in having a confirmation that sg->length does meet our
criteria, considering that we set the page_sizes a long time ago.
-Chris
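(Editor's note: to make the placement being discussed concrete, the confirmation belongs after page_size has been chosen for the current run of entries and before each write, not immediately after __sg_next(), where a shorter chunk is legitimate and simply forces a smaller page size on the next outer pass. The following is a self-contained toy model under that reading; the chunk array, sizes, and names are all invented and this is not the driver loop.)

```c
/* Toy model of the inner write loop; "chunk" stands in for an sg entry. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_4K (1ULL << 12)
#define PAGE_2M (1ULL << 21)

struct chunk {
	uint64_t dma;
	uint64_t length;
};

int main(void)
{
	/* A 2M chunk followed by a lone 4K chunk: the second chunk is only
	 * usable after downgrading the page size on the next outer pass. */
	struct chunk chunks[] = {
		{ .dma = 2 * PAGE_2M, .length = PAGE_2M },
		{ .dma = 8 * PAGE_2M, .length = PAGE_4K },
	};
	unsigned int i = 0;
	uint64_t dma = chunks[0].dma;
	uint64_t max = dma + chunks[0].length;
	uint64_t page_size = PAGE_2M; /* chosen once per outer iteration */

	do {
		/* The confirmation belongs here: the chunk being consumed
		 * must cover at least one entry of the chosen page size. */
		assert(max - dma >= page_size);

		printf("write entry at dma=%#llx, size=%#llx\n",
		       (unsigned long long)dma, (unsigned long long)page_size);

		dma += page_size;
		if (dma >= max) {
			if (++i == sizeof(chunks) / sizeof(chunks[0]))
				break;
			/* Asserting chunks[i].length >= page_size here would
			 * be wrong: a shorter chunk just means the next outer
			 * iteration picks a smaller page size. */
			dma = chunks[i].dma;
			max = dma + chunks[i].length;
		}
	} while (max - dma >= page_size);

	return 0;
}
```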
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index de67084d5fcf..6fe10ee7dca8 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -922,6 +922,65 @@ static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
                                    cache_level);
 }
 
+static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
+                                           struct i915_page_directory_pointer **pdps,
+                                           struct sgt_dma *iter,
+                                           enum i915_cache_level cache_level)
+{
+       const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level);
+       u64 start = vma->node.start;
+
+       do {
+               struct gen8_insert_pte idx = gen8_insert_pte(start);
+               struct i915_page_directory_pointer *pdp = pdps[idx.pml4e];
+               struct i915_page_directory *pd = pdp->page_directory[idx.pdpe];
+               struct i915_page_table *pt = pd->page_table[idx.pde];
+               dma_addr_t rem = iter->max - iter->dma;
+               unsigned int page_size;
+               gen8_pte_t encode = pte_encode;
+               gen8_pte_t *vaddr;
+               u16 index, max;
+
+               if (unlikely(vma->page_sizes.sg & I915_GTT_PAGE_SIZE_1G) &&
+                   IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_1G) &&
+                   rem >= I915_GTT_PAGE_SIZE_1G && !(idx.pte | idx.pde)) {
+                       vaddr = kmap_atomic_px(pdp);
+                       index = idx.pdpe;
+                       max = GEN8_PML4ES_PER_PML4;
+                       page_size = I915_GTT_PAGE_SIZE_1G;
+                       encode |= GEN8_PDPE_PS_1G;
+               } else {
+                       vaddr = kmap_atomic_px(pt);
+                       index = idx.pte;
+                       max = GEN8_PTES;
+                       page_size = I915_GTT_PAGE_SIZE;
+               }
+
+               do {
+                       vaddr[index++] = encode | iter->dma;
+
+                       start += page_size;
+                       iter->dma += page_size;
+                       if (iter->dma >= iter->max) {
+                               iter->sg = __sg_next(iter->sg);
+                               if (!iter->sg)
+                                       break;
+
+                               iter->dma = sg_dma_address(iter->sg);
+                               iter->max = iter->dma + iter->sg->length;
+
+                               if (unlikely(!IS_ALIGNED(iter->dma, page_size)))
+                                       break;
+                       }
+                       rem = iter->max - iter->dma;
+
+               } while (rem >= page_size && index < max);
+
+               kunmap_atomic(vaddr);
+
+       } while (iter->sg);
+}
+
 static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
                                    struct i915_vma *vma,
                                    enum i915_cache_level cache_level,
@@ -934,11 +993,16 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
                .max = iter.dma + iter.sg->length,
        };
        struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps;
-       struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
 
-       while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[idx.pml4e++], &iter,
-                                            &idx, cache_level))
-               GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4);
+       if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) {
+               gen8_ppgtt_insert_huge_entries(vma, pdps, &iter, cache_level);
+       } else {
+               struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
+
+               while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[idx.pml4e++],
+                                                    &iter, &idx, cache_level))
+                       GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4);
+       }
 }
 
 static void gen8_free_page_tables(struct i915_address_space *vm,
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 4c2f7d7c1e7d..0d31b46cde03 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -149,6 +149,8 @@ typedef u64 gen8_ppgtt_pml4e_t;
 #define GEN8_PPAT_ELLC_OVERRIDE         (0<<2)
 #define GEN8_PPAT(i, x)                 ((u64)(x) << ((i) * 8))
 
+#define GEN8_PDPE_PS_1G                 BIT(7)
+
 struct sg_table;
 
 struct intel_rotation_info {
Support inserting 1G gtt pages into the 48b PPGTT.

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 72 ++++++++++++++++++++++++++++++++++---
 drivers/gpu/drm/i915/i915_gem_gtt.h |  2 ++
 2 files changed, 70 insertions(+), 4 deletions(-)
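(Editor's note: the heart of the new path is the test that decides whether a 1G PDP entry can be written at the current position. A hypothetical restatement of that check follows; SZ_1G and the function name are invented here, the patch open-codes the condition inside gen8_ppgtt_insert_huge_entries().)

```c
/* Hypothetical restatement of the patch's 1G-eligibility check. */
#include <stdbool.h>
#include <stdint.h>

#define SZ_1G (1ULL << 30)

static bool can_write_1g_entry(uint64_t sg_page_sizes, uint64_t dma,
			       uint64_t rem, uint16_t pde, uint16_t pte)
{
	return (sg_page_sizes & SZ_1G) && /* backing store carries 1G chunks  */
	       !(dma & (SZ_1G - 1)) &&    /* DMA address is 1G aligned        */
	       rem >= SZ_1G &&            /* >= 1G left in the current chunk  */
	       !(pte | pde);              /* pde and pte indices are both zero,
	                                     i.e. the GTT offset is 1G aligned */
}
```

When the condition holds, the entry is written at the PDP level with GEN8_PDPE_PS_1G (bit 7) set; otherwise the loop falls back to regular 4K PTEs.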