Message ID | 20190611172731.19174-2-mika.kuoppala@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [1/9] drm/i915/gtt: No need to zero the table for page dirs | expand |
Quoting Mika Kuoppala (2019-06-11 18:27:24) > struct i915_page_table { > struct i915_page_dma base; > - atomic_t used_ptes; > + atomic_t used; > }; > > struct i915_page_directory { > struct i915_page_dma base; > - > - struct i915_page_table *page_table[I915_PDES]; /* PDEs */ > - atomic_t used_pdes; > - spinlock_t lock; > -}; > - > -struct i915_page_directory_pointer { > - struct i915_page_dma base; > - struct i915_page_directory **page_directory; > - atomic_t used_pdpes; > - spinlock_t lock; > -}; > - > -struct i915_pml4 { > - struct i915_page_dma base; > - struct i915_page_directory_pointer *pdps[GEN8_PML4ES_PER_PML4]; > + atomic_t used; > spinlock_t lock; > + void *entry[0]; > }; And the number of entries is always 512 (albeit with a single bsw discrepancy). At the very least you can alias a fixed-size variant over the top to remove the extra pointer chasing you added. -Chris
Quoting Mika Kuoppala (2019-06-11 18:27:24) > All page directories are identical in function, only the position in the > hierarchy differ. Use same base type for directory functionality. > > Cc: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> > Cc: Matthew Auld <matthew.william.auld@gmail.com> > Cc: Abdiel Janulgue <abdiel.janulgue@linux.intel.com> > Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> > --- > drivers/gpu/drm/i915/gem/i915_gem_context.c | 2 +- > drivers/gpu/drm/i915/gt/intel_lrc_reg.h | 2 +- > drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 2 +- > drivers/gpu/drm/i915/gvt/scheduler.c | 30 +- > drivers/gpu/drm/i915/i915_gem_gtt.c | 349 ++++++++++---------- > drivers/gpu/drm/i915/i915_gem_gtt.h | 64 ++-- > 6 files changed, 234 insertions(+), 215 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c > index c86ca9f21532..dbab0ab1cef1 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c > @@ -1038,7 +1038,7 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data) > > if (i915_vm_is_4lvl(vm)) { > struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); > - const dma_addr_t pd_daddr = px_dma(&ppgtt->pml4); > + const dma_addr_t pd_daddr = px_dma(ppgtt->pd); > > cs = intel_ring_begin(rq, 6); > if (IS_ERR(cs)) > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h > index 5ef932d810a7..6bf34738b4e5 100644 > --- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h > +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h > @@ -55,7 +55,7 @@ > > #define ASSIGN_CTX_PML4(ppgtt, reg_state) do { \ > u32 *reg_state__ = (reg_state); \ > - const u64 addr__ = px_dma(&ppgtt->pml4); \ > + const u64 addr__ = px_dma(ppgtt->pd); \ > (reg_state__)[CTX_PDP0_UDW + 1] = upper_32_bits(addr__); \ > (reg_state__)[CTX_PDP0_LDW + 1] = lower_32_bits(addr__); \ > } while (0) > diff 
--git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c > index c834d016c965..3b857994943c 100644 > --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c > +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c > @@ -1523,7 +1523,7 @@ static int load_pd_dir(struct i915_request *rq, const struct i915_ppgtt *ppgtt) > > *cs++ = MI_LOAD_REGISTER_IMM(1); > *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base)); > - *cs++ = ppgtt->pd.base.ggtt_offset << 10; > + *cs++ = ppgtt->pd->base.ggtt_offset << 10; > > intel_ring_advance(rq, cs); > > diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c > index e301efb18d45..f1e1261ac3db 100644 > --- a/drivers/gpu/drm/i915/gvt/scheduler.c > +++ b/drivers/gpu/drm/i915/gvt/scheduler.c > @@ -375,11 +375,13 @@ static int set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload, > return -EINVAL; > > if (mm->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) { > - px_dma(&ppgtt->pml4) = mm->ppgtt_mm.shadow_pdps[0]; > + px_dma(ppgtt->pd) = mm->ppgtt_mm.shadow_pdps[0]; > } else { > for (i = 0; i < GVT_RING_CTX_NR_PDPS; i++) { > - px_dma(ppgtt->pdp.page_directory[i]) = > - mm->ppgtt_mm.shadow_pdps[i]; > + struct i915_page_directory * const pd = > + i915_pd_entry(ppgtt->pd, i); > + > + px_dma(pd) = mm->ppgtt_mm.shadow_pdps[i]; > } > } > > @@ -1128,11 +1130,14 @@ i915_context_ppgtt_root_restore(struct intel_vgpu_submission *s, > int i; > > if (i915_vm_is_4lvl(&ppgtt->vm)) { > - px_dma(&ppgtt->pml4) = s->i915_context_pml4; > + px_dma(ppgtt->pd) = s->i915_context_pml4; > } else { > - for (i = 0; i < GEN8_3LVL_PDPES; i++) > - px_dma(ppgtt->pdp.page_directory[i]) = > - s->i915_context_pdps[i]; > + for (i = 0; i < GEN8_3LVL_PDPES; i++) { > + struct i915_page_directory * const pd = > + i915_pd_entry(ppgtt->pd, i); > + > + px_dma(pd) = s->i915_context_pdps[i]; > + } > } > } > > @@ -1186,11 +1191,14 @@ i915_context_ppgtt_root_save(struct intel_vgpu_submission 
*s, > int i; > > if (i915_vm_is_4lvl(&ppgtt->vm)) { > - s->i915_context_pml4 = px_dma(&ppgtt->pml4); > + s->i915_context_pml4 = px_dma(ppgtt->pd); > } else { > - for (i = 0; i < GEN8_3LVL_PDPES; i++) > - s->i915_context_pdps[i] = > - px_dma(ppgtt->pdp.page_directory[i]); > + for (i = 0; i < GEN8_3LVL_PDPES; i++) { > + struct i915_page_directory * const pd = > + i915_pd_entry(ppgtt->pd, i); > + > + s->i915_context_pdps[i] = px_dma(pd); > + } > } > } > > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c > index 07f86d474fa2..9a1f956a817a 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > @@ -661,7 +661,8 @@ static struct i915_page_table *alloc_pt(struct i915_address_space *vm) > return ERR_PTR(-ENOMEM); > } > > - atomic_set(&pt->used_ptes, 0); > + atomic_set(&pt->used, 0); > + > return pt; > } > > @@ -683,11 +684,28 @@ static void gen6_initialize_pt(struct i915_address_space *vm, > fill32_px(vm, pt, vm->scratch_pte); > } > > +static struct i915_page_directory *__alloc_pd(const unsigned int entries) > +{ > + struct i915_page_directory *pd; > + > + pd = kmalloc(sizeof(*pd) + > + entries * sizeof(pd->entry[0]), I915_GFP_ALLOW_FAIL); > + > + if (unlikely(!pd)) > + return NULL; > + > + memset(&pd->base, 0, sizeof(pd->base)); > + atomic_set(&pd->used, 0); > + spin_lock_init(&pd->lock); > + > + return pd; > +} > + > static struct i915_page_directory *alloc_pd(struct i915_address_space *vm) > { > struct i915_page_directory *pd; > > - pd = kmalloc(sizeof(*pd), I915_GFP_ALLOW_FAIL); > + pd = __alloc_pd(512); > if (unlikely(!pd)) > return ERR_PTR(-ENOMEM); > > @@ -696,8 +714,6 @@ static struct i915_page_directory *alloc_pd(struct i915_address_space *vm) > return ERR_PTR(-ENOMEM); > } > > - atomic_set(&pd->used_pdes, 0); > - spin_lock_init(&pd->lock); > return pd; > } > > @@ -713,88 +729,56 @@ static void gen8_initialize_pd(struct i915_address_space *vm, > { > fill_px(vm, pd, > 
gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC)); > - memset_p((void **)pd->page_table, vm->scratch_pt, I915_PDES); > + memset_p(pd->entry, vm->scratch_pt, I915_PDES); > } > > -static int __pdp_init(struct i915_address_space *vm, > - struct i915_page_directory_pointer *pdp) > +static struct i915_page_directory *alloc_pdp(struct i915_address_space *vm) > { > - const unsigned int pdpes = i915_pdpes_per_pdp(vm); > + struct i915_page_directory *pdp; > > - pdp->page_directory = kmalloc_array(pdpes, sizeof(*pdp->page_directory), > - I915_GFP_ALLOW_FAIL); > - if (unlikely(!pdp->page_directory)) > - return -ENOMEM; > - > - memset_p((void **)pdp->page_directory, vm->scratch_pd, pdpes); > - > - atomic_set(&pdp->used_pdpes, 0); > - spin_lock_init(&pdp->lock); > - return 0; > -} > - > -static void __pdp_fini(struct i915_page_directory_pointer *pdp) > -{ > - kfree(pdp->page_directory); > - pdp->page_directory = NULL; > -} > - > -static struct i915_page_directory_pointer * > -alloc_pdp(struct i915_address_space *vm) > -{ > - struct i915_page_directory_pointer *pdp; > - int ret = -ENOMEM; > - > - GEM_BUG_ON(!i915_vm_is_4lvl(vm)); > - > - pdp = kmalloc(sizeof(*pdp), GFP_KERNEL); > + pdp = __alloc_pd(i915_pdpes_per_pdp(vm)); > if (!pdp) > return ERR_PTR(-ENOMEM); > > - ret = __pdp_init(vm, pdp); > - if (ret) > - goto fail_bitmap; > - > - ret = setup_px(vm, pdp); > - if (ret) > - goto fail_page_m; > + if (i915_vm_is_4lvl(vm)) { > + if (unlikely(setup_px(vm, pdp))) { > + kfree(pdp); > + return ERR_PTR(-ENOMEM); > + } > + } > > return pdp; > - > -fail_page_m: > - __pdp_fini(pdp); > -fail_bitmap: > - kfree(pdp); > - > - return ERR_PTR(ret); > } > > static void free_pdp(struct i915_address_space *vm, > - struct i915_page_directory_pointer *pdp) > + struct i915_page_directory *pdp) > { > - __pdp_fini(pdp); > - > - if (!i915_vm_is_4lvl(vm)) > - return; > + if (i915_vm_is_4lvl(vm)) > + cleanup_px(vm, pdp); > > - cleanup_px(vm, pdp); > kfree(pdp); > } > > -static void 
gen8_initialize_pdp(struct i915_address_space *vm, > - struct i915_page_directory_pointer *pdp) > +static void gen8_initialize_4lvl_pdp(struct i915_address_space *vm, > + struct i915_page_directory *pdp) > { > fill_px(vm, pdp, > gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC)); > + memset_p(pdp->entry, vm->scratch_pd, 512); > +} > + > +static void gen8_initialize_3lvl_pdp(struct i915_address_space *vm, > + struct i915_page_directory *pdp) > +{ > + memset_p(pdp->entry, vm->scratch_pd, GEN8_3LVL_PDPES); > } > > static void gen8_initialize_pml4(struct i915_address_space *vm, > - struct i915_pml4 *pml4) > + struct i915_page_directory *pml4) > { > fill_px(vm, pml4, > gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC)); > - memset_p((void **)pml4->pdps, vm->scratch_pdp, GEN8_PML4ES_PER_PML4); > - spin_lock_init(&pml4->lock); > + memset_p(pml4->entry, vm->scratch_pdp, GEN8_PML4ES_PER_PML4); > } > > /* > @@ -822,8 +806,8 @@ static bool gen8_ppgtt_clear_pt(const struct i915_address_space *vm, > memset64(vaddr + gen8_pte_index(start), vm->scratch_pte, num_entries); > kunmap_atomic(vaddr); > > - GEM_BUG_ON(num_entries > atomic_read(&pt->used_ptes)); > - return !atomic_sub_return(num_entries, &pt->used_ptes); > + GEM_BUG_ON(num_entries > atomic_read(&pt->used)); > + return !atomic_sub_return(num_entries, &pt->used); > } > > static void gen8_ppgtt_set_pde(struct i915_address_space *vm, > @@ -854,12 +838,12 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm, > continue; > > spin_lock(&pd->lock); > - if (!atomic_read(&pt->used_ptes)) { > + if (!atomic_read(&pt->used)) { > gen8_ppgtt_set_pde(vm, pd, vm->scratch_pt, pde); > - pd->page_table[pde] = vm->scratch_pt; > + pd->entry[pde] = vm->scratch_pt; > > - GEM_BUG_ON(!atomic_read(&pd->used_pdes)); > - atomic_dec(&pd->used_pdes); > + GEM_BUG_ON(!atomic_read(&pd->used)); > + atomic_dec(&pd->used); > free = true; > } > spin_unlock(&pd->lock); > @@ -867,11 +851,11 @@ static bool gen8_ppgtt_clear_pd(struct 
i915_address_space *vm, > free_pt(vm, pt); > } > > - return !atomic_read(&pd->used_pdes); > + return !atomic_read(&pd->used); > } > > static void gen8_ppgtt_set_pdpe(struct i915_address_space *vm, > - struct i915_page_directory_pointer *pdp, > + struct i915_page_directory *pdp, > struct i915_page_directory *pd, > unsigned int pdpe) > { > @@ -889,7 +873,7 @@ static void gen8_ppgtt_set_pdpe(struct i915_address_space *vm, > * Caller can use the return value to update higher-level entries > */ > static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm, > - struct i915_page_directory_pointer *pdp, > + struct i915_page_directory * const pdp, > u64 start, u64 length) > { > struct i915_page_directory *pd; > @@ -904,12 +888,12 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm, > continue; > > spin_lock(&pdp->lock); > - if (!atomic_read(&pd->used_pdes)) { > + if (!atomic_read(&pd->used)) { > gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe); > - pdp->page_directory[pdpe] = vm->scratch_pd; > + pdp->entry[pdpe] = vm->scratch_pd; > > - GEM_BUG_ON(!atomic_read(&pdp->used_pdpes)); > - atomic_dec(&pdp->used_pdpes); > + GEM_BUG_ON(!atomic_read(&pdp->used)); > + atomic_dec(&pdp->used); > free = true; > } > spin_unlock(&pdp->lock); > @@ -917,17 +901,17 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm, > free_pd(vm, pd); > } > > - return !atomic_read(&pdp->used_pdpes); > + return !atomic_read(&pdp->used); > } > > static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm, > u64 start, u64 length) > { > - gen8_ppgtt_clear_pdp(vm, &i915_vm_to_ppgtt(vm)->pdp, start, length); > + gen8_ppgtt_clear_pdp(vm, i915_vm_to_ppgtt(vm)->pd, start, length); > } > > -static void gen8_ppgtt_set_pml4e(struct i915_pml4 *pml4, > - struct i915_page_directory_pointer *pdp, > +static void gen8_ppgtt_set_pml4e(struct i915_page_directory *pml4, > + struct i915_page_directory *pdp, > unsigned int pml4e) > { > gen8_ppgtt_pml4e_t *vaddr; > @@ -945,8 +929,8 @@ static 
void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm, > u64 start, u64 length) > { > struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); > - struct i915_pml4 *pml4 = &ppgtt->pml4; > - struct i915_page_directory_pointer *pdp; > + struct i915_page_directory * const pml4 = ppgtt->pd; > + struct i915_page_directory *pdp; > unsigned int pml4e; > > GEM_BUG_ON(!i915_vm_is_4lvl(vm)); > @@ -959,9 +943,9 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm, > continue; > > spin_lock(&pml4->lock); > - if (!atomic_read(&pdp->used_pdpes)) { > + if (!atomic_read(&pdp->used)) { > gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e); > - pml4->pdps[pml4e] = vm->scratch_pdp; > + pml4->entry[pml4e] = vm->scratch_pdp; > free = true; > } > spin_unlock(&pml4->lock); > @@ -998,7 +982,7 @@ static __always_inline struct gen8_insert_pte gen8_insert_pte(u64 start) > > static __always_inline bool > gen8_ppgtt_insert_pte_entries(struct i915_ppgtt *ppgtt, > - struct i915_page_directory_pointer *pdp, > + struct i915_page_directory *pdp, > struct sgt_dma *iter, > struct gen8_insert_pte *idx, > enum i915_cache_level cache_level, > @@ -1010,8 +994,8 @@ gen8_ppgtt_insert_pte_entries(struct i915_ppgtt *ppgtt, > bool ret; > > GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm)); > - pd = pdp->page_directory[idx->pdpe]; > - vaddr = kmap_atomic_px(pd->page_table[idx->pde]); > + pd = i915_pd_entry(pdp, idx->pdpe); > + vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde)); > do { > vaddr[idx->pte] = pte_encode | iter->dma; > > @@ -1041,11 +1025,11 @@ gen8_ppgtt_insert_pte_entries(struct i915_ppgtt *ppgtt, > } > > GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm)); > - pd = pdp->page_directory[idx->pdpe]; > + pd = pdp->entry[idx->pdpe]; > } > > kunmap_atomic(vaddr); > - vaddr = kmap_atomic_px(pd->page_table[idx->pde]); > + vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde)); > } > } while (1); > kunmap_atomic(vaddr); > @@ -1062,14 +1046,14 @@ static void gen8_ppgtt_insert_3lvl(struct 
i915_address_space *vm, > struct sgt_dma iter = sgt_dma(vma); > struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start); > > - gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter, &idx, > + gen8_ppgtt_insert_pte_entries(ppgtt, ppgtt->pd, &iter, &idx, > cache_level, flags); > > vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; > } > > static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, > - struct i915_page_directory_pointer **pdps, > + struct i915_page_directory *pml4, > struct sgt_dma *iter, > enum i915_cache_level cache_level, > u32 flags) > @@ -1080,8 +1064,9 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, > > do { > struct gen8_insert_pte idx = gen8_insert_pte(start); > - struct i915_page_directory_pointer *pdp = pdps[idx.pml4e]; > - struct i915_page_directory *pd = pdp->page_directory[idx.pdpe]; > + struct i915_page_directory *pdp = > + i915_pdp_entry(pml4, idx.pml4e); > + struct i915_page_directory *pd = i915_pd_entry(pdp, idx.pdpe); > unsigned int page_size; > bool maybe_64K = false; > gen8_pte_t encode = pte_encode; > @@ -1099,7 +1084,7 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, > > vaddr = kmap_atomic_px(pd); > } else { > - struct i915_page_table *pt = pd->page_table[idx.pde]; > + struct i915_page_table *pt = i915_pt_entry(pd, idx.pde); > > index = idx.pte; > max = GEN8_PTES; > @@ -1174,7 +1159,8 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, > u16 i; > > encode = vma->vm->scratch_pte; > - vaddr = kmap_atomic_px(pd->page_table[idx.pde]); > + vaddr = kmap_atomic_px(i915_pt_entry(pd, > + idx.pde)); > > for (i = 1; i < index; i += 16) > memset64(vaddr + i, encode, 15); > @@ -1194,15 +1180,16 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, > { > struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); > struct sgt_dma iter = sgt_dma(vma); > - struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps; > + struct i915_page_directory * const pml4 = ppgtt->pd; > > if 
(vma->page_sizes.sg > I915_GTT_PAGE_SIZE) { > - gen8_ppgtt_insert_huge_entries(vma, pdps, &iter, cache_level, > + gen8_ppgtt_insert_huge_entries(vma, pml4, &iter, cache_level, > flags); > } else { > struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start); > > - while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[idx.pml4e++], > + while (gen8_ppgtt_insert_pte_entries(ppgtt, > + i915_pdp_entry(pml4, idx.pml4e++), > &iter, &idx, cache_level, > flags)) > GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4); > @@ -1217,8 +1204,8 @@ static void gen8_free_page_tables(struct i915_address_space *vm, > int i; > > for (i = 0; i < I915_PDES; i++) { > - if (pd->page_table[i] != vm->scratch_pt) > - free_pt(vm, pd->page_table[i]); > + if (pd->entry[i] != vm->scratch_pt) > + free_pt(vm, pd->entry[i]); > } > } > > @@ -1277,7 +1264,7 @@ static int gen8_init_scratch(struct i915_address_space *vm) > gen8_initialize_pt(vm, vm->scratch_pt); > gen8_initialize_pd(vm, vm->scratch_pd); > if (i915_vm_is_4lvl(vm)) > - gen8_initialize_pdp(vm, vm->scratch_pdp); > + gen8_initialize_4lvl_pdp(vm, vm->scratch_pdp); > > return 0; > > @@ -1299,7 +1286,7 @@ static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) > int i; > > if (i915_vm_is_4lvl(vm)) { > - const u64 daddr = px_dma(&ppgtt->pml4); > + const u64 daddr = px_dma(ppgtt->pd); > > I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr)); > I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr)); > @@ -1336,17 +1323,17 @@ static void gen8_free_scratch(struct i915_address_space *vm) > } > > static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm, > - struct i915_page_directory_pointer *pdp) > + struct i915_page_directory *pdp) > { > const unsigned int pdpes = i915_pdpes_per_pdp(vm); > int i; > > for (i = 0; i < pdpes; i++) { > - if (pdp->page_directory[i] == vm->scratch_pd) > + if (pdp->entry[i] == vm->scratch_pd) > continue; > > - gen8_free_page_tables(vm, pdp->page_directory[i]); > - free_pd(vm, pdp->page_directory[i]); > + 
gen8_free_page_tables(vm, pdp->entry[i]); > + free_pd(vm, pdp->entry[i]); > } > > free_pdp(vm, pdp); > @@ -1354,16 +1341,19 @@ static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm, > > static void gen8_ppgtt_cleanup_4lvl(struct i915_ppgtt *ppgtt) > { > + struct i915_page_directory * const pml4 = ppgtt->pd; > int i; > > for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) { > - if (ppgtt->pml4.pdps[i] == ppgtt->vm.scratch_pdp) > + struct i915_page_directory *pdp = i915_pdp_entry(pml4, i); > + > + if (pdp == ppgtt->vm.scratch_pdp) > continue; > > - gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, ppgtt->pml4.pdps[i]); > + gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, pdp); > } > > - cleanup_px(&ppgtt->vm, &ppgtt->pml4); > + cleanup_px(&ppgtt->vm, pml4); > } > > static void gen8_ppgtt_cleanup(struct i915_address_space *vm) > @@ -1377,7 +1367,7 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) > if (i915_vm_is_4lvl(vm)) > gen8_ppgtt_cleanup_4lvl(ppgtt); > else > - gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, &ppgtt->pdp); > + gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, ppgtt->pd); > > gen8_free_scratch(vm); > } > @@ -1406,10 +1396,10 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm, > if (count < GEN8_PTES || intel_vgpu_active(vm->i915)) > gen8_initialize_pt(vm, pt); > > - old = cmpxchg(&pd->page_table[pde], vm->scratch_pt, pt); > + old = cmpxchg(&pd->entry[pde], vm->scratch_pt, pt); > if (old == vm->scratch_pt) { > gen8_ppgtt_set_pde(vm, pd, pt, pde); > - atomic_inc(&pd->used_pdes); > + atomic_inc(&pd->used); > } else { > free_pt(vm, pt); > pt = old; > @@ -1418,7 +1408,7 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm, > spin_lock(&pd->lock); > } > > - atomic_add(count, &pt->used_ptes); > + atomic_add(count, &pt->used); > } > spin_unlock(&pd->lock); > > @@ -1430,7 +1420,7 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm, > } > > static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm, > - struct i915_page_directory_pointer *pdp, > + 
struct i915_page_directory *pdp, > u64 start, u64 length) > { > struct i915_page_directory *pd; > @@ -1451,11 +1441,10 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm, > > gen8_initialize_pd(vm, pd); > > - old = cmpxchg(&pdp->page_directory[pdpe], > - vm->scratch_pd, pd); > + old = cmpxchg(&pdp->entry[pdpe], vm->scratch_pd, pd); > if (old == vm->scratch_pd) { > gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe); > - atomic_inc(&pdp->used_pdpes); > + atomic_inc(&pdp->used); > } else { > free_pd(vm, pd); > pd = old; > @@ -1463,7 +1452,7 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm, > > spin_lock(&pdp->lock); > } > - atomic_inc(&pd->used_pdes); > + atomic_inc(&pd->used); > spin_unlock(&pdp->lock); > > ret = gen8_ppgtt_alloc_pd(vm, pd, start, length); > @@ -1471,7 +1460,7 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm, > goto unwind_pd; > > spin_lock(&pdp->lock); > - atomic_dec(&pd->used_pdes); > + atomic_dec(&pd->used); > } > spin_unlock(&pdp->lock); > > @@ -1479,10 +1468,10 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm, > > unwind_pd: > spin_lock(&pdp->lock); > - if (atomic_dec_and_test(&pd->used_pdes)) { > + if (atomic_dec_and_test(&pd->used)) { > gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe); > - GEM_BUG_ON(!atomic_read(&pdp->used_pdpes)); > - atomic_dec(&pdp->used_pdpes); > + GEM_BUG_ON(!atomic_read(&pdp->used)); > + atomic_dec(&pdp->used); > free_pd(vm, pd); > } > spin_unlock(&pdp->lock); > @@ -1495,23 +1484,24 @@ static int gen8_ppgtt_alloc_3lvl(struct i915_address_space *vm, > u64 start, u64 length) > { > return gen8_ppgtt_alloc_pdp(vm, > - &i915_vm_to_ppgtt(vm)->pdp, start, length); > + i915_vm_to_ppgtt(vm)->pd, start, length); > } > > static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm, > u64 start, u64 length) > { > struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); > - struct i915_pml4 *pml4 = &ppgtt->pml4; > - struct i915_page_directory_pointer *pdp; > + struct i915_page_directory 
* const pml4 = ppgtt->pd; > + struct i915_page_directory *pdp; > u64 from = start; > u32 pml4e; > int ret; > > spin_lock(&pml4->lock); > gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { > + > if (pdp == vm->scratch_pdp) { > - struct i915_page_directory_pointer *old; > + struct i915_page_directory *old; > > spin_unlock(&pml4->lock); > > @@ -1519,9 +1509,9 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm, > if (IS_ERR(pdp)) > goto unwind; > > - gen8_initialize_pdp(vm, pdp); > + gen8_initialize_4lvl_pdp(vm, pdp); > > - old = cmpxchg(&pml4->pdps[pml4e], vm->scratch_pdp, pdp); > + old = cmpxchg(&pml4->entry[pml4e], vm->scratch_pdp, pdp); > if (old == vm->scratch_pdp) { > gen8_ppgtt_set_pml4e(pml4, pdp, pml4e); > } else { > @@ -1531,7 +1521,7 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm, > > spin_lock(&pml4->lock); > } > - atomic_inc(&pdp->used_pdpes); > + atomic_inc(&pdp->used); > spin_unlock(&pml4->lock); > > ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length); > @@ -1539,7 +1529,7 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm, > goto unwind_pdp; > > spin_lock(&pml4->lock); > - atomic_dec(&pdp->used_pdpes); > + atomic_dec(&pdp->used); > } > spin_unlock(&pml4->lock); > > @@ -1547,7 +1537,7 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm, > > unwind_pdp: > spin_lock(&pml4->lock); > - if (atomic_dec_and_test(&pdp->used_pdpes)) { > + if (atomic_dec_and_test(&pdp->used)) { > gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e); > free_pdp(vm, pdp); > } > @@ -1560,7 +1550,7 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm, > static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt) > { > struct i915_address_space *vm = &ppgtt->vm; > - struct i915_page_directory_pointer *pdp = &ppgtt->pdp; > + struct i915_page_directory *pdp = ppgtt->pd; > struct i915_page_directory *pd; > u64 start = 0, length = ppgtt->vm.total; > u64 from = start; > @@ -1573,10 +1563,12 @@ static int 
gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt) > > gen8_initialize_pd(vm, pd); > gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe); > - atomic_inc(&pdp->used_pdpes); > + > + atomic_inc(&pdp->used); > } > > - atomic_inc(&pdp->used_pdpes); /* never remove */ > + atomic_inc(&pdp->used); /* never remove */ > + > return 0; > > unwind: > @@ -1585,7 +1577,7 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt) > gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe); > free_pd(vm, pd); > } > - atomic_set(&pdp->used_pdpes, 0); > + atomic_set(&pdp->used, 0); > return -ENOMEM; > } > > @@ -1640,27 +1632,25 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915) > if (err) > goto err_free; > > - if (i915_vm_is_4lvl(&ppgtt->vm)) { > - err = setup_px(&ppgtt->vm, &ppgtt->pml4); > - if (err) > - goto err_scratch; > + ppgtt->pd = alloc_pdp(&ppgtt->vm); > + if (IS_ERR(ppgtt->pd)) { > + err = PTR_ERR(ppgtt->pd); > + goto err_scratch; > + } > > - gen8_initialize_pml4(&ppgtt->vm, &ppgtt->pml4); > + if (i915_vm_is_4lvl(&ppgtt->vm)) { > + gen8_initialize_pml4(&ppgtt->vm, ppgtt->pd); > > ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_4lvl; > ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl; > ppgtt->vm.clear_range = gen8_ppgtt_clear_4lvl; > } else { > - err = __pdp_init(&ppgtt->vm, &ppgtt->pdp); > - if (err) > - goto err_scratch; > + gen8_initialize_3lvl_pdp(&ppgtt->vm, ppgtt->pd); > > if (intel_vgpu_active(i915)) { > err = gen8_preallocate_top_level_pdp(ppgtt); > - if (err) { > - __pdp_fini(&ppgtt->pdp); > - goto err_scratch; > - } > + if (err) > + goto err_pdp; > } > > ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_3lvl; > @@ -1675,6 +1665,8 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915) > > return ppgtt; > > +err_pdp: > + free_pdp(&ppgtt->vm, ppgtt->pd); > err_scratch: > gen8_free_scratch(&ppgtt->vm); > err_free: > @@ -1740,15 +1732,16 @@ static void gen6_ppgtt_enable(struct drm_i915_private *dev_priv) > static void 
gen6_ppgtt_clear_range(struct i915_address_space *vm, > u64 start, u64 length) > { > - struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); > - unsigned int first_entry = start / I915_GTT_PAGE_SIZE; > + struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); > + const unsigned int first_entry = start / I915_GTT_PAGE_SIZE; > + const gen6_pte_t scratch_pte = vm->scratch_pte; > unsigned int pde = first_entry / GEN6_PTES; > unsigned int pte = first_entry % GEN6_PTES; > unsigned int num_entries = length / I915_GTT_PAGE_SIZE; > - const gen6_pte_t scratch_pte = vm->scratch_pte; > > while (num_entries) { > - struct i915_page_table *pt = ppgtt->base.pd.page_table[pde++]; > + struct i915_page_table * const pt = > + i915_pt_entry(ppgtt->base.pd, pde++); > const unsigned int count = min(num_entries, GEN6_PTES - pte); > gen6_pte_t *vaddr; > > @@ -1756,8 +1749,8 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, > > num_entries -= count; > > - GEM_BUG_ON(count > atomic_read(&pt->used_ptes)); > - if (!atomic_sub_return(count, &pt->used_ptes)) > + GEM_BUG_ON(count > atomic_read(&pt->used)); > + if (!atomic_sub_return(count, &pt->used)) > ppgtt->scan_for_unused_pt = true; > > /* > @@ -1781,6 +1774,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, > u32 flags) > { > struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); > + struct i915_page_directory * const pd = ppgtt->pd; > unsigned first_entry = vma->node.start / I915_GTT_PAGE_SIZE; > unsigned act_pt = first_entry / GEN6_PTES; > unsigned act_pte = first_entry % GEN6_PTES; > @@ -1788,9 +1782,9 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, > struct sgt_dma iter = sgt_dma(vma); > gen6_pte_t *vaddr; > > - GEM_BUG_ON(ppgtt->pd.page_table[act_pt] == vm->scratch_pt); > + GEM_BUG_ON(i915_pt_entry(pd, act_pt) == vm->scratch_pt); > > - vaddr = kmap_atomic_px(ppgtt->pd.page_table[act_pt]); > + vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt)); > do { > 
vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma); > > @@ -1806,7 +1800,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, > > if (++act_pte == GEN6_PTES) { > kunmap_atomic(vaddr); > - vaddr = kmap_atomic_px(ppgtt->pd.page_table[++act_pt]); > + vaddr = kmap_atomic_px(i915_pt_entry(pd, ++act_pt)); > act_pte = 0; > } > } while (1); > @@ -1819,6 +1813,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, > u64 start, u64 length) > { > struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); > + struct i915_page_directory * const pd = ppgtt->base.pd; > struct i915_page_table *pt; > intel_wakeref_t wakeref; > u64 from = start; > @@ -1827,14 +1822,14 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, > > wakeref = intel_runtime_pm_get(vm->i915); > > - spin_lock(&ppgtt->base.pd.lock); > - gen6_for_each_pde(pt, &ppgtt->base.pd, start, length, pde) { > + spin_lock(&pd->lock); > + gen6_for_each_pde(pt, pd, start, length, pde) { > const unsigned int count = gen6_pte_count(start, length); > > if (pt == vm->scratch_pt) { > struct i915_page_table *old; > > - spin_unlock(&ppgtt->base.pd.lock); > + spin_unlock(&pd->lock); > > pt = alloc_pt(vm); > if (IS_ERR(pt)) > @@ -1842,10 +1837,8 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, > > gen6_initialize_pt(vm, pt); > > - old = cmpxchg(&ppgtt->base.pd.page_table[pde], > - vm->scratch_pt, pt); > + old = cmpxchg(&pd->entry[pde], vm->scratch_pt, pt); > if (old == vm->scratch_pt) { > - ppgtt->base.pd.page_table[pde] = pt; > if (i915_vma_is_bound(ppgtt->vma, > I915_VMA_GLOBAL_BIND)) { > gen6_write_pde(ppgtt, pde, pt); > @@ -1856,12 +1849,12 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, > pt = old; > } > > - spin_lock(&ppgtt->base.pd.lock); > + spin_lock(&pd->lock); > } > > - atomic_add(count, &pt->used_ptes); > + atomic_add(count, &pt->used); > } > - spin_unlock(&ppgtt->base.pd.lock); > + spin_unlock(&pd->lock); > > if (flush) { > 
mark_tlbs_dirty(&ppgtt->base); > @@ -1881,6 +1874,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, > static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt) > { > struct i915_address_space * const vm = &ppgtt->base.vm; > + struct i915_page_directory * const pd = ppgtt->base.pd; > struct i915_page_table *unused; > u32 pde; > int ret; > @@ -1900,9 +1894,9 @@ static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt) > } > > gen6_initialize_pt(vm, vm->scratch_pt); > - gen6_for_all_pdes(unused, &ppgtt->base.pd, pde) > - ppgtt->base.pd.page_table[pde] = vm->scratch_pt; > - spin_lock_init(&ppgtt->base.pd.lock); > + > + gen6_for_all_pdes(unused, pd, pde) > + pd->entry[pde] = vm->scratch_pt; > > return 0; > } > @@ -1915,10 +1909,11 @@ static void gen6_ppgtt_free_scratch(struct i915_address_space *vm) > > static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt) > { > + struct i915_page_directory * const pd = ppgtt->base.pd; > struct i915_page_table *pt; > u32 pde; > > - gen6_for_all_pdes(pt, &ppgtt->base.pd, pde) > + gen6_for_all_pdes(pt, pd, pde) > if (pt != ppgtt->base.vm.scratch_pt) > free_pt(&ppgtt->base.vm, pt); > } > @@ -1982,6 +1977,7 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm) > > gen6_ppgtt_free_pd(ppgtt); > gen6_ppgtt_free_scratch(vm); > + kfree(ppgtt->base.pd); > } > > static int pd_vma_set_pages(struct i915_vma *vma) > @@ -2007,10 +2003,10 @@ static int pd_vma_bind(struct i915_vma *vma, > struct i915_page_table *pt; > unsigned int pde; > > - ppgtt->base.pd.base.ggtt_offset = ggtt_offset * sizeof(gen6_pte_t); > + ppgtt->base.pd->base.ggtt_offset = ggtt_offset * sizeof(gen6_pte_t); > ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset; > > - gen6_for_all_pdes(pt, &ppgtt->base.pd, pde) > + gen6_for_all_pdes(pt, ppgtt->base.pd, pde) > gen6_write_pde(ppgtt, pde, pt); > > mark_tlbs_dirty(&ppgtt->base); > @@ -2022,6 +2018,7 @@ static int pd_vma_bind(struct i915_vma *vma, > static void pd_vma_unbind(struct 
i915_vma *vma) > { > struct gen6_ppgtt *ppgtt = vma->private; > + struct i915_page_directory * const pd = ppgtt->base.pd; > struct i915_page_table * const scratch_pt = ppgtt->base.vm.scratch_pt; > struct i915_page_table *pt; > unsigned int pde; > @@ -2030,12 +2027,12 @@ static void pd_vma_unbind(struct i915_vma *vma) > return; > > /* Free all no longer used page tables */ > - gen6_for_all_pdes(pt, &ppgtt->base.pd, pde) { > - if (atomic_read(&pt->used_ptes) || pt == scratch_pt) > + gen6_for_all_pdes(pt, ppgtt->base.pd, pde) { > + if (atomic_read(&pt->used) || pt == scratch_pt) > continue; > > free_pt(&ppgtt->base.vm, pt); > - ppgtt->base.pd.page_table[pde] = scratch_pt; > + pd->entry[pde] = scratch_pt; > } > > ppgtt->scan_for_unused_pt = false; > @@ -2164,9 +2161,15 @@ static struct i915_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915) > goto err_free; > } > > + ppgtt->base.pd = __alloc_pd(512); > + if (!ppgtt->base.pd) { > + err = -ENOMEM; > + goto err_work; > + } > + > err = gen6_ppgtt_init_scratch(ppgtt); > if (err) > - goto err_work; > + goto err_pd; > > ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE); > if (IS_ERR(ppgtt->vma)) { > @@ -2178,6 +2181,8 @@ static struct i915_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915) > > err_scratch: > gen6_ppgtt_free_scratch(&ppgtt->base.vm); > +err_pd: > + kfree(ppgtt->base.pd); > err_work: > kfree(ppgtt->work); > err_free: > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h > index 89437d0a721c..49f44071def4 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.h > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h > @@ -248,28 +248,14 @@ struct i915_page_dma { > > struct i915_page_table { > struct i915_page_dma base; > - atomic_t used_ptes; > + atomic_t used; > }; > > struct i915_page_directory { > struct i915_page_dma base; > - > - struct i915_page_table *page_table[I915_PDES]; /* PDEs */ > - atomic_t used_pdes; > - spinlock_t lock; > -}; > - > -struct i915_page_directory_pointer { > 
- struct i915_page_dma base; > - struct i915_page_directory **page_directory; > - atomic_t used_pdpes; > - spinlock_t lock; > -}; > - > -struct i915_pml4 { > - struct i915_page_dma base; > - struct i915_page_directory_pointer *pdps[GEN8_PML4ES_PER_PML4]; > + atomic_t used; > spinlock_t lock; > + void *entry[0]; > }; Furthermore, we can say that only two types of pointer could be stored here. However, unions tend to end up with messy code, so I can understand if the implicit casts from void end up being neater. -Chris
Quoting Chris Wilson (2019-06-11 20:41:59) > Quoting Mika Kuoppala (2019-06-11 18:27:24) > > struct i915_page_table { > > struct i915_page_dma base; > > - atomic_t used_ptes; > > + atomic_t used; > > }; > > > > struct i915_page_directory { > > struct i915_page_dma base; > > - > > - struct i915_page_table *page_table[I915_PDES]; /* PDEs */ > > - atomic_t used_pdes; > > - spinlock_t lock; > > -}; > > - > > -struct i915_page_directory_pointer { > > - struct i915_page_dma base; > > - struct i915_page_directory **page_directory; > > - atomic_t used_pdpes; > > - spinlock_t lock; > > -}; > > - > > -struct i915_pml4 { > > - struct i915_page_dma base; > > - struct i915_page_directory_pointer *pdps[GEN8_PML4ES_PER_PML4]; > > + atomic_t used; > > spinlock_t lock; > > + void *entry[0]; > > }; > > And always (albeit with a single bsw discrepancy) 512. At the very least > you can alias a fixed sized variant over the top to remove the extra > pointer chasing you added. I would float your "make bsw behave identically" patch. In the grand scheme of things, no one will ever notice that bsw alone saved a couple of pages per GTT. -Chris
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index c86ca9f21532..dbab0ab1cef1 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -1038,7 +1038,7 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data) if (i915_vm_is_4lvl(vm)) { struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - const dma_addr_t pd_daddr = px_dma(&ppgtt->pml4); + const dma_addr_t pd_daddr = px_dma(ppgtt->pd); cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h index 5ef932d810a7..6bf34738b4e5 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h @@ -55,7 +55,7 @@ #define ASSIGN_CTX_PML4(ppgtt, reg_state) do { \ u32 *reg_state__ = (reg_state); \ - const u64 addr__ = px_dma(&ppgtt->pml4); \ + const u64 addr__ = px_dma(ppgtt->pd); \ (reg_state__)[CTX_PDP0_UDW + 1] = upper_32_bits(addr__); \ (reg_state__)[CTX_PDP0_LDW + 1] = lower_32_bits(addr__); \ } while (0) diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index c834d016c965..3b857994943c 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -1523,7 +1523,7 @@ static int load_pd_dir(struct i915_request *rq, const struct i915_ppgtt *ppgtt) *cs++ = MI_LOAD_REGISTER_IMM(1); *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base)); - *cs++ = ppgtt->pd.base.ggtt_offset << 10; + *cs++ = ppgtt->pd->base.ggtt_offset << 10; intel_ring_advance(rq, cs); diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index e301efb18d45..f1e1261ac3db 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -375,11 +375,13 @@ static int set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload, return -EINVAL; if 
(mm->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) { - px_dma(&ppgtt->pml4) = mm->ppgtt_mm.shadow_pdps[0]; + px_dma(ppgtt->pd) = mm->ppgtt_mm.shadow_pdps[0]; } else { for (i = 0; i < GVT_RING_CTX_NR_PDPS; i++) { - px_dma(ppgtt->pdp.page_directory[i]) = - mm->ppgtt_mm.shadow_pdps[i]; + struct i915_page_directory * const pd = + i915_pd_entry(ppgtt->pd, i); + + px_dma(pd) = mm->ppgtt_mm.shadow_pdps[i]; } } @@ -1128,11 +1130,14 @@ i915_context_ppgtt_root_restore(struct intel_vgpu_submission *s, int i; if (i915_vm_is_4lvl(&ppgtt->vm)) { - px_dma(&ppgtt->pml4) = s->i915_context_pml4; + px_dma(ppgtt->pd) = s->i915_context_pml4; } else { - for (i = 0; i < GEN8_3LVL_PDPES; i++) - px_dma(ppgtt->pdp.page_directory[i]) = - s->i915_context_pdps[i]; + for (i = 0; i < GEN8_3LVL_PDPES; i++) { + struct i915_page_directory * const pd = + i915_pd_entry(ppgtt->pd, i); + + px_dma(pd) = s->i915_context_pdps[i]; + } } } @@ -1186,11 +1191,14 @@ i915_context_ppgtt_root_save(struct intel_vgpu_submission *s, int i; if (i915_vm_is_4lvl(&ppgtt->vm)) { - s->i915_context_pml4 = px_dma(&ppgtt->pml4); + s->i915_context_pml4 = px_dma(ppgtt->pd); } else { - for (i = 0; i < GEN8_3LVL_PDPES; i++) - s->i915_context_pdps[i] = - px_dma(ppgtt->pdp.page_directory[i]); + for (i = 0; i < GEN8_3LVL_PDPES; i++) { + struct i915_page_directory * const pd = + i915_pd_entry(ppgtt->pd, i); + + s->i915_context_pdps[i] = px_dma(pd); + } } } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 07f86d474fa2..9a1f956a817a 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -661,7 +661,8 @@ static struct i915_page_table *alloc_pt(struct i915_address_space *vm) return ERR_PTR(-ENOMEM); } - atomic_set(&pt->used_ptes, 0); + atomic_set(&pt->used, 0); + return pt; } @@ -683,11 +684,28 @@ static void gen6_initialize_pt(struct i915_address_space *vm, fill32_px(vm, pt, vm->scratch_pte); } +static struct i915_page_directory 
*__alloc_pd(const unsigned int entries) +{ + struct i915_page_directory *pd; + + pd = kmalloc(sizeof(*pd) + + entries * sizeof(pd->entry[0]), I915_GFP_ALLOW_FAIL); + + if (unlikely(!pd)) + return NULL; + + memset(&pd->base, 0, sizeof(pd->base)); + atomic_set(&pd->used, 0); + spin_lock_init(&pd->lock); + + return pd; +} + static struct i915_page_directory *alloc_pd(struct i915_address_space *vm) { struct i915_page_directory *pd; - pd = kmalloc(sizeof(*pd), I915_GFP_ALLOW_FAIL); + pd = __alloc_pd(512); if (unlikely(!pd)) return ERR_PTR(-ENOMEM); @@ -696,8 +714,6 @@ static struct i915_page_directory *alloc_pd(struct i915_address_space *vm) return ERR_PTR(-ENOMEM); } - atomic_set(&pd->used_pdes, 0); - spin_lock_init(&pd->lock); return pd; } @@ -713,88 +729,56 @@ static void gen8_initialize_pd(struct i915_address_space *vm, { fill_px(vm, pd, gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC)); - memset_p((void **)pd->page_table, vm->scratch_pt, I915_PDES); + memset_p(pd->entry, vm->scratch_pt, I915_PDES); } -static int __pdp_init(struct i915_address_space *vm, - struct i915_page_directory_pointer *pdp) +static struct i915_page_directory *alloc_pdp(struct i915_address_space *vm) { - const unsigned int pdpes = i915_pdpes_per_pdp(vm); + struct i915_page_directory *pdp; - pdp->page_directory = kmalloc_array(pdpes, sizeof(*pdp->page_directory), - I915_GFP_ALLOW_FAIL); - if (unlikely(!pdp->page_directory)) - return -ENOMEM; - - memset_p((void **)pdp->page_directory, vm->scratch_pd, pdpes); - - atomic_set(&pdp->used_pdpes, 0); - spin_lock_init(&pdp->lock); - return 0; -} - -static void __pdp_fini(struct i915_page_directory_pointer *pdp) -{ - kfree(pdp->page_directory); - pdp->page_directory = NULL; -} - -static struct i915_page_directory_pointer * -alloc_pdp(struct i915_address_space *vm) -{ - struct i915_page_directory_pointer *pdp; - int ret = -ENOMEM; - - GEM_BUG_ON(!i915_vm_is_4lvl(vm)); - - pdp = kmalloc(sizeof(*pdp), GFP_KERNEL); + pdp = 
__alloc_pd(i915_pdpes_per_pdp(vm)); if (!pdp) return ERR_PTR(-ENOMEM); - ret = __pdp_init(vm, pdp); - if (ret) - goto fail_bitmap; - - ret = setup_px(vm, pdp); - if (ret) - goto fail_page_m; + if (i915_vm_is_4lvl(vm)) { + if (unlikely(setup_px(vm, pdp))) { + kfree(pdp); + return ERR_PTR(-ENOMEM); + } + } return pdp; - -fail_page_m: - __pdp_fini(pdp); -fail_bitmap: - kfree(pdp); - - return ERR_PTR(ret); } static void free_pdp(struct i915_address_space *vm, - struct i915_page_directory_pointer *pdp) + struct i915_page_directory *pdp) { - __pdp_fini(pdp); - - if (!i915_vm_is_4lvl(vm)) - return; + if (i915_vm_is_4lvl(vm)) + cleanup_px(vm, pdp); - cleanup_px(vm, pdp); kfree(pdp); } -static void gen8_initialize_pdp(struct i915_address_space *vm, - struct i915_page_directory_pointer *pdp) +static void gen8_initialize_4lvl_pdp(struct i915_address_space *vm, + struct i915_page_directory *pdp) { fill_px(vm, pdp, gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC)); + memset_p(pdp->entry, vm->scratch_pd, 512); +} + +static void gen8_initialize_3lvl_pdp(struct i915_address_space *vm, + struct i915_page_directory *pdp) +{ + memset_p(pdp->entry, vm->scratch_pd, GEN8_3LVL_PDPES); } static void gen8_initialize_pml4(struct i915_address_space *vm, - struct i915_pml4 *pml4) + struct i915_page_directory *pml4) { fill_px(vm, pml4, gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC)); - memset_p((void **)pml4->pdps, vm->scratch_pdp, GEN8_PML4ES_PER_PML4); - spin_lock_init(&pml4->lock); + memset_p(pml4->entry, vm->scratch_pdp, GEN8_PML4ES_PER_PML4); } /* @@ -822,8 +806,8 @@ static bool gen8_ppgtt_clear_pt(const struct i915_address_space *vm, memset64(vaddr + gen8_pte_index(start), vm->scratch_pte, num_entries); kunmap_atomic(vaddr); - GEM_BUG_ON(num_entries > atomic_read(&pt->used_ptes)); - return !atomic_sub_return(num_entries, &pt->used_ptes); + GEM_BUG_ON(num_entries > atomic_read(&pt->used)); + return !atomic_sub_return(num_entries, &pt->used); } static void 
gen8_ppgtt_set_pde(struct i915_address_space *vm, @@ -854,12 +838,12 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm, continue; spin_lock(&pd->lock); - if (!atomic_read(&pt->used_ptes)) { + if (!atomic_read(&pt->used)) { gen8_ppgtt_set_pde(vm, pd, vm->scratch_pt, pde); - pd->page_table[pde] = vm->scratch_pt; + pd->entry[pde] = vm->scratch_pt; - GEM_BUG_ON(!atomic_read(&pd->used_pdes)); - atomic_dec(&pd->used_pdes); + GEM_BUG_ON(!atomic_read(&pd->used)); + atomic_dec(&pd->used); free = true; } spin_unlock(&pd->lock); @@ -867,11 +851,11 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm, free_pt(vm, pt); } - return !atomic_read(&pd->used_pdes); + return !atomic_read(&pd->used); } static void gen8_ppgtt_set_pdpe(struct i915_address_space *vm, - struct i915_page_directory_pointer *pdp, + struct i915_page_directory *pdp, struct i915_page_directory *pd, unsigned int pdpe) { @@ -889,7 +873,7 @@ static void gen8_ppgtt_set_pdpe(struct i915_address_space *vm, * Caller can use the return value to update higher-level entries */ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm, - struct i915_page_directory_pointer *pdp, + struct i915_page_directory * const pdp, u64 start, u64 length) { struct i915_page_directory *pd; @@ -904,12 +888,12 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm, continue; spin_lock(&pdp->lock); - if (!atomic_read(&pd->used_pdes)) { + if (!atomic_read(&pd->used)) { gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe); - pdp->page_directory[pdpe] = vm->scratch_pd; + pdp->entry[pdpe] = vm->scratch_pd; - GEM_BUG_ON(!atomic_read(&pdp->used_pdpes)); - atomic_dec(&pdp->used_pdpes); + GEM_BUG_ON(!atomic_read(&pdp->used)); + atomic_dec(&pdp->used); free = true; } spin_unlock(&pdp->lock); @@ -917,17 +901,17 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm, free_pd(vm, pd); } - return !atomic_read(&pdp->used_pdpes); + return !atomic_read(&pdp->used); } static void 
gen8_ppgtt_clear_3lvl(struct i915_address_space *vm, u64 start, u64 length) { - gen8_ppgtt_clear_pdp(vm, &i915_vm_to_ppgtt(vm)->pdp, start, length); + gen8_ppgtt_clear_pdp(vm, i915_vm_to_ppgtt(vm)->pd, start, length); } -static void gen8_ppgtt_set_pml4e(struct i915_pml4 *pml4, - struct i915_page_directory_pointer *pdp, +static void gen8_ppgtt_set_pml4e(struct i915_page_directory *pml4, + struct i915_page_directory *pdp, unsigned int pml4e) { gen8_ppgtt_pml4e_t *vaddr; @@ -945,8 +929,8 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm, u64 start, u64 length) { struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - struct i915_pml4 *pml4 = &ppgtt->pml4; - struct i915_page_directory_pointer *pdp; + struct i915_page_directory * const pml4 = ppgtt->pd; + struct i915_page_directory *pdp; unsigned int pml4e; GEM_BUG_ON(!i915_vm_is_4lvl(vm)); @@ -959,9 +943,9 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm, continue; spin_lock(&pml4->lock); - if (!atomic_read(&pdp->used_pdpes)) { + if (!atomic_read(&pdp->used)) { gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e); - pml4->pdps[pml4e] = vm->scratch_pdp; + pml4->entry[pml4e] = vm->scratch_pdp; free = true; } spin_unlock(&pml4->lock); @@ -998,7 +982,7 @@ static __always_inline struct gen8_insert_pte gen8_insert_pte(u64 start) static __always_inline bool gen8_ppgtt_insert_pte_entries(struct i915_ppgtt *ppgtt, - struct i915_page_directory_pointer *pdp, + struct i915_page_directory *pdp, struct sgt_dma *iter, struct gen8_insert_pte *idx, enum i915_cache_level cache_level, @@ -1010,8 +994,8 @@ gen8_ppgtt_insert_pte_entries(struct i915_ppgtt *ppgtt, bool ret; GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm)); - pd = pdp->page_directory[idx->pdpe]; - vaddr = kmap_atomic_px(pd->page_table[idx->pde]); + pd = i915_pd_entry(pdp, idx->pdpe); + vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde)); do { vaddr[idx->pte] = pte_encode | iter->dma; @@ -1041,11 +1025,11 @@ 
gen8_ppgtt_insert_pte_entries(struct i915_ppgtt *ppgtt, } GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm)); - pd = pdp->page_directory[idx->pdpe]; + pd = pdp->entry[idx->pdpe]; } kunmap_atomic(vaddr); - vaddr = kmap_atomic_px(pd->page_table[idx->pde]); + vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde)); } } while (1); kunmap_atomic(vaddr); @@ -1062,14 +1046,14 @@ static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm, struct sgt_dma iter = sgt_dma(vma); struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start); - gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter, &idx, + gen8_ppgtt_insert_pte_entries(ppgtt, ppgtt->pd, &iter, &idx, cache_level, flags); vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; } static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, - struct i915_page_directory_pointer **pdps, + struct i915_page_directory *pml4, struct sgt_dma *iter, enum i915_cache_level cache_level, u32 flags) @@ -1080,8 +1064,9 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, do { struct gen8_insert_pte idx = gen8_insert_pte(start); - struct i915_page_directory_pointer *pdp = pdps[idx.pml4e]; - struct i915_page_directory *pd = pdp->page_directory[idx.pdpe]; + struct i915_page_directory *pdp = + i915_pdp_entry(pml4, idx.pml4e); + struct i915_page_directory *pd = i915_pd_entry(pdp, idx.pdpe); unsigned int page_size; bool maybe_64K = false; gen8_pte_t encode = pte_encode; @@ -1099,7 +1084,7 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, vaddr = kmap_atomic_px(pd); } else { - struct i915_page_table *pt = pd->page_table[idx.pde]; + struct i915_page_table *pt = i915_pt_entry(pd, idx.pde); index = idx.pte; max = GEN8_PTES; @@ -1174,7 +1159,8 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, u16 i; encode = vma->vm->scratch_pte; - vaddr = kmap_atomic_px(pd->page_table[idx.pde]); + vaddr = kmap_atomic_px(i915_pt_entry(pd, + idx.pde)); for (i = 1; i < index; i += 16) memset64(vaddr + i, 
encode, 15); @@ -1194,15 +1180,16 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, { struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct sgt_dma iter = sgt_dma(vma); - struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps; + struct i915_page_directory * const pml4 = ppgtt->pd; if (vma->page_sizes.sg > I915_GTT_PAGE_SIZE) { - gen8_ppgtt_insert_huge_entries(vma, pdps, &iter, cache_level, + gen8_ppgtt_insert_huge_entries(vma, pml4, &iter, cache_level, flags); } else { struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start); - while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[idx.pml4e++], + while (gen8_ppgtt_insert_pte_entries(ppgtt, + i915_pdp_entry(pml4, idx.pml4e++), &iter, &idx, cache_level, flags)) GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4); @@ -1217,8 +1204,8 @@ static void gen8_free_page_tables(struct i915_address_space *vm, int i; for (i = 0; i < I915_PDES; i++) { - if (pd->page_table[i] != vm->scratch_pt) - free_pt(vm, pd->page_table[i]); + if (pd->entry[i] != vm->scratch_pt) + free_pt(vm, pd->entry[i]); } } @@ -1277,7 +1264,7 @@ static int gen8_init_scratch(struct i915_address_space *vm) gen8_initialize_pt(vm, vm->scratch_pt); gen8_initialize_pd(vm, vm->scratch_pd); if (i915_vm_is_4lvl(vm)) - gen8_initialize_pdp(vm, vm->scratch_pdp); + gen8_initialize_4lvl_pdp(vm, vm->scratch_pdp); return 0; @@ -1299,7 +1286,7 @@ static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) int i; if (i915_vm_is_4lvl(vm)) { - const u64 daddr = px_dma(&ppgtt->pml4); + const u64 daddr = px_dma(ppgtt->pd); I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr)); I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr)); @@ -1336,17 +1323,17 @@ static void gen8_free_scratch(struct i915_address_space *vm) } static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm, - struct i915_page_directory_pointer *pdp) + struct i915_page_directory *pdp) { const unsigned int pdpes = i915_pdpes_per_pdp(vm); int i; for (i = 0; i < pdpes; i++) 
{ - if (pdp->page_directory[i] == vm->scratch_pd) + if (pdp->entry[i] == vm->scratch_pd) continue; - gen8_free_page_tables(vm, pdp->page_directory[i]); - free_pd(vm, pdp->page_directory[i]); + gen8_free_page_tables(vm, pdp->entry[i]); + free_pd(vm, pdp->entry[i]); } free_pdp(vm, pdp); @@ -1354,16 +1341,19 @@ static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm, static void gen8_ppgtt_cleanup_4lvl(struct i915_ppgtt *ppgtt) { + struct i915_page_directory * const pml4 = ppgtt->pd; int i; for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) { - if (ppgtt->pml4.pdps[i] == ppgtt->vm.scratch_pdp) + struct i915_page_directory *pdp = i915_pdp_entry(pml4, i); + + if (pdp == ppgtt->vm.scratch_pdp) continue; - gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, ppgtt->pml4.pdps[i]); + gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, pdp); } - cleanup_px(&ppgtt->vm, &ppgtt->pml4); + cleanup_px(&ppgtt->vm, pml4); } static void gen8_ppgtt_cleanup(struct i915_address_space *vm) @@ -1377,7 +1367,7 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) if (i915_vm_is_4lvl(vm)) gen8_ppgtt_cleanup_4lvl(ppgtt); else - gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, &ppgtt->pdp); + gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, ppgtt->pd); gen8_free_scratch(vm); } @@ -1406,10 +1396,10 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm, if (count < GEN8_PTES || intel_vgpu_active(vm->i915)) gen8_initialize_pt(vm, pt); - old = cmpxchg(&pd->page_table[pde], vm->scratch_pt, pt); + old = cmpxchg(&pd->entry[pde], vm->scratch_pt, pt); if (old == vm->scratch_pt) { gen8_ppgtt_set_pde(vm, pd, pt, pde); - atomic_inc(&pd->used_pdes); + atomic_inc(&pd->used); } else { free_pt(vm, pt); pt = old; @@ -1418,7 +1408,7 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm, spin_lock(&pd->lock); } - atomic_add(count, &pt->used_ptes); + atomic_add(count, &pt->used); } spin_unlock(&pd->lock); @@ -1430,7 +1420,7 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm, } static int gen8_ppgtt_alloc_pdp(struct 
i915_address_space *vm, - struct i915_page_directory_pointer *pdp, + struct i915_page_directory *pdp, u64 start, u64 length) { struct i915_page_directory *pd; @@ -1451,11 +1441,10 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm, gen8_initialize_pd(vm, pd); - old = cmpxchg(&pdp->page_directory[pdpe], - vm->scratch_pd, pd); + old = cmpxchg(&pdp->entry[pdpe], vm->scratch_pd, pd); if (old == vm->scratch_pd) { gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe); - atomic_inc(&pdp->used_pdpes); + atomic_inc(&pdp->used); } else { free_pd(vm, pd); pd = old; @@ -1463,7 +1452,7 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm, spin_lock(&pdp->lock); } - atomic_inc(&pd->used_pdes); + atomic_inc(&pd->used); spin_unlock(&pdp->lock); ret = gen8_ppgtt_alloc_pd(vm, pd, start, length); @@ -1471,7 +1460,7 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm, goto unwind_pd; spin_lock(&pdp->lock); - atomic_dec(&pd->used_pdes); + atomic_dec(&pd->used); } spin_unlock(&pdp->lock); @@ -1479,10 +1468,10 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm, unwind_pd: spin_lock(&pdp->lock); - if (atomic_dec_and_test(&pd->used_pdes)) { + if (atomic_dec_and_test(&pd->used)) { gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe); - GEM_BUG_ON(!atomic_read(&pdp->used_pdpes)); - atomic_dec(&pdp->used_pdpes); + GEM_BUG_ON(!atomic_read(&pdp->used)); + atomic_dec(&pdp->used); free_pd(vm, pd); } spin_unlock(&pdp->lock); @@ -1495,23 +1484,24 @@ static int gen8_ppgtt_alloc_3lvl(struct i915_address_space *vm, u64 start, u64 length) { return gen8_ppgtt_alloc_pdp(vm, - &i915_vm_to_ppgtt(vm)->pdp, start, length); + i915_vm_to_ppgtt(vm)->pd, start, length); } static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm, u64 start, u64 length) { struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - struct i915_pml4 *pml4 = &ppgtt->pml4; - struct i915_page_directory_pointer *pdp; + struct i915_page_directory * const pml4 = ppgtt->pd; + struct i915_page_directory 
*pdp; u64 from = start; u32 pml4e; int ret; spin_lock(&pml4->lock); gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { + if (pdp == vm->scratch_pdp) { - struct i915_page_directory_pointer *old; + struct i915_page_directory *old; spin_unlock(&pml4->lock); @@ -1519,9 +1509,9 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm, if (IS_ERR(pdp)) goto unwind; - gen8_initialize_pdp(vm, pdp); + gen8_initialize_4lvl_pdp(vm, pdp); - old = cmpxchg(&pml4->pdps[pml4e], vm->scratch_pdp, pdp); + old = cmpxchg(&pml4->entry[pml4e], vm->scratch_pdp, pdp); if (old == vm->scratch_pdp) { gen8_ppgtt_set_pml4e(pml4, pdp, pml4e); } else { @@ -1531,7 +1521,7 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm, spin_lock(&pml4->lock); } - atomic_inc(&pdp->used_pdpes); + atomic_inc(&pdp->used); spin_unlock(&pml4->lock); ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length); @@ -1539,7 +1529,7 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm, goto unwind_pdp; spin_lock(&pml4->lock); - atomic_dec(&pdp->used_pdpes); + atomic_dec(&pdp->used); } spin_unlock(&pml4->lock); @@ -1547,7 +1537,7 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm, unwind_pdp: spin_lock(&pml4->lock); - if (atomic_dec_and_test(&pdp->used_pdpes)) { + if (atomic_dec_and_test(&pdp->used)) { gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e); free_pdp(vm, pdp); } @@ -1560,7 +1550,7 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm, static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt) { struct i915_address_space *vm = &ppgtt->vm; - struct i915_page_directory_pointer *pdp = &ppgtt->pdp; + struct i915_page_directory *pdp = ppgtt->pd; struct i915_page_directory *pd; u64 start = 0, length = ppgtt->vm.total; u64 from = start; @@ -1573,10 +1563,12 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt) gen8_initialize_pd(vm, pd); gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe); - atomic_inc(&pdp->used_pdpes); + + 
atomic_inc(&pdp->used); } - atomic_inc(&pdp->used_pdpes); /* never remove */ + atomic_inc(&pdp->used); /* never remove */ + return 0; unwind: @@ -1585,7 +1577,7 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt) gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe); free_pd(vm, pd); } - atomic_set(&pdp->used_pdpes, 0); + atomic_set(&pdp->used, 0); return -ENOMEM; } @@ -1640,27 +1632,25 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915) if (err) goto err_free; - if (i915_vm_is_4lvl(&ppgtt->vm)) { - err = setup_px(&ppgtt->vm, &ppgtt->pml4); - if (err) - goto err_scratch; + ppgtt->pd = alloc_pdp(&ppgtt->vm); + if (IS_ERR(ppgtt->pd)) { + err = PTR_ERR(ppgtt->pd); + goto err_scratch; + } - gen8_initialize_pml4(&ppgtt->vm, &ppgtt->pml4); + if (i915_vm_is_4lvl(&ppgtt->vm)) { + gen8_initialize_pml4(&ppgtt->vm, ppgtt->pd); ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_4lvl; ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl; ppgtt->vm.clear_range = gen8_ppgtt_clear_4lvl; } else { - err = __pdp_init(&ppgtt->vm, &ppgtt->pdp); - if (err) - goto err_scratch; + gen8_initialize_3lvl_pdp(&ppgtt->vm, ppgtt->pd); if (intel_vgpu_active(i915)) { err = gen8_preallocate_top_level_pdp(ppgtt); - if (err) { - __pdp_fini(&ppgtt->pdp); - goto err_scratch; - } + if (err) + goto err_pdp; } ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_3lvl; @@ -1675,6 +1665,8 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915) return ppgtt; +err_pdp: + free_pdp(&ppgtt->vm, ppgtt->pd); err_scratch: gen8_free_scratch(&ppgtt->vm); err_free: @@ -1740,15 +1732,16 @@ static void gen6_ppgtt_enable(struct drm_i915_private *dev_priv) static void gen6_ppgtt_clear_range(struct i915_address_space *vm, u64 start, u64 length) { - struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); - unsigned int first_entry = start / I915_GTT_PAGE_SIZE; + struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); + const unsigned int first_entry = 
start / I915_GTT_PAGE_SIZE; + const gen6_pte_t scratch_pte = vm->scratch_pte; unsigned int pde = first_entry / GEN6_PTES; unsigned int pte = first_entry % GEN6_PTES; unsigned int num_entries = length / I915_GTT_PAGE_SIZE; - const gen6_pte_t scratch_pte = vm->scratch_pte; while (num_entries) { - struct i915_page_table *pt = ppgtt->base.pd.page_table[pde++]; + struct i915_page_table * const pt = + i915_pt_entry(ppgtt->base.pd, pde++); const unsigned int count = min(num_entries, GEN6_PTES - pte); gen6_pte_t *vaddr; @@ -1756,8 +1749,8 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, num_entries -= count; - GEM_BUG_ON(count > atomic_read(&pt->used_ptes)); - if (!atomic_sub_return(count, &pt->used_ptes)) + GEM_BUG_ON(count > atomic_read(&pt->used)); + if (!atomic_sub_return(count, &pt->used)) ppgtt->scan_for_unused_pt = true; /* @@ -1781,6 +1774,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, u32 flags) { struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); + struct i915_page_directory * const pd = ppgtt->pd; unsigned first_entry = vma->node.start / I915_GTT_PAGE_SIZE; unsigned act_pt = first_entry / GEN6_PTES; unsigned act_pte = first_entry % GEN6_PTES; @@ -1788,9 +1782,9 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, struct sgt_dma iter = sgt_dma(vma); gen6_pte_t *vaddr; - GEM_BUG_ON(ppgtt->pd.page_table[act_pt] == vm->scratch_pt); + GEM_BUG_ON(i915_pt_entry(pd, act_pt) == vm->scratch_pt); - vaddr = kmap_atomic_px(ppgtt->pd.page_table[act_pt]); + vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt)); do { vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma); @@ -1806,7 +1800,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, if (++act_pte == GEN6_PTES) { kunmap_atomic(vaddr); - vaddr = kmap_atomic_px(ppgtt->pd.page_table[++act_pt]); + vaddr = kmap_atomic_px(i915_pt_entry(pd, ++act_pt)); act_pte = 0; } } while (1); @@ -1819,6 +1813,7 @@ static int gen6_alloc_va_range(struct 
i915_address_space *vm, u64 start, u64 length) { struct gen6_ppgtt *ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); + struct i915_page_directory * const pd = ppgtt->base.pd; struct i915_page_table *pt; intel_wakeref_t wakeref; u64 from = start; @@ -1827,14 +1822,14 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, wakeref = intel_runtime_pm_get(vm->i915); - spin_lock(&ppgtt->base.pd.lock); - gen6_for_each_pde(pt, &ppgtt->base.pd, start, length, pde) { + spin_lock(&pd->lock); + gen6_for_each_pde(pt, pd, start, length, pde) { const unsigned int count = gen6_pte_count(start, length); if (pt == vm->scratch_pt) { struct i915_page_table *old; - spin_unlock(&ppgtt->base.pd.lock); + spin_unlock(&pd->lock); pt = alloc_pt(vm); if (IS_ERR(pt)) @@ -1842,10 +1837,8 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, gen6_initialize_pt(vm, pt); - old = cmpxchg(&ppgtt->base.pd.page_table[pde], - vm->scratch_pt, pt); + old = cmpxchg(&pd->entry[pde], vm->scratch_pt, pt); if (old == vm->scratch_pt) { - ppgtt->base.pd.page_table[pde] = pt; if (i915_vma_is_bound(ppgtt->vma, I915_VMA_GLOBAL_BIND)) { gen6_write_pde(ppgtt, pde, pt); @@ -1856,12 +1849,12 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, pt = old; } - spin_lock(&ppgtt->base.pd.lock); + spin_lock(&pd->lock); } - atomic_add(count, &pt->used_ptes); + atomic_add(count, &pt->used); } - spin_unlock(&ppgtt->base.pd.lock); + spin_unlock(&pd->lock); if (flush) { mark_tlbs_dirty(&ppgtt->base); @@ -1881,6 +1874,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt) { struct i915_address_space * const vm = &ppgtt->base.vm; + struct i915_page_directory * const pd = ppgtt->base.pd; struct i915_page_table *unused; u32 pde; int ret; @@ -1900,9 +1894,9 @@ static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt) } gen6_initialize_pt(vm, vm->scratch_pt); - gen6_for_all_pdes(unused, &ppgtt->base.pd, pde) - 
ppgtt->base.pd.page_table[pde] = vm->scratch_pt; - spin_lock_init(&ppgtt->base.pd.lock); + + gen6_for_all_pdes(unused, pd, pde) + pd->entry[pde] = vm->scratch_pt; return 0; } @@ -1915,10 +1909,11 @@ static void gen6_ppgtt_free_scratch(struct i915_address_space *vm) static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt) { + struct i915_page_directory * const pd = ppgtt->base.pd; struct i915_page_table *pt; u32 pde; - gen6_for_all_pdes(pt, &ppgtt->base.pd, pde) + gen6_for_all_pdes(pt, pd, pde) if (pt != ppgtt->base.vm.scratch_pt) free_pt(&ppgtt->base.vm, pt); } @@ -1982,6 +1977,7 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm) gen6_ppgtt_free_pd(ppgtt); gen6_ppgtt_free_scratch(vm); + kfree(ppgtt->base.pd); } static int pd_vma_set_pages(struct i915_vma *vma) @@ -2007,10 +2003,10 @@ static int pd_vma_bind(struct i915_vma *vma, struct i915_page_table *pt; unsigned int pde; - ppgtt->base.pd.base.ggtt_offset = ggtt_offset * sizeof(gen6_pte_t); + ppgtt->base.pd->base.ggtt_offset = ggtt_offset * sizeof(gen6_pte_t); ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset; - gen6_for_all_pdes(pt, &ppgtt->base.pd, pde) + gen6_for_all_pdes(pt, ppgtt->base.pd, pde) gen6_write_pde(ppgtt, pde, pt); mark_tlbs_dirty(&ppgtt->base); @@ -2022,6 +2018,7 @@ static int pd_vma_bind(struct i915_vma *vma, static void pd_vma_unbind(struct i915_vma *vma) { struct gen6_ppgtt *ppgtt = vma->private; + struct i915_page_directory * const pd = ppgtt->base.pd; struct i915_page_table * const scratch_pt = ppgtt->base.vm.scratch_pt; struct i915_page_table *pt; unsigned int pde; @@ -2030,12 +2027,12 @@ static void pd_vma_unbind(struct i915_vma *vma) return; /* Free all no longer used page tables */ - gen6_for_all_pdes(pt, &ppgtt->base.pd, pde) { - if (atomic_read(&pt->used_ptes) || pt == scratch_pt) + gen6_for_all_pdes(pt, ppgtt->base.pd, pde) { + if (atomic_read(&pt->used) || pt == scratch_pt) continue; free_pt(&ppgtt->base.vm, pt); - ppgtt->base.pd.page_table[pde] = 
scratch_pt; + pd->entry[pde] = scratch_pt; } ppgtt->scan_for_unused_pt = false; @@ -2164,9 +2161,15 @@ static struct i915_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915) goto err_free; } + ppgtt->base.pd = __alloc_pd(512); + if (!ppgtt->base.pd) { + err = -ENOMEM; + goto err_work; + } + err = gen6_ppgtt_init_scratch(ppgtt); if (err) - goto err_work; + goto err_pd; ppgtt->vma = pd_vma_create(ppgtt, GEN6_PD_SIZE); if (IS_ERR(ppgtt->vma)) { @@ -2178,6 +2181,8 @@ static struct i915_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915) err_scratch: gen6_ppgtt_free_scratch(&ppgtt->base.vm); +err_pd: + kfree(ppgtt->base.pd); err_work: kfree(ppgtt->work); err_free: diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 89437d0a721c..49f44071def4 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -248,28 +248,14 @@ struct i915_page_dma { struct i915_page_table { struct i915_page_dma base; - atomic_t used_ptes; + atomic_t used; }; struct i915_page_directory { struct i915_page_dma base; - - struct i915_page_table *page_table[I915_PDES]; /* PDEs */ - atomic_t used_pdes; - spinlock_t lock; -}; - -struct i915_page_directory_pointer { - struct i915_page_dma base; - struct i915_page_directory **page_directory; - atomic_t used_pdpes; - spinlock_t lock; -}; - -struct i915_pml4 { - struct i915_page_dma base; - struct i915_page_directory_pointer *pdps[GEN8_PML4ES_PER_PML4]; + atomic_t used; spinlock_t lock; + void *entry[0]; }; struct i915_vma_ops { @@ -321,7 +307,7 @@ struct i915_address_space { struct i915_page_dma scratch_page; struct i915_page_table *scratch_pt; struct i915_page_directory *scratch_pd; - struct i915_page_directory_pointer *scratch_pdp; /* GEN8+ & 48b PPGTT */ + struct i915_page_directory *scratch_pdp; /* GEN8+ & 48b PPGTT */ /** * List of vma currently bound. 
@@ -416,11 +402,7 @@ struct i915_ppgtt { struct i915_address_space vm; intel_engine_mask_t pd_dirty_engines; - union { - struct i915_pml4 pml4; /* GEN8+ & 48b PPGTT */ - struct i915_page_directory_pointer pdp; /* GEN8+ */ - struct i915_page_directory pd; /* GEN6-7 */ - }; + struct i915_page_directory *pd; }; struct gen6_ppgtt { @@ -454,7 +436,7 @@ static inline struct gen6_ppgtt *to_gen6_ppgtt(struct i915_ppgtt *base) #define gen6_for_each_pde(pt, pd, start, length, iter) \ for (iter = gen6_pde_index(start); \ length > 0 && iter < I915_PDES && \ - (pt = (pd)->page_table[iter], true); \ + (pt = i915_pt_entry(pd, iter), true); \ ({ u32 temp = ALIGN(start+1, 1 << GEN6_PDE_SHIFT); \ temp = min(temp - start, length); \ start += temp, length -= temp; }), ++iter) @@ -462,7 +444,7 @@ static inline struct gen6_ppgtt *to_gen6_ppgtt(struct i915_ppgtt *base) #define gen6_for_all_pdes(pt, pd, iter) \ for (iter = 0; \ iter < I915_PDES && \ - (pt = (pd)->page_table[iter], true); \ + (pt = i915_pt_entry(pd, iter), true); \ ++iter) static inline u32 i915_pte_index(u64 address, unsigned int pde_shift) @@ -521,6 +503,27 @@ i915_pdpes_per_pdp(const struct i915_address_space *vm) return GEN8_3LVL_PDPES; } +static inline struct i915_page_table * +i915_pt_entry(const struct i915_page_directory * const pd, + const unsigned short n) +{ + return pd->entry[n]; +} + +static inline struct i915_page_directory * +i915_pd_entry(const struct i915_page_directory * const pdp, + const unsigned short n) +{ + return pdp->entry[n]; +} + +static inline struct i915_page_directory * +i915_pdp_entry(const struct i915_page_directory * const pml4, + const unsigned short n) +{ + return pml4->entry[n]; +} + /* Equivalent to the gen6 version, For each pde iterates over every pde * between from start until start + length. On gen8+ it simply iterates * over every page directory entry in a page directory. 
@@ -528,7 +531,7 @@ i915_pdpes_per_pdp(const struct i915_address_space *vm) #define gen8_for_each_pde(pt, pd, start, length, iter) \ for (iter = gen8_pde_index(start); \ length > 0 && iter < I915_PDES && \ - (pt = (pd)->page_table[iter], true); \ + (pt = i915_pt_entry(pd, iter), true); \ ({ u64 temp = ALIGN(start+1, 1 << GEN8_PDE_SHIFT); \ temp = min(temp - start, length); \ start += temp, length -= temp; }), ++iter) @@ -536,7 +539,7 @@ i915_pdpes_per_pdp(const struct i915_address_space *vm) #define gen8_for_each_pdpe(pd, pdp, start, length, iter) \ for (iter = gen8_pdpe_index(start); \ length > 0 && iter < i915_pdpes_per_pdp(vm) && \ - (pd = (pdp)->page_directory[iter], true); \ + (pd = i915_pd_entry(pdp, iter), true); \ ({ u64 temp = ALIGN(start+1, 1 << GEN8_PDPE_SHIFT); \ temp = min(temp - start, length); \ start += temp, length -= temp; }), ++iter) @@ -544,7 +547,7 @@ i915_pdpes_per_pdp(const struct i915_address_space *vm) #define gen8_for_each_pml4e(pdp, pml4, start, length, iter) \ for (iter = gen8_pml4e_index(start); \ length > 0 && iter < GEN8_PML4ES_PER_PML4 && \ - (pdp = (pml4)->pdps[iter], true); \ + (pdp = i915_pdp_entry(pml4, iter), true); \ ({ u64 temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT); \ temp = min(temp - start, length); \ start += temp, length -= temp; }), ++iter) @@ -577,7 +580,10 @@ static inline u64 gen8_pte_count(u64 address, u64 length) static inline dma_addr_t i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n) { - return px_dma(ppgtt->pdp.page_directory[n]); + struct i915_page_directory *pd; + + pd = i915_pdp_entry(ppgtt->pd, n); + return px_dma(pd); } static inline struct i915_ggtt *
All page directories are identical in function; only their position in the hierarchy differs. Use the same base type for directory functionality. Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Matthew Auld <matthew.william.auld@gmail.com> Cc: Abdiel Janulgue <abdiel.janulgue@linux.intel.com> Signed-off-by: Mika Kuoppala <mika.kuoppala@linux.intel.com> --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/gt/intel_lrc_reg.h | 2 +- drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 2 +- drivers/gpu/drm/i915/gvt/scheduler.c | 30 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 349 ++++++++++---------- drivers/gpu/drm/i915/i915_gem_gtt.h | 64 ++-- 6 files changed, 234 insertions(+), 215 deletions(-)