From patchwork Sun Jul 7 21:00:14 2019
From: Chris Wilson
To: intel-gfx@lists.freedesktop.org
Date: Sun, 7 Jul 2019 22:00:14 +0100
Message-Id: <20190707210024.26192-2-chris@chris-wilson.co.uk>
In-Reply-To: <20190707210024.26192-1-chris@chris-wilson.co.uk>
Subject: [Intel-gfx] [PATCH 01/11] drm/i915/gtt: Use shallow dma pages for scratch

We only use the dma pages for scratch, and so do not need to allocate
the extra storage for the shadow page directory.
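Concretely, scratch only ever needs a dma-mapped page for the hardware to
point at, so the per-level scratch entries shrink from full shadow
structures to bare struct i915_page_dma members. A rough before/after
sketch of the i915_address_space fields this patch touches (struct names
and the trimmed field list here are only for illustration; the real
definitions live in i915_gem_gtt.h):

/* Before: each scratch level carried a full shadow structure, whose
 * bookkeeping (used counts, entry[] pointers) is never consulted for
 * scratch.
 */
struct i915_address_space_old {
	struct i915_page_dma scratch_page;
	struct i915_page_table *scratch_pt;
	struct i915_page_directory *scratch_pd;
	struct i915_page_directory *scratch_pdp;
};

/* After: scratch keeps only the dma page itself. */
struct i915_address_space_new {
	struct i915_page_dma scratch_page;
	struct i915_page_dma scratch_pt;
	struct i915_page_dma scratch_pd;
	struct i915_page_dma scratch_pdp; /* GEN8+ & 48b PPGTT */
};

Code that used to compare against the old shadow pointers now compares
page_dma addresses instead, which is what the px_base(...) ==
&vm->scratch_* checks in the diff below are doing.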
Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_gem_gtt.c | 192 ++++++++++++---------------- drivers/gpu/drm/i915/i915_gem_gtt.h | 6 +- 2 files changed, 85 insertions(+), 113 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 236c964dd761..937236913e70 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -594,25 +594,17 @@ static void cleanup_page_dma(struct i915_address_space *vm, #define kmap_atomic_px(px) kmap_atomic(px_base(px)->page) -#define fill_px(vm, px, v) fill_page_dma((vm), px_base(px), (v)) -#define fill32_px(vm, px, v) fill_page_dma_32((vm), px_base(px), (v)) +#define fill_px(px, v) fill_page_dma(px_base(px), (v)) +#define fill32_px(px, v) fill_page_dma_32(px_base(px), (v)) -static void fill_page_dma(struct i915_address_space *vm, - struct i915_page_dma *p, - const u64 val) +static void fill_page_dma(struct i915_page_dma *p, const u64 val) { - u64 * const vaddr = kmap_atomic(p->page); - - memset64(vaddr, val, PAGE_SIZE / sizeof(val)); - - kunmap_atomic(vaddr); + kunmap_atomic(memset64(kmap_atomic(p->page), val, I915_PDES)); } -static void fill_page_dma_32(struct i915_address_space *vm, - struct i915_page_dma *p, - const u32 v) +static void fill_page_dma_32(struct i915_page_dma *p, const u32 v) { - fill_page_dma(vm, p, (u64)v << 32 | v); + fill_page_dma(p, (u64)v << 32 | v); } static int @@ -687,6 +679,21 @@ static void cleanup_scratch_page(struct i915_address_space *vm) __free_pages(p->page, order); } +static void free_scratch(struct i915_address_space *vm) +{ + if (!vm->scratch_page.daddr) /* set to 0 on clones */ + return; + + if (vm->scratch_pdp.daddr) + cleanup_page_dma(vm, &vm->scratch_pdp); + if (vm->scratch_pd.daddr) + cleanup_page_dma(vm, &vm->scratch_pd); + if (vm->scratch_pt.daddr) + cleanup_page_dma(vm, &vm->scratch_pt); + + cleanup_scratch_page(vm); +} + static struct i915_page_table *alloc_pt(struct i915_address_space *vm) { struct i915_page_table *pt; @@ -711,18 +718,6 @@ static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt) kfree(pt); } -static void gen8_initialize_pt(struct i915_address_space *vm, - struct i915_page_table *pt) -{ - fill_px(vm, pt, vm->scratch_pte); -} - -static void gen6_initialize_pt(struct i915_address_space *vm, - struct i915_page_table *pt) -{ - fill32_px(vm, pt, vm->scratch_pte); -} - static struct i915_page_directory *__alloc_pd(void) { struct i915_page_directory *pd; @@ -765,9 +760,11 @@ static void free_pd(struct i915_address_space *vm, kfree(pd); } -#define init_pd(vm, pd, to) { \ - fill_px((vm), (pd), gen8_pde_encode(px_dma(to), I915_CACHE_LLC)); \ - memset_p((pd)->entry, (to), 512); \ +static void init_pd(struct i915_page_directory *pd, + struct i915_page_dma *scratch) +{ + fill_px(pd, gen8_pde_encode(scratch->daddr, I915_CACHE_LLC)); + memset_p(pd->entry, scratch, 512); } static inline void @@ -869,12 +866,11 @@ static void gen8_ppgtt_clear_pd(struct i915_address_space *vm, u32 pde; gen8_for_each_pde(pt, pd, start, length, pde) { - GEM_BUG_ON(pt == vm->scratch_pt); + GEM_BUG_ON(px_base(pt) == &vm->scratch_pt); atomic_inc(&pt->used); gen8_ppgtt_clear_pt(vm, pt, start, length); - if (release_pd_entry(pd, pde, &pt->used, - px_base(vm->scratch_pt))) + if (release_pd_entry(pd, pde, &pt->used, &vm->scratch_pt)) free_pt(vm, pt); } } @@ -890,12 +886,11 @@ static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm, unsigned int pdpe; gen8_for_each_pdpe(pd, pdp, 
start, length, pdpe) { - GEM_BUG_ON(pd == vm->scratch_pd); + GEM_BUG_ON(px_base(pd) == &vm->scratch_pd); atomic_inc(&pd->used); gen8_ppgtt_clear_pd(vm, pd, start, length); - if (release_pd_entry(pdp, pdpe, &pd->used, - px_base(vm->scratch_pd))) + if (release_pd_entry(pdp, pdpe, &pd->used, &vm->scratch_pd)) free_pd(vm, pd); } } @@ -921,12 +916,11 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm, GEM_BUG_ON(!i915_vm_is_4lvl(vm)); gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { - GEM_BUG_ON(pdp == vm->scratch_pdp); + GEM_BUG_ON(px_base(pdp) == &vm->scratch_pdp); atomic_inc(&pdp->used); gen8_ppgtt_clear_pdp(vm, pdp, start, length); - if (release_pd_entry(pml4, pml4e, &pdp->used, - px_base(vm->scratch_pdp))) + if (release_pd_entry(pml4, pml4e, &pdp->used, &vm->scratch_pdp)) free_pd(vm, pdp); } } @@ -1181,7 +1175,7 @@ static void gen8_free_page_tables(struct i915_address_space *vm, int i; for (i = 0; i < I915_PDES; i++) { - if (pd->entry[i] != vm->scratch_pt) + if (pd->entry[i] != &vm->scratch_pt) free_pt(vm, pd->entry[i]); } } @@ -1218,37 +1212,35 @@ static int gen8_init_scratch(struct i915_address_space *vm) I915_CACHE_LLC, vm->has_read_only); - vm->scratch_pt = alloc_pt(vm); - if (IS_ERR(vm->scratch_pt)) { - ret = PTR_ERR(vm->scratch_pt); + if (unlikely(setup_page_dma(vm, &vm->scratch_pt))) { + ret = -ENOMEM; goto free_scratch_page; } + fill_page_dma(&vm->scratch_pt, vm->scratch_pte); - vm->scratch_pd = alloc_pd(vm); - if (IS_ERR(vm->scratch_pd)) { - ret = PTR_ERR(vm->scratch_pd); + if (unlikely(setup_page_dma(vm, &vm->scratch_pd))) { + ret = -ENOMEM; goto free_pt; } + fill_page_dma(&vm->scratch_pd, + gen8_pde_encode(vm->scratch_pd.daddr, I915_CACHE_LLC)); if (i915_vm_is_4lvl(vm)) { - vm->scratch_pdp = alloc_pd(vm); - if (IS_ERR(vm->scratch_pdp)) { - ret = PTR_ERR(vm->scratch_pdp); + if (unlikely(setup_page_dma(vm, &vm->scratch_pdp))) { + ret = -ENOMEM; goto free_pd; } + fill_page_dma(&vm->scratch_pdp, + gen8_pde_encode(vm->scratch_pdp.daddr, + I915_CACHE_LLC)); } - gen8_initialize_pt(vm, vm->scratch_pt); - init_pd(vm, vm->scratch_pd, vm->scratch_pt); - if (i915_vm_is_4lvl(vm)) - init_pd(vm, vm->scratch_pdp, vm->scratch_pd); - return 0; free_pd: - free_pd(vm, vm->scratch_pd); + cleanup_page_dma(vm, &vm->scratch_pd); free_pt: - free_pt(vm, vm->scratch_pt); + cleanup_page_dma(vm, &vm->scratch_pt); free_scratch_page: cleanup_scratch_page(vm); @@ -1292,18 +1284,6 @@ static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) return 0; } -static void gen8_free_scratch(struct i915_address_space *vm) -{ - if (!vm->scratch_page.daddr) - return; - - if (i915_vm_is_4lvl(vm)) - free_pd(vm, vm->scratch_pdp); - free_pd(vm, vm->scratch_pd); - free_pt(vm, vm->scratch_pt); - cleanup_scratch_page(vm); -} - static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm, struct i915_page_directory *pdp) { @@ -1311,7 +1291,7 @@ static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm, int i; for (i = 0; i < pdpes; i++) { - if (pdp->entry[i] == vm->scratch_pd) + if (pdp->entry[i] == &vm->scratch_pd) continue; gen8_free_page_tables(vm, pdp->entry[i]); @@ -1329,7 +1309,7 @@ static void gen8_ppgtt_cleanup_4lvl(struct i915_ppgtt *ppgtt) for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) { struct i915_page_directory *pdp = i915_pdp_entry(pml4, i); - if (pdp == ppgtt->vm.scratch_pdp) + if (px_base(pdp) == &ppgtt->vm.scratch_pdp) continue; gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, pdp); @@ -1351,7 +1331,7 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) else 
gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, ppgtt->pd); - gen8_free_scratch(vm); + free_scratch(vm); } static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm, @@ -1367,7 +1347,7 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm, gen8_for_each_pde(pt, pd, start, length, pde) { const int count = gen8_pte_count(start, length); - if (pt == vm->scratch_pt) { + if (px_base(pt) == &vm->scratch_pt) { spin_unlock(&pd->lock); pt = fetch_and_zero(&alloc); @@ -1379,10 +1359,10 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm, } if (count < GEN8_PTES || intel_vgpu_active(vm->i915)) - gen8_initialize_pt(vm, pt); + fill_px(pt, vm->scratch_pte); spin_lock(&pd->lock); - if (pd->entry[pde] == vm->scratch_pt) { + if (pd->entry[pde] == &vm->scratch_pt) { set_pd_entry(pd, pde, pt); } else { alloc = pt; @@ -1414,7 +1394,7 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm, spin_lock(&pdp->lock); gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { - if (pd == vm->scratch_pd) { + if (px_base(pd) == &vm->scratch_pd) { spin_unlock(&pdp->lock); pd = fetch_and_zero(&alloc); @@ -1425,10 +1405,10 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm, goto unwind; } - init_pd(vm, pd, vm->scratch_pt); + init_pd(pd, &vm->scratch_pt); spin_lock(&pdp->lock); - if (pdp->entry[pdpe] == vm->scratch_pd) { + if (pdp->entry[pdpe] == &vm->scratch_pd) { set_pd_entry(pdp, pdpe, pd); } else { alloc = pd; @@ -1449,7 +1429,7 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm, goto out; unwind_pd: - if (release_pd_entry(pdp, pdpe, &pd->used, px_base(vm->scratch_pd))) + if (release_pd_entry(pdp, pdpe, &pd->used, &vm->scratch_pd)) free_pd(vm, pd); unwind: gen8_ppgtt_clear_pdp(vm, pdp, from, start - from); @@ -1478,7 +1458,7 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm, spin_lock(&pml4->lock); gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { - if (pdp == vm->scratch_pdp) { + if (px_base(pdp) == &vm->scratch_pdp) { spin_unlock(&pml4->lock); pdp = fetch_and_zero(&alloc); @@ -1489,10 +1469,10 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm, goto unwind; } - init_pd(vm, pdp, vm->scratch_pd); + init_pd(pdp, &vm->scratch_pd); spin_lock(&pml4->lock); - if (pml4->entry[pml4e] == vm->scratch_pdp) { + if (pml4->entry[pml4e] == &vm->scratch_pdp) { set_pd_entry(pml4, pml4e, pdp); } else { alloc = pdp; @@ -1513,7 +1493,7 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm, goto out; unwind_pdp: - if (release_pd_entry(pml4, pml4e, &pdp->used, px_base(vm->scratch_pdp))) + if (release_pd_entry(pml4, pml4e, &pdp->used, &vm->scratch_pdp)) free_pd(vm, pdp); unwind: gen8_ppgtt_clear_4lvl(vm, from, start - from); @@ -1537,7 +1517,7 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt) if (IS_ERR(pd)) goto unwind; - init_pd(vm, pd, vm->scratch_pt); + init_pd(pd, &vm->scratch_pt); set_pd_entry(pdp, pdpe, pd); } @@ -1568,10 +1548,10 @@ static void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt) static void init_pd_n(struct i915_address_space *vm, struct i915_page_directory *pd, - struct i915_page_directory *to, + struct i915_page_dma *to, const unsigned int entries) { - const u64 daddr = gen8_pde_encode(px_dma(to), I915_CACHE_LLC); + const u64 daddr = gen8_pde_encode(to->daddr, I915_CACHE_LLC); u64 * const vaddr = kmap_atomic(pd->base.page); memset64(vaddr, daddr, entries); @@ -1588,7 +1568,7 @@ gen8_alloc_top_pd(struct i915_address_space *vm) if (i915_vm_is_4lvl(vm)) { pd = alloc_pd(vm); if (!IS_ERR(pd)) - 
init_pd(vm, pd, vm->scratch_pdp); + init_pd(pd, &vm->scratch_pdp); return pd; } @@ -1605,7 +1585,7 @@ gen8_alloc_top_pd(struct i915_address_space *vm) return ERR_PTR(-ENOMEM); } - init_pd_n(vm, pd, vm->scratch_pd, GEN8_3LVL_PDPES); + init_pd_n(vm, pd, &vm->scratch_pd, GEN8_3LVL_PDPES); return pd; } @@ -1678,7 +1658,7 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915) err_free_pd: free_pd(&ppgtt->vm, ppgtt->pd); err_free_scratch: - gen8_free_scratch(&ppgtt->vm); + free_scratch(&ppgtt->vm); err_free: kfree(ppgtt); return ERR_PTR(err); @@ -1763,7 +1743,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, const unsigned int count = min(num_entries, GEN6_PTES - pte); gen6_pte_t *vaddr; - GEM_BUG_ON(pt == vm->scratch_pt); + GEM_BUG_ON(px_base(pt) == &vm->scratch_pt); num_entries -= count; @@ -1800,7 +1780,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, struct sgt_dma iter = sgt_dma(vma); gen6_pte_t *vaddr; - GEM_BUG_ON(i915_pt_entry(pd, act_pt) == vm->scratch_pt); + GEM_BUG_ON(pd->entry[act_pt] == &vm->scratch_pt); vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt)); do { @@ -1845,7 +1825,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, gen6_for_each_pde(pt, pd, start, length, pde) { const unsigned int count = gen6_pte_count(start, length); - if (pt == vm->scratch_pt) { + if (px_base(pt) == &vm->scratch_pt) { spin_unlock(&pd->lock); pt = fetch_and_zero(&alloc); @@ -1856,10 +1836,10 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, goto unwind_out; } - gen6_initialize_pt(vm, pt); + fill32_px(pt, vm->scratch_pte); spin_lock(&pd->lock); - if (pd->entry[pde] == vm->scratch_pt) { + if (pd->entry[pde] == &vm->scratch_pt) { pd->entry[pde] = pt; if (i915_vma_is_bound(ppgtt->vma, I915_VMA_GLOBAL_BIND)) { @@ -1908,26 +1888,18 @@ static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt) I915_CACHE_NONE, PTE_READ_ONLY); - vm->scratch_pt = alloc_pt(vm); - if (IS_ERR(vm->scratch_pt)) { + if (unlikely(setup_page_dma(vm, &vm->scratch_pt))) { cleanup_scratch_page(vm); - return PTR_ERR(vm->scratch_pt); + return -ENOMEM; } - - gen6_initialize_pt(vm, vm->scratch_pt); + fill_page_dma_32(&vm->scratch_pt, vm->scratch_pte); gen6_for_all_pdes(unused, pd, pde) - pd->entry[pde] = vm->scratch_pt; + pd->entry[pde] = &vm->scratch_pt; return 0; } -static void gen6_ppgtt_free_scratch(struct i915_address_space *vm) -{ - free_pt(vm, vm->scratch_pt); - cleanup_scratch_page(vm); -} - static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt) { struct i915_page_directory * const pd = ppgtt->base.pd; @@ -1935,7 +1907,7 @@ static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt) u32 pde; gen6_for_all_pdes(pt, pd, pde) - if (pt != ppgtt->base.vm.scratch_pt) + if (px_base(pt) != &ppgtt->base.vm.scratch_pt) free_pt(&ppgtt->base.vm, pt); } @@ -1950,7 +1922,7 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm) mutex_unlock(&i915->drm.struct_mutex); gen6_ppgtt_free_pd(ppgtt); - gen6_ppgtt_free_scratch(vm); + free_scratch(vm); kfree(ppgtt->base.pd); } @@ -1993,7 +1965,7 @@ static void pd_vma_unbind(struct i915_vma *vma) { struct gen6_ppgtt *ppgtt = vma->private; struct i915_page_directory * const pd = ppgtt->base.pd; - struct i915_page_table * const scratch_pt = ppgtt->base.vm.scratch_pt; + struct i915_page_dma * const scratch = &ppgtt->base.vm.scratch_pt; struct i915_page_table *pt; unsigned int pde; @@ -2002,11 +1974,11 @@ static void pd_vma_unbind(struct i915_vma *vma) /* Free all no longer used page tables */ 
gen6_for_all_pdes(pt, ppgtt->base.pd, pde) { - if (atomic_read(&pt->used) || pt == scratch_pt) + if (px_base(pt) == scratch || atomic_read(&pt->used)) continue; free_pt(&ppgtt->base.vm, pt); - pd->entry[pde] = scratch_pt; + pd->entry[pde] = scratch; } ppgtt->scan_for_unused_pt = false; @@ -2148,7 +2120,7 @@ static struct i915_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915) return &ppgtt->base; err_scratch: - gen6_ppgtt_free_scratch(&ppgtt->base.vm); + free_scratch(&ppgtt->base.vm); err_pd: kfree(ppgtt->base.pd); err_free: diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 57a68ef4eda7..860850411a1b 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -304,9 +304,9 @@ struct i915_address_space { u64 scratch_pte; int scratch_order; struct i915_page_dma scratch_page; - struct i915_page_table *scratch_pt; - struct i915_page_directory *scratch_pd; - struct i915_page_directory *scratch_pdp; /* GEN8+ & 48b PPGTT */ + struct i915_page_dma scratch_pt; + struct i915_page_dma scratch_pd; + struct i915_page_dma scratch_pdp; /* GEN8+ & 48b PPGTT */ /** * List of vma currently bound. From patchwork Sun Jul 7 21:00:15 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11034317 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 7456E13BD for ; Sun, 7 Jul 2019 21:00:56 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 61CA927CAF for ; Sun, 7 Jul 2019 21:00:56 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 5691A27F93; Sun, 7 Jul 2019 21:00:56 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-5.2 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_MED autolearn=ham version=3.3.1 Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher DHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.wl.linuxfoundation.org (Postfix) with ESMTPS id 945CC27CAF for ; Sun, 7 Jul 2019 21:00:55 +0000 (UTC) Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 0A62B89AB5; Sun, 7 Jul 2019 21:00:52 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id 983DB89A9A for ; Sun, 7 Jul 2019 21:00:50 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from haswell.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 17163706-1500050 for multiple; Sun, 07 Jul 2019 22:00:27 +0100 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Sun, 7 Jul 2019 22:00:15 +0100 Message-Id: <20190707210024.26192-3-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190707210024.26192-1-chris@chris-wilson.co.uk> References: <20190707210024.26192-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 02/11] drm/i915/gtt: Wrap page_table with page_directory X-BeenThere: 
intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" X-Virus-Scanned: ClamAV using ClamSMTP The page directory extends the page table with the shadow entries. Make the page directory struct embed the page table for easier code reuse. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 2 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 100 ++++++++++----------- drivers/gpu/drm/i915/i915_gem_gtt.h | 31 +++++-- 3 files changed, 70 insertions(+), 63 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index b33cfc56f623..9163b5238082 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -1503,7 +1503,7 @@ static int load_pd_dir(struct i915_request *rq, const struct i915_ppgtt *ppgtt) *cs++ = MI_LOAD_REGISTER_IMM(1); *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine->mmio_base)); - *cs++ = ppgtt->pd->base.ggtt_offset << 10; + *cs++ = px_base(ppgtt->pd)->ggtt_offset << 10; intel_ring_advance(rq, cs); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 937236913e70..1fa93f56792e 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -107,6 +107,8 @@ * */ +#define as_pd(x) container_of((x), typeof(struct i915_page_directory), pt) + static int i915_get_ggtt_vma_pages(struct i915_vma *vma); @@ -712,28 +714,17 @@ static struct i915_page_table *alloc_pt(struct i915_address_space *vm) return pt; } -static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt) -{ - cleanup_page_dma(vm, &pt->base); - kfree(pt); -} - static struct i915_page_directory *__alloc_pd(void) { struct i915_page_directory *pd; pd = kmalloc(sizeof(*pd), I915_GFP_ALLOW_FAIL); - if (unlikely(!pd)) return NULL; - memset(&pd->base, 0, sizeof(pd->base)); - atomic_set(&pd->used, 0); + atomic_set(px_used(pd), 0); spin_lock_init(&pd->lock); - /* for safety */ - pd->entry[0] = NULL; - return pd; } @@ -745,7 +736,7 @@ static struct i915_page_directory *alloc_pd(struct i915_address_space *vm) if (unlikely(!pd)) return ERR_PTR(-ENOMEM); - if (unlikely(setup_page_dma(vm, &pd->base))) { + if (unlikely(setup_page_dma(vm, px_base(pd)))) { kfree(pd); return ERR_PTR(-ENOMEM); } @@ -753,13 +744,14 @@ static struct i915_page_directory *alloc_pd(struct i915_address_space *vm) return pd; } -static void free_pd(struct i915_address_space *vm, - struct i915_page_directory *pd) +static void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd) { - cleanup_page_dma(vm, &pd->base); + cleanup_page_dma(vm, pd); kfree(pd); } +#define free_px(vm, px) free_pd(vm, px_base(px)) + static void init_pd(struct i915_page_directory *pd, struct i915_page_dma *scratch) { @@ -784,9 +776,9 @@ __set_pd_entry(struct i915_page_directory * const pd, struct i915_page_dma * const to, u64 (*encode)(const dma_addr_t, const enum i915_cache_level)) { - GEM_BUG_ON(atomic_read(&pd->used) > 512); + GEM_BUG_ON(atomic_read(px_used(pd)) > 512); - atomic_inc(&pd->used); + atomic_inc(px_used(pd)); pd->entry[pde] = to; write_dma_entry(px_base(pd), pde, encode(to->daddr, I915_CACHE_LLC)); } @@ -797,11 +789,11 @@ __clear_pd_entry(struct i915_page_directory * const pd, struct i915_page_dma * const to, u64 
(*encode)(const dma_addr_t, const enum i915_cache_level)) { - GEM_BUG_ON(atomic_read(&pd->used) == 0); + GEM_BUG_ON(atomic_read(px_used(pd)) == 0); write_dma_entry(px_base(pd), pde, encode(to->daddr, I915_CACHE_LLC)); pd->entry[pde] = to; - atomic_dec(&pd->used); + atomic_dec(px_used(pd)); } #define set_pd_entry(pd, pde, to) \ @@ -813,13 +805,13 @@ __clear_pd_entry(struct i915_page_directory * const pd, static bool release_pd_entry(struct i915_page_directory * const pd, const unsigned short pde, - atomic_t *counter, + struct i915_page_table * const pt, struct i915_page_dma * const scratch) { bool free = false; spin_lock(&pd->lock); - if (atomic_dec_and_test(counter)) { + if (atomic_dec_and_test(&pt->used)) { clear_pd_entry(pd, pde, scratch); free = true; } @@ -870,8 +862,8 @@ static void gen8_ppgtt_clear_pd(struct i915_address_space *vm, atomic_inc(&pt->used); gen8_ppgtt_clear_pt(vm, pt, start, length); - if (release_pd_entry(pd, pde, &pt->used, &vm->scratch_pt)) - free_pt(vm, pt); + if (release_pd_entry(pd, pde, pt, &vm->scratch_pt)) + free_px(vm, pt); } } @@ -888,10 +880,10 @@ static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm, gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { GEM_BUG_ON(px_base(pd) == &vm->scratch_pd); - atomic_inc(&pd->used); + atomic_inc(px_used(pd)); gen8_ppgtt_clear_pd(vm, pd, start, length); - if (release_pd_entry(pdp, pdpe, &pd->used, &vm->scratch_pd)) - free_pd(vm, pd); + if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch_pd)) + free_px(vm, pd); } } @@ -918,10 +910,10 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm, gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { GEM_BUG_ON(px_base(pdp) == &vm->scratch_pdp); - atomic_inc(&pdp->used); + atomic_inc(px_used(pdp)); gen8_ppgtt_clear_pdp(vm, pdp, start, length); - if (release_pd_entry(pml4, pml4e, &pdp->used, &vm->scratch_pdp)) - free_pd(vm, pdp); + if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch_pdp)) + free_px(vm, pdp); } } @@ -1176,7 +1168,7 @@ static void gen8_free_page_tables(struct i915_address_space *vm, for (i = 0; i < I915_PDES; i++) { if (pd->entry[i] != &vm->scratch_pt) - free_pt(vm, pd->entry[i]); + free_pd(vm, pd->entry[i]); } } @@ -1255,9 +1247,9 @@ static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) int i; if (create) - atomic_inc(&ppgtt->pd->used); /* never remove */ + atomic_inc(px_used(ppgtt->pd)); /* never remove */ else - atomic_dec(&ppgtt->pd->used); + atomic_dec(px_used(ppgtt->pd)); if (i915_vm_is_4lvl(vm)) { const u64 daddr = px_dma(ppgtt->pd); @@ -1298,7 +1290,7 @@ static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm, free_pd(vm, pdp->entry[i]); } - free_pd(vm, pdp); + free_px(vm, pdp); } static void gen8_ppgtt_cleanup_4lvl(struct i915_ppgtt *ppgtt) @@ -1315,7 +1307,7 @@ static void gen8_ppgtt_cleanup_4lvl(struct i915_ppgtt *ppgtt) gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, pdp); } - free_pd(&ppgtt->vm, pml4); + free_px(&ppgtt->vm, pml4); } static void gen8_ppgtt_cleanup(struct i915_address_space *vm) @@ -1379,7 +1371,7 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm, gen8_ppgtt_clear_pd(vm, pd, from, start - from); out: if (alloc) - free_pt(vm, alloc); + free_px(vm, alloc); return ret; } @@ -1415,7 +1407,7 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm, pd = pdp->entry[pdpe]; } } - atomic_inc(&pd->used); + atomic_inc(px_used(pd)); spin_unlock(&pdp->lock); ret = gen8_ppgtt_alloc_pd(vm, pd, start, length); @@ -1423,19 +1415,19 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space 
*vm, goto unwind_pd; spin_lock(&pdp->lock); - atomic_dec(&pd->used); + atomic_dec(px_used(pd)); } spin_unlock(&pdp->lock); goto out; unwind_pd: - if (release_pd_entry(pdp, pdpe, &pd->used, &vm->scratch_pd)) - free_pd(vm, pd); + if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch_pd)) + free_px(vm, pd); unwind: gen8_ppgtt_clear_pdp(vm, pdp, from, start - from); out: if (alloc) - free_pd(vm, alloc); + free_px(vm, alloc); return ret; } @@ -1479,7 +1471,7 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm, pdp = pml4->entry[pml4e]; } } - atomic_inc(&pdp->used); + atomic_inc(px_used(pdp)); spin_unlock(&pml4->lock); ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length); @@ -1487,19 +1479,19 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm, goto unwind_pdp; spin_lock(&pml4->lock); - atomic_dec(&pdp->used); + atomic_dec(px_used(pdp)); } spin_unlock(&pml4->lock); goto out; unwind_pdp: - if (release_pd_entry(pml4, pml4e, &pdp->used, &vm->scratch_pdp)) - free_pd(vm, pdp); + if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch_pdp)) + free_px(vm, pdp); unwind: gen8_ppgtt_clear_4lvl(vm, from, start - from); out: if (alloc) - free_pd(vm, alloc); + free_px(vm, alloc); return ret; } @@ -1525,7 +1517,7 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt) unwind: gen8_ppgtt_clear_pdp(vm, pdp, from, start - from); - atomic_set(&pdp->used, 0); + atomic_set(px_used(pdp), 0); return -ENOMEM; } @@ -1552,7 +1544,7 @@ static void init_pd_n(struct i915_address_space *vm, const unsigned int entries) { const u64 daddr = gen8_pde_encode(to->daddr, I915_CACHE_LLC); - u64 * const vaddr = kmap_atomic(pd->base.page); + u64 * const vaddr = kmap_atomic_px(pd); memset64(vaddr, daddr, entries); kunmap_atomic(vaddr); @@ -1580,7 +1572,7 @@ gen8_alloc_top_pd(struct i915_address_space *vm) pd->entry[GEN8_3LVL_PDPES] = NULL; - if (unlikely(setup_page_dma(vm, &pd->base))) { + if (unlikely(setup_page_dma(vm, px_base(pd)))) { kfree(pd); return ERR_PTR(-ENOMEM); } @@ -1656,7 +1648,7 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915) return ppgtt; err_free_pd: - free_pd(&ppgtt->vm, ppgtt->pd); + free_px(&ppgtt->vm, ppgtt->pd); err_free_scratch: free_scratch(&ppgtt->vm); err_free: @@ -1867,7 +1859,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, gen6_ppgtt_clear_range(vm, from, start - from); out: if (alloc) - free_pt(vm, alloc); + free_px(vm, alloc); intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref); return ret; } @@ -1908,7 +1900,7 @@ static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt) gen6_for_all_pdes(pt, pd, pde) if (px_base(pt) != &ppgtt->base.vm.scratch_pt) - free_pt(&ppgtt->base.vm, pt); + free_px(&ppgtt->base.vm, pt); } static void gen6_ppgtt_cleanup(struct i915_address_space *vm) @@ -1949,7 +1941,7 @@ static int pd_vma_bind(struct i915_vma *vma, struct i915_page_table *pt; unsigned int pde; - ppgtt->base.pd->base.ggtt_offset = ggtt_offset * sizeof(gen6_pte_t); + px_base(ppgtt->base.pd)->ggtt_offset = ggtt_offset * sizeof(gen6_pte_t); ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + ggtt_offset; gen6_for_all_pdes(pt, ppgtt->base.pd, pde) @@ -1977,7 +1969,7 @@ static void pd_vma_unbind(struct i915_vma *vma) if (px_base(pt) == scratch || atomic_read(&pt->used)) continue; - free_pt(&ppgtt->base.vm, pt); + free_px(&ppgtt->base.vm, pt); pd->entry[pde] = scratch; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 860850411a1b..48bb8c5125e3 100644 --- 
a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -240,21 +240,37 @@ struct i915_page_dma { }; }; -#define px_base(px) (&(px)->base) -#define px_dma(px) (px_base(px)->daddr) - struct i915_page_table { struct i915_page_dma base; atomic_t used; }; struct i915_page_directory { - struct i915_page_dma base; - atomic_t used; + struct i915_page_table pt; spinlock_t lock; void *entry[512]; }; +#define __px_choose_expr(x, type, expr, other) \ + __builtin_choose_expr( \ + __builtin_types_compatible_p(typeof(x), type) || \ + __builtin_types_compatible_p(typeof(x), const type), \ + ({ type __x = (type)(x); expr; }), \ + other) + +#define px_base(px) \ + __px_choose_expr(px, struct i915_page_dma *, __x, \ + __px_choose_expr(px, struct i915_page_table *, &__x->base, \ + __px_choose_expr(px, struct i915_page_directory *, &__x->pt.base, \ + (void)0))) +#define px_dma(px) (px_base(px)->daddr) + +#define px_pt(px) \ + __px_choose_expr(px, struct i915_page_table *, __x, \ + __px_choose_expr(px, struct i915_page_directory *, &__x->pt, \ + (void)0)) +#define px_used(px) (&px_pt(px)->used) + struct i915_vma_ops { /* Map an object into an address space with the given cache flags. */ int (*bind_vma)(struct i915_vma *vma, @@ -589,10 +605,9 @@ static inline u64 gen8_pte_count(u64 address, u64 length) static inline dma_addr_t i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n) { - struct i915_page_directory *pd; + struct i915_page_dma *pt = ppgtt->pd->entry[n]; - pd = i915_pdp_entry(ppgtt->pd, n); - return px_dma(pd); + return px_dma(pt); } static inline struct i915_ggtt * From patchwork Sun Jul 7 21:00:16 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11034315 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id AC7DB13A4 for ; Sun, 7 Jul 2019 21:00:55 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 9BB9327E01 for ; Sun, 7 Jul 2019 21:00:55 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 8E10B27F93; Sun, 7 Jul 2019 21:00:55 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-5.2 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_MED autolearn=ham version=3.3.1 Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher DHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.wl.linuxfoundation.org (Postfix) with ESMTPS id 90A9327E01 for ; Sun, 7 Jul 2019 21:00:54 +0000 (UTC) Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 3C18F89ABA; Sun, 7 Jul 2019 21:00:52 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id 9B65389AB2 for ; Sun, 7 Jul 2019 21:00:50 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from haswell.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 17163707-1500050 for multiple; Sun, 07 Jul 2019 
22:00:27 +0100 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Sun, 7 Jul 2019 22:00:16 +0100 Message-Id: <20190707210024.26192-4-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190707210024.26192-1-chris@chris-wilson.co.uk> References: <20190707210024.26192-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 03/11] drm/i915/gtt: Reorder gen8 ppgtt free/clear/alloc X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" X-Virus-Scanned: ClamAV using ClamSMTP In preparation for refactoring the free/clear/alloc, first move the code around so that we can avoid forward declarations in the next set of patches. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_gem_gtt.c | 673 ++++++++++++++-------------- 1 file changed, 337 insertions(+), 336 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 1fa93f56792e..da4db76ce054 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -831,6 +831,104 @@ static void mark_tlbs_dirty(struct i915_ppgtt *ppgtt) ppgtt->pd_dirty_engines = ALL_ENGINES; } +static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) +{ + struct i915_address_space *vm = &ppgtt->vm; + struct drm_i915_private *dev_priv = vm->i915; + enum vgt_g2v_type msg; + int i; + + if (create) + atomic_inc(px_used(ppgtt->pd)); /* never remove */ + else + atomic_dec(px_used(ppgtt->pd)); + + if (i915_vm_is_4lvl(vm)) { + const u64 daddr = px_dma(ppgtt->pd); + + I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr)); + I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr)); + + msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE : + VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY); + } else { + for (i = 0; i < GEN8_3LVL_PDPES; i++) { + const u64 daddr = i915_page_dir_dma_addr(ppgtt, i); + + I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr)); + I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr)); + } + + msg = (create ? 
VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE : + VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY); + } + + I915_WRITE(vgtif_reg(g2v_notify), msg); + + return 0; +} + +static void gen8_free_page_tables(struct i915_address_space *vm, + struct i915_page_directory *pd) +{ + int i; + + for (i = 0; i < I915_PDES; i++) { + if (pd->entry[i] != &vm->scratch_pt) + free_pd(vm, pd->entry[i]); + } +} + +static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm, + struct i915_page_directory *pdp) +{ + const unsigned int pdpes = i915_pdpes_per_pdp(vm); + int i; + + for (i = 0; i < pdpes; i++) { + if (pdp->entry[i] == &vm->scratch_pd) + continue; + + gen8_free_page_tables(vm, pdp->entry[i]); + free_pd(vm, pdp->entry[i]); + } + + free_px(vm, pdp); +} + +static void gen8_ppgtt_cleanup_4lvl(struct i915_ppgtt *ppgtt) +{ + struct i915_page_directory * const pml4 = ppgtt->pd; + int i; + + for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) { + struct i915_page_directory *pdp = i915_pdp_entry(pml4, i); + + if (px_base(pdp) == &ppgtt->vm.scratch_pdp) + continue; + + gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, pdp); + } + + free_px(&ppgtt->vm, pml4); +} + +static void gen8_ppgtt_cleanup(struct i915_address_space *vm) +{ + struct drm_i915_private *i915 = vm->i915; + struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); + + if (intel_vgpu_active(i915)) + gen8_ppgtt_notify_vgt(ppgtt, false); + + if (i915_vm_is_4lvl(vm)) + gen8_ppgtt_cleanup_4lvl(ppgtt); + else + gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, ppgtt->pd); + + free_scratch(vm); +} + /* Removes entries from a single page table, releasing it if it's empty. * Caller can use the return value to update higher-level entries. */ @@ -917,95 +1015,265 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm, } } -static inline struct sgt_dma { - struct scatterlist *sg; - dma_addr_t dma, max; -} sgt_dma(struct i915_vma *vma) { - struct scatterlist *sg = vma->pages->sgl; - dma_addr_t addr = sg_dma_address(sg); - return (struct sgt_dma) { sg, addr, addr + sg->length }; -} - -struct gen8_insert_pte { - u16 pml4e; - u16 pdpe; - u16 pde; - u16 pte; -}; -static __always_inline struct gen8_insert_pte gen8_insert_pte(u64 start) +static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm, + struct i915_page_directory *pd, + u64 start, u64 length) { - return (struct gen8_insert_pte) { - gen8_pml4e_index(start), - gen8_pdpe_index(start), - gen8_pde_index(start), - gen8_pte_index(start), - }; -} + struct i915_page_table *pt, *alloc = NULL; + u64 from = start; + unsigned int pde; + int ret = 0; -static __always_inline bool -gen8_ppgtt_insert_pte_entries(struct i915_ppgtt *ppgtt, - struct i915_page_directory *pdp, - struct sgt_dma *iter, - struct gen8_insert_pte *idx, - enum i915_cache_level cache_level, - u32 flags) -{ - struct i915_page_directory *pd; - const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags); - gen8_pte_t *vaddr; - bool ret; + spin_lock(&pd->lock); + gen8_for_each_pde(pt, pd, start, length, pde) { + const int count = gen8_pte_count(start, length); - GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm)); - pd = i915_pd_entry(pdp, idx->pdpe); - vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde)); - do { - vaddr[idx->pte] = pte_encode | iter->dma; + if (px_base(pt) == &vm->scratch_pt) { + spin_unlock(&pd->lock); - iter->dma += I915_GTT_PAGE_SIZE; - if (iter->dma >= iter->max) { - iter->sg = __sg_next(iter->sg); - if (!iter->sg) { - ret = false; - break; + pt = fetch_and_zero(&alloc); + if (!pt) + pt = alloc_pt(vm); + if (IS_ERR(pt)) { + ret = PTR_ERR(pt); + goto unwind; } - 
iter->dma = sg_dma_address(iter->sg); - iter->max = iter->dma + iter->sg->length; + if (count < GEN8_PTES || intel_vgpu_active(vm->i915)) + fill_px(pt, vm->scratch_pte); + + spin_lock(&pd->lock); + if (pd->entry[pde] == &vm->scratch_pt) { + set_pd_entry(pd, pde, pt); + } else { + alloc = pt; + pt = pd->entry[pde]; + } } - if (++idx->pte == GEN8_PTES) { - idx->pte = 0; + atomic_add(count, &pt->used); + } + spin_unlock(&pd->lock); + goto out; - if (++idx->pde == I915_PDES) { - idx->pde = 0; +unwind: + gen8_ppgtt_clear_pd(vm, pd, from, start - from); +out: + if (alloc) + free_px(vm, alloc); + return ret; +} - /* Limited by sg length for 3lvl */ - if (++idx->pdpe == GEN8_PML4ES_PER_PML4) { - idx->pdpe = 0; - ret = true; - break; - } +static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm, + struct i915_page_directory *pdp, + u64 start, u64 length) +{ + struct i915_page_directory *pd, *alloc = NULL; + u64 from = start; + unsigned int pdpe; + int ret = 0; - GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm)); - pd = pdp->entry[idx->pdpe]; + spin_lock(&pdp->lock); + gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { + if (px_base(pd) == &vm->scratch_pd) { + spin_unlock(&pdp->lock); + + pd = fetch_and_zero(&alloc); + if (!pd) + pd = alloc_pd(vm); + if (IS_ERR(pd)) { + ret = PTR_ERR(pd); + goto unwind; } - kunmap_atomic(vaddr); - vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde)); + init_pd(pd, &vm->scratch_pt); + + spin_lock(&pdp->lock); + if (pdp->entry[pdpe] == &vm->scratch_pd) { + set_pd_entry(pdp, pdpe, pd); + } else { + alloc = pd; + pd = pdp->entry[pdpe]; + } } - } while (1); - kunmap_atomic(vaddr); + atomic_inc(px_used(pd)); + spin_unlock(&pdp->lock); + + ret = gen8_ppgtt_alloc_pd(vm, pd, start, length); + if (unlikely(ret)) + goto unwind_pd; + + spin_lock(&pdp->lock); + atomic_dec(px_used(pd)); + } + spin_unlock(&pdp->lock); + goto out; +unwind_pd: + if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch_pd)) + free_px(vm, pd); +unwind: + gen8_ppgtt_clear_pdp(vm, pdp, from, start - from); +out: + if (alloc) + free_px(vm, alloc); return ret; } -static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm, - struct i915_vma *vma, - enum i915_cache_level cache_level, - u32 flags) +static int gen8_ppgtt_alloc_3lvl(struct i915_address_space *vm, + u64 start, u64 length) { - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); + return gen8_ppgtt_alloc_pdp(vm, + i915_vm_to_ppgtt(vm)->pd, start, length); +} + +static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm, + u64 start, u64 length) +{ + struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); + struct i915_page_directory * const pml4 = ppgtt->pd; + struct i915_page_directory *pdp, *alloc = NULL; + u64 from = start; + int ret = 0; + u32 pml4e; + + spin_lock(&pml4->lock); + gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { + if (px_base(pdp) == &vm->scratch_pdp) { + spin_unlock(&pml4->lock); + + pdp = fetch_and_zero(&alloc); + if (!pdp) + pdp = alloc_pd(vm); + if (IS_ERR(pdp)) { + ret = PTR_ERR(pdp); + goto unwind; + } + + init_pd(pdp, &vm->scratch_pd); + + spin_lock(&pml4->lock); + if (pml4->entry[pml4e] == &vm->scratch_pdp) { + set_pd_entry(pml4, pml4e, pdp); + } else { + alloc = pdp; + pdp = pml4->entry[pml4e]; + } + } + atomic_inc(px_used(pdp)); + spin_unlock(&pml4->lock); + + ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length); + if (unlikely(ret)) + goto unwind_pdp; + + spin_lock(&pml4->lock); + atomic_dec(px_used(pdp)); + } + spin_unlock(&pml4->lock); + goto out; + +unwind_pdp: + if (release_pd_entry(pml4, pml4e, 
&pdp->pt, &vm->scratch_pdp)) + free_px(vm, pdp); +unwind: + gen8_ppgtt_clear_4lvl(vm, from, start - from); +out: + if (alloc) + free_px(vm, alloc); + return ret; +} + +static inline struct sgt_dma { + struct scatterlist *sg; + dma_addr_t dma, max; +} sgt_dma(struct i915_vma *vma) { + struct scatterlist *sg = vma->pages->sgl; + dma_addr_t addr = sg_dma_address(sg); + return (struct sgt_dma) { sg, addr, addr + sg->length }; +} + +struct gen8_insert_pte { + u16 pml4e; + u16 pdpe; + u16 pde; + u16 pte; +}; + +static __always_inline struct gen8_insert_pte gen8_insert_pte(u64 start) +{ + return (struct gen8_insert_pte) { + gen8_pml4e_index(start), + gen8_pdpe_index(start), + gen8_pde_index(start), + gen8_pte_index(start), + }; +} + +static __always_inline bool +gen8_ppgtt_insert_pte_entries(struct i915_ppgtt *ppgtt, + struct i915_page_directory *pdp, + struct sgt_dma *iter, + struct gen8_insert_pte *idx, + enum i915_cache_level cache_level, + u32 flags) +{ + struct i915_page_directory *pd; + const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags); + gen8_pte_t *vaddr; + bool ret; + + GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm)); + pd = i915_pd_entry(pdp, idx->pdpe); + vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde)); + do { + vaddr[idx->pte] = pte_encode | iter->dma; + + iter->dma += I915_GTT_PAGE_SIZE; + if (iter->dma >= iter->max) { + iter->sg = __sg_next(iter->sg); + if (!iter->sg) { + ret = false; + break; + } + + iter->dma = sg_dma_address(iter->sg); + iter->max = iter->dma + iter->sg->length; + } + + if (++idx->pte == GEN8_PTES) { + idx->pte = 0; + + if (++idx->pde == I915_PDES) { + idx->pde = 0; + + /* Limited by sg length for 3lvl */ + if (++idx->pdpe == GEN8_PML4ES_PER_PML4) { + idx->pdpe = 0; + ret = true; + break; + } + + GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm)); + pd = pdp->entry[idx->pdpe]; + } + + kunmap_atomic(vaddr); + vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde)); + } + } while (1); + kunmap_atomic(vaddr); + + return ret; +} + +static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm, + struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) +{ + struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct sgt_dma iter = sgt_dma(vma); struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start); @@ -1161,17 +1429,6 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, } } -static void gen8_free_page_tables(struct i915_address_space *vm, - struct i915_page_directory *pd) -{ - int i; - - for (i = 0; i < I915_PDES; i++) { - if (pd->entry[i] != &vm->scratch_pt) - free_pd(vm, pd->entry[i]); - } -} - static int gen8_init_scratch(struct i915_address_space *vm) { int ret; @@ -1239,262 +1496,6 @@ static int gen8_init_scratch(struct i915_address_space *vm) return ret; } -static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) -{ - struct i915_address_space *vm = &ppgtt->vm; - struct drm_i915_private *dev_priv = vm->i915; - enum vgt_g2v_type msg; - int i; - - if (create) - atomic_inc(px_used(ppgtt->pd)); /* never remove */ - else - atomic_dec(px_used(ppgtt->pd)); - - if (i915_vm_is_4lvl(vm)) { - const u64 daddr = px_dma(ppgtt->pd); - - I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr)); - I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr)); - - msg = (create ? 
VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE : - VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY); - } else { - for (i = 0; i < GEN8_3LVL_PDPES; i++) { - const u64 daddr = i915_page_dir_dma_addr(ppgtt, i); - - I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr)); - I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr)); - } - - msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE : - VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY); - } - - I915_WRITE(vgtif_reg(g2v_notify), msg); - - return 0; -} - -static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm, - struct i915_page_directory *pdp) -{ - const unsigned int pdpes = i915_pdpes_per_pdp(vm); - int i; - - for (i = 0; i < pdpes; i++) { - if (pdp->entry[i] == &vm->scratch_pd) - continue; - - gen8_free_page_tables(vm, pdp->entry[i]); - free_pd(vm, pdp->entry[i]); - } - - free_px(vm, pdp); -} - -static void gen8_ppgtt_cleanup_4lvl(struct i915_ppgtt *ppgtt) -{ - struct i915_page_directory * const pml4 = ppgtt->pd; - int i; - - for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) { - struct i915_page_directory *pdp = i915_pdp_entry(pml4, i); - - if (px_base(pdp) == &ppgtt->vm.scratch_pdp) - continue; - - gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, pdp); - } - - free_px(&ppgtt->vm, pml4); -} - -static void gen8_ppgtt_cleanup(struct i915_address_space *vm) -{ - struct drm_i915_private *i915 = vm->i915; - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - - if (intel_vgpu_active(i915)) - gen8_ppgtt_notify_vgt(ppgtt, false); - - if (i915_vm_is_4lvl(vm)) - gen8_ppgtt_cleanup_4lvl(ppgtt); - else - gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, ppgtt->pd); - - free_scratch(vm); -} - -static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm, - struct i915_page_directory *pd, - u64 start, u64 length) -{ - struct i915_page_table *pt, *alloc = NULL; - u64 from = start; - unsigned int pde; - int ret = 0; - - spin_lock(&pd->lock); - gen8_for_each_pde(pt, pd, start, length, pde) { - const int count = gen8_pte_count(start, length); - - if (px_base(pt) == &vm->scratch_pt) { - spin_unlock(&pd->lock); - - pt = fetch_and_zero(&alloc); - if (!pt) - pt = alloc_pt(vm); - if (IS_ERR(pt)) { - ret = PTR_ERR(pt); - goto unwind; - } - - if (count < GEN8_PTES || intel_vgpu_active(vm->i915)) - fill_px(pt, vm->scratch_pte); - - spin_lock(&pd->lock); - if (pd->entry[pde] == &vm->scratch_pt) { - set_pd_entry(pd, pde, pt); - } else { - alloc = pt; - pt = pd->entry[pde]; - } - } - - atomic_add(count, &pt->used); - } - spin_unlock(&pd->lock); - goto out; - -unwind: - gen8_ppgtt_clear_pd(vm, pd, from, start - from); -out: - if (alloc) - free_px(vm, alloc); - return ret; -} - -static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm, - struct i915_page_directory *pdp, - u64 start, u64 length) -{ - struct i915_page_directory *pd, *alloc = NULL; - u64 from = start; - unsigned int pdpe; - int ret = 0; - - spin_lock(&pdp->lock); - gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { - if (px_base(pd) == &vm->scratch_pd) { - spin_unlock(&pdp->lock); - - pd = fetch_and_zero(&alloc); - if (!pd) - pd = alloc_pd(vm); - if (IS_ERR(pd)) { - ret = PTR_ERR(pd); - goto unwind; - } - - init_pd(pd, &vm->scratch_pt); - - spin_lock(&pdp->lock); - if (pdp->entry[pdpe] == &vm->scratch_pd) { - set_pd_entry(pdp, pdpe, pd); - } else { - alloc = pd; - pd = pdp->entry[pdpe]; - } - } - atomic_inc(px_used(pd)); - spin_unlock(&pdp->lock); - - ret = gen8_ppgtt_alloc_pd(vm, pd, start, length); - if (unlikely(ret)) - goto unwind_pd; - - spin_lock(&pdp->lock); - atomic_dec(px_used(pd)); - } - spin_unlock(&pdp->lock); - goto out; - -unwind_pd: - if 
(release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch_pd)) - free_px(vm, pd); -unwind: - gen8_ppgtt_clear_pdp(vm, pdp, from, start - from); -out: - if (alloc) - free_px(vm, alloc); - return ret; -} - -static int gen8_ppgtt_alloc_3lvl(struct i915_address_space *vm, - u64 start, u64 length) -{ - return gen8_ppgtt_alloc_pdp(vm, - i915_vm_to_ppgtt(vm)->pd, start, length); -} - -static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm, - u64 start, u64 length) -{ - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - struct i915_page_directory * const pml4 = ppgtt->pd; - struct i915_page_directory *pdp, *alloc = NULL; - u64 from = start; - int ret = 0; - u32 pml4e; - - spin_lock(&pml4->lock); - gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { - if (px_base(pdp) == &vm->scratch_pdp) { - spin_unlock(&pml4->lock); - - pdp = fetch_and_zero(&alloc); - if (!pdp) - pdp = alloc_pd(vm); - if (IS_ERR(pdp)) { - ret = PTR_ERR(pdp); - goto unwind; - } - - init_pd(pdp, &vm->scratch_pd); - - spin_lock(&pml4->lock); - if (pml4->entry[pml4e] == &vm->scratch_pdp) { - set_pd_entry(pml4, pml4e, pdp); - } else { - alloc = pdp; - pdp = pml4->entry[pml4e]; - } - } - atomic_inc(px_used(pdp)); - spin_unlock(&pml4->lock); - - ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length); - if (unlikely(ret)) - goto unwind_pdp; - - spin_lock(&pml4->lock); - atomic_dec(px_used(pdp)); - } - spin_unlock(&pml4->lock); - goto out; - -unwind_pdp: - if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch_pdp)) - free_px(vm, pdp); -unwind: - gen8_ppgtt_clear_4lvl(vm, from, start - from); -out: - if (alloc) - free_px(vm, alloc); - return ret; -} - static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt) { struct i915_address_space *vm = &ppgtt->vm; From patchwork Sun Jul 7 21:00:17 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11034313 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 8379613A4 for ; Sun, 7 Jul 2019 21:00:54 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 7405B27CAF for ; Sun, 7 Jul 2019 21:00:54 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 6843127F93; Sun, 7 Jul 2019 21:00:54 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-5.2 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_MED autolearn=ham version=3.3.1 Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher DHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.wl.linuxfoundation.org (Postfix) with ESMTPS id 91BD427CAF for ; Sun, 7 Jul 2019 21:00:53 +0000 (UTC) Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id A8FE589A9A; Sun, 7 Jul 2019 21:00:51 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id 9DDE589AB5 for ; Sun, 7 Jul 2019 21:00:50 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from haswell.alporthouse.com (unverified 
[78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 17163708-1500050 for multiple; Sun, 07 Jul 2019 22:00:27 +0100 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Sun, 7 Jul 2019 22:00:17 +0100 Message-Id: <20190707210024.26192-5-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190707210024.26192-1-chris@chris-wilson.co.uk> References: <20190707210024.26192-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 04/11] drm/i915/gtt: Markup i915_ppgtt depth X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" X-Virus-Scanned: ClamAV using ClamSMTP This will be useful to consolidate recursive code. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem_gtt.c | 3 +++ drivers/gpu/drm/i915/i915_gem_gtt.h | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index da4db76ce054..2fc60e8acd9a 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1537,6 +1537,8 @@ static void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt) ppgtt->vm.vma_ops.unbind_vma = ppgtt_unbind_vma; ppgtt->vm.vma_ops.set_pages = ppgtt_set_pages; ppgtt->vm.vma_ops.clear_pages = clear_pages; + + ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 3 : 2; } static void init_pd_n(struct i915_address_space *vm, @@ -2086,6 +2088,7 @@ static struct i915_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915) return ERR_PTR(-ENOMEM); ppgtt_init(&ppgtt->base, &i915->gt); + ppgtt->base.vm.top = 1; ppgtt->base.vm.allocate_va_range = gen6_alloc_va_range; ppgtt->base.vm.clear_range = gen6_ppgtt_clear_range; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 48bb8c5125e3..119b6d33b266 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -323,6 +323,7 @@ struct i915_address_space { struct i915_page_dma scratch_pt; struct i915_page_dma scratch_pd; struct i915_page_dma scratch_pdp; /* GEN8+ & 48b PPGTT */ + int top; /** * List of vma currently bound. 
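The payoff of the depth markup shows up once the per-level scratch pages
become an array later in the series: with level 0 the data page and
levels 1..top the directories (gen6: top == 1, 3-level gen8: top == 2,
4-level gen8: top == 3), per-generation special cases collapse into a
loop over the levels. A sketch of that kind of consolidation, condensed
from the free_scratch() rework that appears later in this series:

static void free_scratch(struct i915_address_space *vm)
{
	int i;

	if (!px_dma(&vm->scratch[0])) /* set to 0 on clones */
		return;

	/* One loop covers gen6 (top == 1) through 4-level gen8
	 * (top == 3); no i915_vm_is_4lvl() branching required.
	 */
	for (i = 1; i <= vm->top; i++) {
		if (!px_dma(&vm->scratch[i]))
			break;
		cleanup_page_dma(vm, px_base(&vm->scratch[i]));
	}

	cleanup_scratch_page(vm);
}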
From patchwork Sun Jul 7 21:00:18 2019
From: Chris Wilson
To: intel-gfx@lists.freedesktop.org
Date: Sun, 7 Jul 2019 22:00:18 +0100
Message-Id: <20190707210024.26192-6-chris@chris-wilson.co.uk>
In-Reply-To: <20190707210024.26192-1-chris@chris-wilson.co.uk>
Subject: [Intel-gfx] [PATCH 05/11] drm/i915/gtt: Compute the radix for gen8 page table levels

The radix levels of each page directory are easily determined so replace
the numerous hardcoded constants with precomputed derived constants.
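In other words, with I915_PDES == 512 (2^9) entries per level and 4KiB
pages, the shift for level lvl is simply 12 + 9 * lvl, and the index of
an address at that level is the usual shift-and-mask. A small
self-contained check of the derived shifts against the familiar
hardcoded values (pte_shift/pte_index are illustrative helpers, not the
driver's macros):

#include <assert.h>
#include <stdint.h>

#define GEN8_PTE_SHIFT	12	/* 4KiB pages */
#define I915_PDES	512	/* entries per level, 2^9 */

/* Bit position of level 'lvl' within a GPU address (lvl 0 = PTE). */
static unsigned int pte_shift(int lvl)
{
	return GEN8_PTE_SHIFT + 9 * lvl;	/* 9 == ilog2(I915_PDES) */
}

/* Index into the level-'lvl' table for address 'addr'. */
static unsigned int pte_index(uint64_t addr, int lvl)
{
	return (addr >> pte_shift(lvl)) & (I915_PDES - 1);
}

int main(void)
{
	/* The derived shifts reproduce the familiar hardcoded ones:
	 * PTE @ 12, PDE @ 21, PDPE @ 30, PML4E @ 39.
	 */
	assert(pte_shift(0) == 12);
	assert(pte_shift(1) == 21);
	assert(pte_shift(2) == 30);
	assert(pte_shift(3) == 39);

	/* A 1GiB-aligned address lands at pdpe index 1, pde index 0. */
	assert(pte_index(1ull << 30, 2) == 1);
	assert(pte_index(1ull << 30, 1) == 0);

	return 0;
}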
Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_gem_gtt.c | 39 +++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 2fc60e8acd9a..271305705c1c 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -868,6 +868,45 @@ static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create) return 0; } +/* Index shifts into the pagetable are offset by GEN8_PTE_SHIFT [12] */ +#define gen8_pd_shift(lvl) ((lvl) * ilog2(I915_PDES)) +#define gen8_pd_index(i, lvl) i915_pde_index((i), gen8_pd_shift(lvl)) +#define __gen8_pte_shift(lvl) (GEN8_PTE_SHIFT + gen8_pd_shift(lvl)) +#define __gen8_pte_index(a, lvl) i915_pde_index((a), __gen8_pte_shift(lvl)) + +static inline unsigned int +gen8_pd_range(u64 addr, u64 end, int lvl, unsigned int *idx) +{ + const int shift = gen8_pd_shift(lvl); + const u64 mask = ~0ull << gen8_pd_shift(lvl + 1); + + GEM_BUG_ON(addr >= end); + end += ~mask >> gen8_pd_shift(1); + + *idx = i915_pde_index(addr, shift); + if ((addr ^ end) & mask) + return I915_PDES - *idx; + else + return i915_pde_index(end, shift) - *idx; +} + +static inline bool gen8_pd_subsumes(u64 addr, u64 end, int lvl) +{ + const u64 mask = ~0ull << gen8_pd_shift(lvl + 1); + + GEM_BUG_ON(addr >= end); + return (addr ^ end) & mask && (addr & ~mask) == 0; +} + +static inline unsigned int gen8_pt_count(u64 addr, u64 end) +{ + GEM_BUG_ON(addr >= end); + if ((addr ^ end) & ~I915_PDE_MASK) + return I915_PDES - (addr & I915_PDE_MASK); + else + return end - addr; +} + static void gen8_free_page_tables(struct i915_address_space *vm, struct i915_page_directory *pd) { From patchwork Sun Jul 7 21:00:19 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11034333 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 26D891510 for ; Sun, 7 Jul 2019 21:02:29 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 1492B28179 for ; Sun, 7 Jul 2019 21:02:29 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 08A9F281DB; Sun, 7 Jul 2019 21:02:29 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-5.2 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_MED autolearn=ham version=3.3.1 Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher DHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.wl.linuxfoundation.org (Postfix) with ESMTPS id EBB2228179 for ; Sun, 7 Jul 2019 21:02:27 +0000 (UTC) Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 74CB789AE6; Sun, 7 Jul 2019 21:02:27 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id 3339489ABA for ; Sun, 7 Jul 2019 21:02:20 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from haswell.alporthouse.com 
(unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 17163710-1500050 for multiple; Sun, 07 Jul 2019 22:00:28 +0100 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Sun, 7 Jul 2019 22:00:19 +0100 Message-Id: <20190707210024.26192-7-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190707210024.26192-1-chris@chris-wilson.co.uk> References: <20190707210024.26192-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 06/11] drm/i915/gtt: Convert vm->scratch into an array X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" X-Virus-Scanned: ClamAV using ClamSMTP Each level has its own scratch. Make the levels more obvious by forgoing the fancy similarly names and replace them with a number. 0 is the bottom most level, the physical page used for actual data; 1+ are the page directories. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_gem_gtt.c | 206 ++++++++++++---------------- drivers/gpu/drm/i915/i915_gem_gtt.h | 14 +- 2 files changed, 99 insertions(+), 121 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 271305705c1c..b7882f06214a 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -634,7 +634,7 @@ setup_scratch_page(struct i915_address_space *vm, gfp_t gfp) gfp |= __GFP_ZERO | __GFP_RETRY_MAYFAIL; do { - int order = get_order(size); + unsigned int order = get_order(size); struct page *page; dma_addr_t addr; @@ -653,8 +653,8 @@ setup_scratch_page(struct i915_address_space *vm, gfp_t gfp) if (unlikely(!IS_ALIGNED(addr, size))) goto unmap_page; - vm->scratch_page.page = page; - vm->scratch_page.daddr = addr; + vm->scratch[0].base.page = page; + vm->scratch[0].base.daddr = addr; vm->scratch_order = order; return 0; @@ -673,8 +673,8 @@ setup_scratch_page(struct i915_address_space *vm, gfp_t gfp) static void cleanup_scratch_page(struct i915_address_space *vm) { - struct i915_page_dma *p = &vm->scratch_page; - int order = vm->scratch_order; + struct i915_page_dma *p = px_base(&vm->scratch[0]); + unsigned int order = vm->scratch_order; dma_unmap_page(vm->dma, p->daddr, BIT(order) << PAGE_SHIFT, PCI_DMA_BIDIRECTIONAL); @@ -683,15 +683,16 @@ static void cleanup_scratch_page(struct i915_address_space *vm) static void free_scratch(struct i915_address_space *vm) { - if (!vm->scratch_page.daddr) /* set to 0 on clones */ + int i; + + if (!px_dma(&vm->scratch[0])) /* set to 0 on clones */ return; - if (vm->scratch_pdp.daddr) - cleanup_page_dma(vm, &vm->scratch_pdp); - if (vm->scratch_pd.daddr) - cleanup_page_dma(vm, &vm->scratch_pd); - if (vm->scratch_pt.daddr) - cleanup_page_dma(vm, &vm->scratch_pt); + for (i = 1; i <= vm->top; i++) { + if (!px_dma(&vm->scratch[i])) + break; + cleanup_page_dma(vm, px_base(&vm->scratch[i])); + } cleanup_scratch_page(vm); } @@ -753,9 +754,9 @@ static void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd) #define free_px(vm, px) free_pd(vm, px_base(px)) static void init_pd(struct i915_page_directory *pd, - struct i915_page_dma *scratch) + struct i915_page_scratch *scratch) { - fill_px(pd, gen8_pde_encode(scratch->daddr, I915_CACHE_LLC)); + fill_px(pd, scratch->encode); memset_p(pd->entry, scratch, 
512); } @@ -783,30 +784,26 @@ __set_pd_entry(struct i915_page_directory * const pd, write_dma_entry(px_base(pd), pde, encode(to->daddr, I915_CACHE_LLC)); } +#define set_pd_entry(pd, pde, to) \ + __set_pd_entry((pd), (pde), px_base(to), gen8_pde_encode) + static inline void -__clear_pd_entry(struct i915_page_directory * const pd, - const unsigned short pde, - struct i915_page_dma * const to, - u64 (*encode)(const dma_addr_t, const enum i915_cache_level)) +clear_pd_entry(struct i915_page_directory * const pd, + const unsigned short pde, + struct i915_page_scratch * const scratch) { GEM_BUG_ON(atomic_read(px_used(pd)) == 0); - write_dma_entry(px_base(pd), pde, encode(to->daddr, I915_CACHE_LLC)); - pd->entry[pde] = to; + write_dma_entry(px_base(pd), pde, scratch->encode); + pd->entry[pde] = scratch; atomic_dec(px_used(pd)); } -#define set_pd_entry(pd, pde, to) \ - __set_pd_entry((pd), (pde), px_base(to), gen8_pde_encode) - -#define clear_pd_entry(pd, pde, to) \ - __clear_pd_entry((pd), (pde), (to), gen8_pde_encode) - static bool release_pd_entry(struct i915_page_directory * const pd, const unsigned short pde, struct i915_page_table * const pt, - struct i915_page_dma * const scratch) + struct i915_page_scratch * const scratch) { bool free = false; @@ -913,7 +910,7 @@ static void gen8_free_page_tables(struct i915_address_space *vm, int i; for (i = 0; i < I915_PDES; i++) { - if (pd->entry[i] != &vm->scratch_pt) + if (pd->entry[i] != &vm->scratch[1]) free_pd(vm, pd->entry[i]); } } @@ -925,7 +922,7 @@ static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm, int i; for (i = 0; i < pdpes; i++) { - if (pdp->entry[i] == &vm->scratch_pd) + if (pdp->entry[i] == &vm->scratch[2]) continue; gen8_free_page_tables(vm, pdp->entry[i]); @@ -943,7 +940,7 @@ static void gen8_ppgtt_cleanup_4lvl(struct i915_ppgtt *ppgtt) for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) { struct i915_page_directory *pdp = i915_pdp_entry(pml4, i); - if (px_base(pdp) == &ppgtt->vm.scratch_pdp) + if (px_base(pdp) == px_base(&ppgtt->vm.scratch[3])) continue; gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, pdp); @@ -979,7 +976,9 @@ static void gen8_ppgtt_clear_pt(const struct i915_address_space *vm, gen8_pte_t *vaddr; vaddr = kmap_atomic_px(pt); - memset64(vaddr + gen8_pte_index(start), vm->scratch_pte, num_entries); + memset64(vaddr + gen8_pte_index(start), + vm->scratch[0].encode, + num_entries); kunmap_atomic(vaddr); GEM_BUG_ON(num_entries > atomic_read(&pt->used)); @@ -995,11 +994,11 @@ static void gen8_ppgtt_clear_pd(struct i915_address_space *vm, u32 pde; gen8_for_each_pde(pt, pd, start, length, pde) { - GEM_BUG_ON(px_base(pt) == &vm->scratch_pt); + GEM_BUG_ON(px_base(pt) == px_base(&vm->scratch[1])); atomic_inc(&pt->used); gen8_ppgtt_clear_pt(vm, pt, start, length); - if (release_pd_entry(pd, pde, pt, &vm->scratch_pt)) + if (release_pd_entry(pd, pde, pt, &vm->scratch[1])) free_px(vm, pt); } } @@ -1015,11 +1014,11 @@ static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm, unsigned int pdpe; gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { - GEM_BUG_ON(px_base(pd) == &vm->scratch_pd); + GEM_BUG_ON(px_base(pd) == px_base(&vm->scratch[2])); atomic_inc(px_used(pd)); gen8_ppgtt_clear_pd(vm, pd, start, length); - if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch_pd)) + if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch[2])) free_px(vm, pd); } } @@ -1045,16 +1044,15 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm, GEM_BUG_ON(!i915_vm_is_4lvl(vm)); gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { - 
GEM_BUG_ON(px_base(pdp) == &vm->scratch_pdp); + GEM_BUG_ON(px_base(pdp) == px_base(&vm->scratch[3])); atomic_inc(px_used(pdp)); gen8_ppgtt_clear_pdp(vm, pdp, start, length); - if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch_pdp)) + if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch[3])) free_px(vm, pdp); } } - static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm, struct i915_page_directory *pd, u64 start, u64 length) @@ -1068,7 +1066,7 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm, gen8_for_each_pde(pt, pd, start, length, pde) { const int count = gen8_pte_count(start, length); - if (px_base(pt) == &vm->scratch_pt) { + if (px_base(pt) == px_base(&vm->scratch[1])) { spin_unlock(&pd->lock); pt = fetch_and_zero(&alloc); @@ -1080,10 +1078,10 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm, } if (count < GEN8_PTES || intel_vgpu_active(vm->i915)) - fill_px(pt, vm->scratch_pte); + fill_px(pt, vm->scratch[0].encode); spin_lock(&pd->lock); - if (pd->entry[pde] == &vm->scratch_pt) { + if (pd->entry[pde] == &vm->scratch[1]) { set_pd_entry(pd, pde, pt); } else { alloc = pt; @@ -1115,7 +1113,7 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm, spin_lock(&pdp->lock); gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { - if (px_base(pd) == &vm->scratch_pd) { + if (px_base(pd) == px_base(&vm->scratch[2])) { spin_unlock(&pdp->lock); pd = fetch_and_zero(&alloc); @@ -1126,10 +1124,10 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm, goto unwind; } - init_pd(pd, &vm->scratch_pt); + init_pd(pd, &vm->scratch[1]); spin_lock(&pdp->lock); - if (pdp->entry[pdpe] == &vm->scratch_pd) { + if (pdp->entry[pdpe] == &vm->scratch[2]) { set_pd_entry(pdp, pdpe, pd); } else { alloc = pd; @@ -1150,7 +1148,7 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm, goto out; unwind_pd: - if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch_pd)) + if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch[2])) free_px(vm, pd); unwind: gen8_ppgtt_clear_pdp(vm, pdp, from, start - from); @@ -1179,7 +1177,7 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm, spin_lock(&pml4->lock); gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { - if (px_base(pdp) == &vm->scratch_pdp) { + if (px_base(pdp) == px_base(&vm->scratch[3])) { spin_unlock(&pml4->lock); pdp = fetch_and_zero(&alloc); @@ -1190,10 +1188,10 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm, goto unwind; } - init_pd(pdp, &vm->scratch_pd); + init_pd(pdp, &vm->scratch[2]); spin_lock(&pml4->lock); - if (pml4->entry[pml4e] == &vm->scratch_pdp) { + if (pml4->entry[pml4e] == &vm->scratch[3]) { set_pd_entry(pml4, pml4e, pdp); } else { alloc = pdp; @@ -1214,7 +1212,7 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm, goto out; unwind_pdp: - if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch_pdp)) + if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch[3])) free_px(vm, pdp); unwind: gen8_ppgtt_clear_4lvl(vm, from, start - from); @@ -1428,7 +1426,7 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) { u16 i; - encode = vma->vm->scratch_pte; + encode = vma->vm->scratch[0].encode; vaddr = kmap_atomic_px(i915_pt_entry(pd, idx.pde)); @@ -1471,6 +1469,7 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, static int gen8_init_scratch(struct i915_address_space *vm) { int ret; + int i; /* * If everybody agrees to not to write into the scratch page, @@ -1484,10 +1483,8 @@ 
static int gen8_init_scratch(struct i915_address_space *vm) GEM_BUG_ON(!clone->has_read_only); vm->scratch_order = clone->scratch_order; - vm->scratch_pte = clone->scratch_pte; - vm->scratch_pt = clone->scratch_pt; - vm->scratch_pd = clone->scratch_pd; - vm->scratch_pdp = clone->scratch_pdp; + memcpy(vm->scratch, clone->scratch, sizeof(vm->scratch)); + px_dma(&vm->scratch[0]) = 0; /* no xfer of ownership */ return 0; } @@ -1495,44 +1492,25 @@ static int gen8_init_scratch(struct i915_address_space *vm) if (ret) return ret; - vm->scratch_pte = - gen8_pte_encode(vm->scratch_page.daddr, - I915_CACHE_LLC, - vm->has_read_only); + vm->scratch[0].encode = + gen8_pte_encode(px_dma(&vm->scratch[0]), + I915_CACHE_LLC, vm->has_read_only); - if (unlikely(setup_page_dma(vm, &vm->scratch_pt))) { - ret = -ENOMEM; - goto free_scratch_page; - } - fill_page_dma(&vm->scratch_pt, vm->scratch_pte); + for (i = 1; i <= vm->top; i++) { + if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[i])))) + goto free_scratch; - if (unlikely(setup_page_dma(vm, &vm->scratch_pd))) { - ret = -ENOMEM; - goto free_pt; - } - fill_page_dma(&vm->scratch_pd, - gen8_pde_encode(vm->scratch_pd.daddr, I915_CACHE_LLC)); - - if (i915_vm_is_4lvl(vm)) { - if (unlikely(setup_page_dma(vm, &vm->scratch_pdp))) { - ret = -ENOMEM; - goto free_pd; - } - fill_page_dma(&vm->scratch_pdp, - gen8_pde_encode(vm->scratch_pdp.daddr, - I915_CACHE_LLC)); + fill_px(&vm->scratch[i], vm->scratch[i - 1].encode); + vm->scratch[i].encode = + gen8_pde_encode(px_dma(&vm->scratch[i]), + I915_CACHE_LLC); } return 0; -free_pd: - cleanup_page_dma(vm, &vm->scratch_pd); -free_pt: - cleanup_page_dma(vm, &vm->scratch_pt); -free_scratch_page: - cleanup_scratch_page(vm); - - return ret; +free_scratch: + free_scratch(vm); + return -ENOMEM; } static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt) @@ -1549,7 +1527,7 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt) if (IS_ERR(pd)) goto unwind; - init_pd(pd, &vm->scratch_pt); + init_pd(pd, &vm->scratch[1]); set_pd_entry(pdp, pdpe, pd); } @@ -1582,16 +1560,15 @@ static void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt) static void init_pd_n(struct i915_address_space *vm, struct i915_page_directory *pd, - struct i915_page_dma *to, + struct i915_page_scratch *scratch, const unsigned int entries) { - const u64 daddr = gen8_pde_encode(to->daddr, I915_CACHE_LLC); u64 * const vaddr = kmap_atomic_px(pd); - memset64(vaddr, daddr, entries); + memset64(vaddr, scratch->encode, entries); kunmap_atomic(vaddr); - memset_p(pd->entry, to, entries); + memset_p(pd->entry, scratch, entries); } static struct i915_page_directory * @@ -1602,7 +1579,7 @@ gen8_alloc_top_pd(struct i915_address_space *vm) if (i915_vm_is_4lvl(vm)) { pd = alloc_pd(vm); if (!IS_ERR(pd)) - init_pd(pd, &vm->scratch_pdp); + init_pd(pd, &vm->scratch[3]); return pd; } @@ -1619,7 +1596,7 @@ gen8_alloc_top_pd(struct i915_address_space *vm) return ERR_PTR(-ENOMEM); } - init_pd_n(vm, pd, &vm->scratch_pd, GEN8_3LVL_PDPES); + init_pd_n(vm, pd, &vm->scratch[2], GEN8_3LVL_PDPES); return pd; } @@ -1766,7 +1743,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, { struct gen6_ppgtt * const ppgtt = to_gen6_ppgtt(i915_vm_to_ppgtt(vm)); const unsigned int first_entry = start / I915_GTT_PAGE_SIZE; - const gen6_pte_t scratch_pte = vm->scratch_pte; + const gen6_pte_t scratch_pte = vm->scratch[0].encode; unsigned int pde = first_entry / GEN6_PTES; unsigned int pte = first_entry % GEN6_PTES; unsigned int num_entries = length / 
I915_GTT_PAGE_SIZE; @@ -1777,7 +1754,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, const unsigned int count = min(num_entries, GEN6_PTES - pte); gen6_pte_t *vaddr; - GEM_BUG_ON(px_base(pt) == &vm->scratch_pt); + GEM_BUG_ON(px_base(pt) == px_base(&vm->scratch[1])); num_entries -= count; @@ -1814,7 +1791,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, struct sgt_dma iter = sgt_dma(vma); gen6_pte_t *vaddr; - GEM_BUG_ON(pd->entry[act_pt] == &vm->scratch_pt); + GEM_BUG_ON(pd->entry[act_pt] == &vm->scratch[1]); vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt)); do { @@ -1859,7 +1836,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, gen6_for_each_pde(pt, pd, start, length, pde) { const unsigned int count = gen6_pte_count(start, length); - if (px_base(pt) == &vm->scratch_pt) { + if (px_base(pt) == px_base(&vm->scratch[1])) { spin_unlock(&pd->lock); pt = fetch_and_zero(&alloc); @@ -1870,10 +1847,10 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, goto unwind_out; } - fill32_px(pt, vm->scratch_pte); + fill32_px(pt, vm->scratch[0].encode); spin_lock(&pd->lock); - if (pd->entry[pde] == &vm->scratch_pt) { + if (pd->entry[pde] == &vm->scratch[1]) { pd->entry[pde] = pt; if (i915_vma_is_bound(ppgtt->vma, I915_VMA_GLOBAL_BIND)) { @@ -1910,26 +1887,23 @@ static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt) { struct i915_address_space * const vm = &ppgtt->base.vm; struct i915_page_directory * const pd = ppgtt->base.pd; - struct i915_page_table *unused; - u32 pde; int ret; ret = setup_scratch_page(vm, __GFP_HIGHMEM); if (ret) return ret; - vm->scratch_pte = vm->pte_encode(vm->scratch_page.daddr, - I915_CACHE_NONE, - PTE_READ_ONLY); + vm->scratch[0].encode = + vm->pte_encode(px_dma(&vm->scratch[0]), + I915_CACHE_NONE, PTE_READ_ONLY); - if (unlikely(setup_page_dma(vm, &vm->scratch_pt))) { + if (unlikely(setup_page_dma(vm, px_base(&vm->scratch[1])))) { cleanup_scratch_page(vm); return -ENOMEM; } - fill_page_dma_32(&vm->scratch_pt, vm->scratch_pte); - gen6_for_all_pdes(unused, pd, pde) - pd->entry[pde] = &vm->scratch_pt; + fill32_px(&vm->scratch[1], vm->scratch[0].encode); + memset_p(pd->entry, &vm->scratch[1], I915_PDES); return 0; } @@ -1937,11 +1911,13 @@ static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt) static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt) { struct i915_page_directory * const pd = ppgtt->base.pd; + struct i915_page_dma * const scratch = + px_base(&ppgtt->base.vm.scratch[1]); struct i915_page_table *pt; u32 pde; gen6_for_all_pdes(pt, pd, pde) - if (px_base(pt) != &ppgtt->base.vm.scratch_pt) + if (px_base(pt) != scratch) free_px(&ppgtt->base.vm, pt); } @@ -1999,7 +1975,8 @@ static void pd_vma_unbind(struct i915_vma *vma) { struct gen6_ppgtt *ppgtt = vma->private; struct i915_page_directory * const pd = ppgtt->base.pd; - struct i915_page_dma * const scratch = &ppgtt->base.vm.scratch_pt; + struct i915_page_dma * const scratch = + px_base(&ppgtt->base.vm.scratch[1]); struct i915_page_table *pt; unsigned int pde; @@ -2405,7 +2382,7 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm, struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); unsigned first_entry = start / I915_GTT_PAGE_SIZE; unsigned num_entries = length / I915_GTT_PAGE_SIZE; - const gen8_pte_t scratch_pte = vm->scratch_pte; + const gen8_pte_t scratch_pte = vm->scratch[0].encode; gen8_pte_t __iomem *gtt_base = (gen8_pte_t __iomem *)ggtt->gsm + first_entry; const int max_entries = ggtt_total_entries(ggtt) - first_entry; @@ 
-2530,8 +2507,7 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm, first_entry, num_entries, max_entries)) num_entries = max_entries; - scratch_pte = vm->scratch_pte; - + scratch_pte = vm->scratch[0].encode; for (i = 0; i < num_entries; i++) iowrite32(scratch_pte, >t_base[i]); } @@ -3005,8 +2981,8 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) return ret; } - ggtt->vm.scratch_pte = - ggtt->vm.pte_encode(ggtt->vm.scratch_page.daddr, + ggtt->vm.scratch[0].encode = + ggtt->vm.pte_encode(px_dma(&ggtt->vm.scratch[0]), I915_CACHE_NONE, 0); return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 119b6d33b266..669b204d4c13 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -240,6 +240,11 @@ struct i915_page_dma { }; }; +struct i915_page_scratch { + struct i915_page_dma base; + u64 encode; +}; + struct i915_page_table { struct i915_page_dma base; atomic_t used; @@ -260,9 +265,10 @@ struct i915_page_directory { #define px_base(px) \ __px_choose_expr(px, struct i915_page_dma *, __x, \ + __px_choose_expr(px, struct i915_page_scratch *, &__x->base, \ __px_choose_expr(px, struct i915_page_table *, &__x->base, \ __px_choose_expr(px, struct i915_page_directory *, &__x->pt.base, \ - (void)0))) + (void)0)))) #define px_dma(px) (px_base(px)->daddr) #define px_pt(px) \ @@ -317,12 +323,8 @@ struct i915_address_space { #define VM_CLASS_GGTT 0 #define VM_CLASS_PPGTT 1 - u64 scratch_pte; + struct i915_page_scratch scratch[4]; int scratch_order; - struct i915_page_dma scratch_page; - struct i915_page_dma scratch_pt; - struct i915_page_dma scratch_pd; - struct i915_page_dma scratch_pdp; /* GEN8+ & 48b PPGTT */ int top; /** From patchwork Sun Jul 7 21:00:20 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11034319 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 3313013BD for ; Sun, 7 Jul 2019 21:00:57 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 2122B27CAF for ; Sun, 7 Jul 2019 21:00:57 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 1531427E01; Sun, 7 Jul 2019 21:00:57 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-5.2 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_MED autolearn=ham version=3.3.1 Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher DHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.wl.linuxfoundation.org (Postfix) with ESMTPS id 6A59C27FB3 for ; Sun, 7 Jul 2019 21:00:56 +0000 (UTC) Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 8304989AB2; Sun, 7 Jul 2019 21:00:52 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id A296989ABA for ; Sun, 7 Jul 2019 21:00:50 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from haswell.alporthouse.com (unverified 
[78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 17163711-1500050 for multiple; Sun, 07 Jul 2019 22:00:28 +0100 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Sun, 7 Jul 2019 22:00:20 +0100 Message-Id: <20190707210024.26192-8-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190707210024.26192-1-chris@chris-wilson.co.uk> References: <20190707210024.26192-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 07/11] drm/i915/gtt: Use NULL to encode scratch shadow entries X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" X-Virus-Scanned: ClamAV using ClamSMTP We can simplify our gtt walking code by comparing against NULL for scratch entries as opposed to looking up the distinct per-level scratch pointer. The only caveat is to remember to protect external parties and map the NULL to the scratch top pd. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_gem_gtt.c | 124 +++++++++------------------- drivers/gpu/drm/i915/i915_gem_gtt.h | 2 +- 2 files changed, 41 insertions(+), 85 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index b7882f06214a..a99b89502a90 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -596,18 +596,17 @@ static void cleanup_page_dma(struct i915_address_space *vm, #define kmap_atomic_px(px) kmap_atomic(px_base(px)->page) -#define fill_px(px, v) fill_page_dma(px_base(px), (v)) -#define fill32_px(px, v) fill_page_dma_32(px_base(px), (v)) - -static void fill_page_dma(struct i915_page_dma *p, const u64 val) +static void +fill_page_dma(struct i915_page_dma *p, const u64 val, unsigned int count) { - kunmap_atomic(memset64(kmap_atomic(p->page), val, I915_PDES)); + kunmap_atomic(memset64(kmap_atomic(p->page), val, count)); } -static void fill_page_dma_32(struct i915_page_dma *p, const u32 v) -{ - fill_page_dma(p, (u64)v << 32 | v); -} +#define fill_px(px, v) fill_page_dma(px_base(px), (v), I915_PDES) +#define fill32_px(px, v) do { \ + u64 vv = lower_32_bits(v); \ + fill_px(px, vv << 32 | vv); \ +} while (0) static int setup_scratch_page(struct i915_address_space *vm, gfp_t gfp) @@ -711,7 +710,6 @@ static struct i915_page_table *alloc_pt(struct i915_address_space *vm) } atomic_set(&pt->used, 0); - return pt; } @@ -719,13 +717,11 @@ static struct i915_page_directory *__alloc_pd(void) { struct i915_page_directory *pd; - pd = kmalloc(sizeof(*pd), I915_GFP_ALLOW_FAIL); + pd = kzalloc(sizeof(*pd), I915_GFP_ALLOW_FAIL); if (unlikely(!pd)) return NULL; - atomic_set(px_used(pd), 0); spin_lock_init(&pd->lock); - return pd; } @@ -753,63 +749,56 @@ static void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd) #define free_px(vm, px) free_pd(vm, px_base(px)) -static void init_pd(struct i915_page_directory *pd, - struct i915_page_scratch *scratch) -{ - fill_px(pd, scratch->encode); - memset_p(pd->entry, scratch, 512); -} - static inline void write_dma_entry(struct i915_page_dma * const pdma, - const unsigned short pde, + const unsigned short idx, const u64 encoded_entry) { u64 * const vaddr = kmap_atomic(pdma->page); - vaddr[pde] = encoded_entry; + vaddr[idx] = encoded_entry; kunmap_atomic(vaddr); } static inline void 
__set_pd_entry(struct i915_page_directory * const pd, - const unsigned short pde, + const unsigned short idx, struct i915_page_dma * const to, u64 (*encode)(const dma_addr_t, const enum i915_cache_level)) { GEM_BUG_ON(atomic_read(px_used(pd)) > 512); atomic_inc(px_used(pd)); - pd->entry[pde] = to; - write_dma_entry(px_base(pd), pde, encode(to->daddr, I915_CACHE_LLC)); + pd->entry[idx] = to; + write_dma_entry(px_base(pd), idx, encode(to->daddr, I915_CACHE_LLC)); } -#define set_pd_entry(pd, pde, to) \ - __set_pd_entry((pd), (pde), px_base(to), gen8_pde_encode) +#define set_pd_entry(pd, idx, to) \ + __set_pd_entry((pd), (idx), px_base(to), gen8_pde_encode) static inline void clear_pd_entry(struct i915_page_directory * const pd, - const unsigned short pde, - struct i915_page_scratch * const scratch) + const unsigned short idx, + const struct i915_page_scratch * const scratch) { GEM_BUG_ON(atomic_read(px_used(pd)) == 0); - write_dma_entry(px_base(pd), pde, scratch->encode); - pd->entry[pde] = scratch; + write_dma_entry(px_base(pd), idx, scratch->encode); + pd->entry[idx] = NULL; atomic_dec(px_used(pd)); } static bool release_pd_entry(struct i915_page_directory * const pd, - const unsigned short pde, + const unsigned short idx, struct i915_page_table * const pt, - struct i915_page_scratch * const scratch) + const struct i915_page_scratch * const scratch) { bool free = false; spin_lock(&pd->lock); if (atomic_dec_and_test(&pt->used)) { - clear_pd_entry(pd, pde, scratch); + clear_pd_entry(pd, idx, scratch); free = true; } spin_unlock(&pd->lock); @@ -910,7 +899,7 @@ static void gen8_free_page_tables(struct i915_address_space *vm, int i; for (i = 0; i < I915_PDES; i++) { - if (pd->entry[i] != &vm->scratch[1]) + if (pd->entry[i]) free_pd(vm, pd->entry[i]); } } @@ -922,7 +911,7 @@ static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm, int i; for (i = 0; i < pdpes; i++) { - if (pdp->entry[i] == &vm->scratch[2]) + if (!pdp->entry[i]) continue; gen8_free_page_tables(vm, pdp->entry[i]); @@ -940,7 +929,7 @@ static void gen8_ppgtt_cleanup_4lvl(struct i915_ppgtt *ppgtt) for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) { struct i915_page_directory *pdp = i915_pdp_entry(pml4, i); - if (px_base(pdp) == px_base(&ppgtt->vm.scratch[3])) + if (!pdp) continue; gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, pdp); @@ -994,8 +983,6 @@ static void gen8_ppgtt_clear_pd(struct i915_address_space *vm, u32 pde; gen8_for_each_pde(pt, pd, start, length, pde) { - GEM_BUG_ON(px_base(pt) == px_base(&vm->scratch[1])); - atomic_inc(&pt->used); gen8_ppgtt_clear_pt(vm, pt, start, length); if (release_pd_entry(pd, pde, pt, &vm->scratch[1])) @@ -1014,8 +1001,6 @@ static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm, unsigned int pdpe; gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { - GEM_BUG_ON(px_base(pd) == px_base(&vm->scratch[2])); - atomic_inc(px_used(pd)); gen8_ppgtt_clear_pd(vm, pd, start, length); if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch[2])) @@ -1044,8 +1029,6 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm, GEM_BUG_ON(!i915_vm_is_4lvl(vm)); gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { - GEM_BUG_ON(px_base(pdp) == px_base(&vm->scratch[3])); - atomic_inc(px_used(pdp)); gen8_ppgtt_clear_pdp(vm, pdp, start, length); if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch[3])) @@ -1066,7 +1049,7 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm, gen8_for_each_pde(pt, pd, start, length, pde) { const int count = gen8_pte_count(start, length); - if (px_base(pt) == 
px_base(&vm->scratch[1])) { + if (!pt) { spin_unlock(&pd->lock); pt = fetch_and_zero(&alloc); @@ -1081,7 +1064,7 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm, fill_px(pt, vm->scratch[0].encode); spin_lock(&pd->lock); - if (pd->entry[pde] == &vm->scratch[1]) { + if (!pd->entry[pde]) { set_pd_entry(pd, pde, pt); } else { alloc = pt; @@ -1113,7 +1096,7 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm, spin_lock(&pdp->lock); gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { - if (px_base(pd) == px_base(&vm->scratch[2])) { + if (!pd) { spin_unlock(&pdp->lock); pd = fetch_and_zero(&alloc); @@ -1124,10 +1107,10 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm, goto unwind; } - init_pd(pd, &vm->scratch[1]); + fill_px(pd, vm->scratch[1].encode); spin_lock(&pdp->lock); - if (pdp->entry[pdpe] == &vm->scratch[2]) { + if (!pdp->entry[pdpe]) { set_pd_entry(pdp, pdpe, pd); } else { alloc = pd; @@ -1177,7 +1160,7 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm, spin_lock(&pml4->lock); gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { - if (px_base(pdp) == px_base(&vm->scratch[3])) { + if (!pdp) { spin_unlock(&pml4->lock); pdp = fetch_and_zero(&alloc); @@ -1188,10 +1171,10 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm, goto unwind; } - init_pd(pdp, &vm->scratch[2]); + fill_px(pdp, vm->scratch[2].encode); spin_lock(&pml4->lock); - if (pml4->entry[pml4e] == &vm->scratch[3]) { + if (!pml4->entry[pml4e]) { set_pd_entry(pml4, pml4e, pdp); } else { alloc = pdp; @@ -1527,7 +1510,7 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt) if (IS_ERR(pd)) goto unwind; - init_pd(pd, &vm->scratch[1]); + fill_px(pd, vm->scratch[1].encode); set_pd_entry(pdp, pdpe, pd); } @@ -1558,46 +1541,19 @@ static void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt) ppgtt->vm.top = i915_vm_is_4lvl(&ppgtt->vm) ? 
3 : 2; } -static void init_pd_n(struct i915_address_space *vm, - struct i915_page_directory *pd, - struct i915_page_scratch *scratch, - const unsigned int entries) -{ - u64 * const vaddr = kmap_atomic_px(pd); - - memset64(vaddr, scratch->encode, entries); - kunmap_atomic(vaddr); - - memset_p(pd->entry, scratch, entries); -} - static struct i915_page_directory * gen8_alloc_top_pd(struct i915_address_space *vm) { + const unsigned int count = vm->total >> __gen8_pte_shift(vm->top); struct i915_page_directory *pd; - if (i915_vm_is_4lvl(vm)) { - pd = alloc_pd(vm); - if (!IS_ERR(pd)) - init_pd(pd, &vm->scratch[3]); + GEM_BUG_ON(count > ARRAY_SIZE(pd->entry)); + pd = alloc_pd(vm); + if (IS_ERR(pd)) return pd; - } - - /* 3lvl */ - pd = __alloc_pd(); - if (!pd) - return ERR_PTR(-ENOMEM); - - pd->entry[GEN8_3LVL_PDPES] = NULL; - - if (unlikely(setup_page_dma(vm, px_base(pd)))) { - kfree(pd); - return ERR_PTR(-ENOMEM); - } - - init_pd_n(vm, pd, &vm->scratch[2], GEN8_3LVL_PDPES); + fill_page_dma(px_base(pd), vm->scratch[vm->top].encode, count); return pd; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 669b204d4c13..2341944b9b17 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -610,7 +610,7 @@ i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n) { struct i915_page_dma *pt = ppgtt->pd->entry[n]; - return px_dma(pt); + return px_dma(pt ?: px_base(&ppgtt->vm.scratch[ppgtt->vm.top])); } static inline struct i915_ggtt * From patchwork Sun Jul 7 21:00:21 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11034327 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 7C32F14DB for ; Sun, 7 Jul 2019 21:02:22 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 6CA5028179 for ; Sun, 7 Jul 2019 21:02:22 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 61413281DB; Sun, 7 Jul 2019 21:02:22 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-5.2 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_MED autolearn=ham version=3.3.1 Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher DHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.wl.linuxfoundation.org (Postfix) with ESMTPS id E5AA128179 for ; Sun, 7 Jul 2019 21:02:21 +0000 (UTC) Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 71C6E89AB5; Sun, 7 Jul 2019 21:02:21 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id 323EA89AB5 for ; Sun, 7 Jul 2019 21:02:20 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from haswell.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 17163712-1500050 for multiple; Sun, 07 Jul 2019 22:00:28 +0100 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Sun, 7 Jul 
2019 22:00:21 +0100 Message-Id: <20190707210024.26192-9-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190707210024.26192-1-chris@chris-wilson.co.uk> References: <20190707210024.26192-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 08/11] drm/i915/gtt: Recursive cleanup for gen8 X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" X-Virus-Scanned: ClamAV using ClamSMTP With an explicit level, we can refactor the separate cleanup functions as a simple recursive function. We take the opportunity to pass down the size of each level so that we can deal with the different sizes of top-level and avoid over allocating for 32/36-bit vm. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem_gtt.c | 93 ++++++++++------------------- drivers/gpu/drm/i915/i915_gem_gtt.h | 2 +- 2 files changed, 33 insertions(+), 62 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index a99b89502a90..0625b07a1132 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -713,11 +713,11 @@ static struct i915_page_table *alloc_pt(struct i915_address_space *vm) return pt; } -static struct i915_page_directory *__alloc_pd(void) +static struct i915_page_directory *__alloc_pd(size_t sz) { struct i915_page_directory *pd; - pd = kzalloc(sizeof(*pd), I915_GFP_ALLOW_FAIL); + pd = kzalloc(sz, I915_GFP_ALLOW_FAIL); if (unlikely(!pd)) return NULL; @@ -729,7 +729,7 @@ static struct i915_page_directory *alloc_pd(struct i915_address_space *vm) { struct i915_page_directory *pd; - pd = __alloc_pd(); + pd = __alloc_pd(sizeof(*pd)); if (unlikely(!pd)) return ERR_PTR(-ENOMEM); @@ -766,7 +766,7 @@ __set_pd_entry(struct i915_page_directory * const pd, struct i915_page_dma * const to, u64 (*encode)(const dma_addr_t, const enum i915_cache_level)) { - GEM_BUG_ON(atomic_read(px_used(pd)) > 512); + GEM_BUG_ON(atomic_read(px_used(pd)) > ARRAY_SIZE(pd->entry)); atomic_inc(px_used(pd)); pd->entry[idx] = to; @@ -893,64 +893,34 @@ static inline unsigned int gen8_pt_count(u64 addr, u64 end) return end - addr; } -static void gen8_free_page_tables(struct i915_address_space *vm, - struct i915_page_directory *pd) +static void __gen8_ppgtt_cleanup(struct i915_address_space *vm, + struct i915_page_directory *pd, + int count, int lvl) { - int i; - - for (i = 0; i < I915_PDES; i++) { - if (pd->entry[i]) - free_pd(vm, pd->entry[i]); - } -} - -static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm, - struct i915_page_directory *pdp) -{ - const unsigned int pdpes = i915_pdpes_per_pdp(vm); - int i; - - for (i = 0; i < pdpes; i++) { - if (!pdp->entry[i]) - continue; - - gen8_free_page_tables(vm, pdp->entry[i]); - free_pd(vm, pdp->entry[i]); - } - - free_px(vm, pdp); -} - -static void gen8_ppgtt_cleanup_4lvl(struct i915_ppgtt *ppgtt) -{ - struct i915_page_directory * const pml4 = ppgtt->pd; - int i; - - for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) { - struct i915_page_directory *pdp = i915_pdp_entry(pml4, i); + if (lvl) { + void **pde = pd->entry; - if (!pdp) - continue; + do { + if (!*pde) + continue; - gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, pdp); + __gen8_ppgtt_cleanup(vm, *pde, I915_PDES, lvl - 1); + } while (pde++, --count); } - free_px(&ppgtt->vm, pml4); + free_px(vm, pd); } 
static void gen8_ppgtt_cleanup(struct i915_address_space *vm) { - struct drm_i915_private *i915 = vm->i915; struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - if (intel_vgpu_active(i915)) + if (intel_vgpu_active(vm->i915)) gen8_ppgtt_notify_vgt(ppgtt, false); - if (i915_vm_is_4lvl(vm)) - gen8_ppgtt_cleanup_4lvl(ppgtt); - else - gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, ppgtt->pd); - + __gen8_ppgtt_cleanup(vm, ppgtt->pd, + vm->total >> __gen8_pte_shift(vm->top), + vm->top); free_scratch(vm); } @@ -1502,24 +1472,18 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt) struct i915_page_directory *pdp = ppgtt->pd; struct i915_page_directory *pd; u64 start = 0, length = ppgtt->vm.total; - u64 from = start; unsigned int pdpe; gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { pd = alloc_pd(vm); if (IS_ERR(pd)) - goto unwind; + return PTR_ERR(pd); fill_px(pd, vm->scratch[1].encode); set_pd_entry(pdp, pdpe, pd); } return 0; - -unwind: - gen8_ppgtt_clear_pdp(vm, pdp, from, start - from); - atomic_set(px_used(pdp), 0); - return -ENOMEM; } static void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt) @@ -1549,9 +1513,14 @@ gen8_alloc_top_pd(struct i915_address_space *vm) GEM_BUG_ON(count > ARRAY_SIZE(pd->entry)); - pd = alloc_pd(vm); - if (IS_ERR(pd)) - return pd; + pd = __alloc_pd(offsetof(typeof(*pd), entry[count])); + if (unlikely(!pd)) + return ERR_PTR(-ENOMEM); + + if (unlikely(setup_page_dma(vm, px_base(pd)))) { + kfree(pd); + return ERR_PTR(-ENOMEM); + } fill_page_dma(px_base(pd), vm->scratch[vm->top].encode, count); return pd; @@ -1623,7 +1592,9 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915) return ppgtt; err_free_pd: - free_px(&ppgtt->vm, ppgtt->pd); + __gen8_ppgtt_cleanup(&ppgtt->vm, ppgtt->pd, + ppgtt->vm.total >> __gen8_pte_shift(ppgtt->vm.top), + ppgtt->vm.top); err_free_scratch: free_scratch(&ppgtt->vm); err_free: @@ -2069,7 +2040,7 @@ static struct i915_ppgtt *gen6_ppgtt_create(struct drm_i915_private *i915) ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode; - ppgtt->base.pd = __alloc_pd(); + ppgtt->base.pd = __alloc_pd(sizeof(*ppgtt->base.pd)); if (!ppgtt->base.pd) { err = -ENOMEM; goto err_free; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 2341944b9b17..d0128f52184b 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -253,7 +253,7 @@ struct i915_page_table { struct i915_page_directory { struct i915_page_table pt; spinlock_t lock; - void *entry[512]; + void *entry[I915_PDES]; }; #define __px_choose_expr(x, type, expr, other) \ From patchwork Sun Jul 7 21:00:22 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11034325 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 8416414DB for ; Sun, 7 Jul 2019 21:02:13 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 7157628179 for ; Sun, 7 Jul 2019 21:02:13 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 64639281DB; Sun, 7 Jul 2019 21:02:13 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-5.2 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_MED autolearn=ham 
version=3.3.1 Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher DHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.wl.linuxfoundation.org (Postfix) with ESMTPS id C5A2728179 for ; Sun, 7 Jul 2019 21:02:12 +0000 (UTC) Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 4161E89A9A; Sun, 7 Jul 2019 21:02:12 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id 8A12C89A9A for ; Sun, 7 Jul 2019 21:02:10 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from haswell.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 17163713-1500050 for multiple; Sun, 07 Jul 2019 22:00:28 +0100 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Sun, 7 Jul 2019 22:00:22 +0100 Message-Id: <20190707210024.26192-10-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190707210024.26192-1-chris@chris-wilson.co.uk> References: <20190707210024.26192-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 09/11] drm/i915/gtt: Recursive ppgtt clear for gen8 X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" X-Virus-Scanned: ClamAV using ClamSMTP With an explicit level, we can refactor the separate clear functions as a simple recursive function. The additional knowledge of the level allows us to spot when we can free an entire subtree at once. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/Kconfig.debug | 15 +++ drivers/gpu/drm/i915/i915_gem_gtt.c | 154 ++++++++++++++++------------ 2 files changed, 105 insertions(+), 64 deletions(-) diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 8d922bb4d953..ed8c787058a5 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -94,6 +94,21 @@ config DRM_I915_TRACE_GEM If in doubt, say "N". +config DRM_I915_TRACE_GTT + bool "Insert extra ftrace output from the GTT internals" + depends on DRM_I915_DEBUG_GEM + select TRACING + default n + help + Enable additional and verbose debugging output that will spam + ordinary tests, but may be vital for post-mortem debugging when + used with /proc/sys/kernel/ftrace_dump_on_oops + + Recommended for driver developers only. + + If in doubt, say "N". + + config DRM_I915_SW_FENCE_DEBUG_OBJECTS bool "Enable additional driver debugging for fence objects" depends on DRM_I915 diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 0625b07a1132..e2d6169a8a66 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -46,6 +46,12 @@ #define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN) +#if IS_ENABLED(CONFIG_DRM_I915_TRACE_GTT) +#define DBG(...) trace_printk(__VA_ARGS__) +#else +#define DBG(...) 
+#endif + /** * DOC: Global GTT views * @@ -796,6 +802,9 @@ release_pd_entry(struct i915_page_directory * const pd, { bool free = false; + if (atomic_add_unless(&pt->used, -1, 1)) + return false; + spin_lock(&pd->lock); if (atomic_dec_and_test(&pt->used)) { clear_pd_entry(pd, idx, scratch); @@ -924,86 +933,101 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) free_scratch(vm); } -/* Removes entries from a single page table, releasing it if it's empty. - * Caller can use the return value to update higher-level entries. - */ -static void gen8_ppgtt_clear_pt(const struct i915_address_space *vm, - struct i915_page_table *pt, - u64 start, u64 length) +static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm, + struct i915_page_directory * const pd, + u64 start, const u64 end, int lvl) { - const unsigned int num_entries = gen8_pte_count(start, length); - gen8_pte_t *vaddr; + const struct i915_page_scratch * const scratch = &vm->scratch[lvl]; + unsigned int idx, len; - vaddr = kmap_atomic_px(pt); - memset64(vaddr + gen8_pte_index(start), - vm->scratch[0].encode, - num_entries); - kunmap_atomic(vaddr); + len = gen8_pd_range(start, end, lvl--, &idx); + DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d}\n", + __func__, vm, lvl + 1, start, end, + idx, len, atomic_read(px_used(pd))); + GEM_BUG_ON(!len || len >= atomic_read(px_used(pd))); - GEM_BUG_ON(num_entries > atomic_read(&pt->used)); + do { + struct i915_page_table *pt = pd->entry[idx]; + + if (atomic_fetch_inc(&pt->used) >> gen8_pd_shift(1) && + gen8_pd_subsumes(start, end, lvl)) { + DBG("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n", + __func__, vm, lvl + 1, idx, start, end); + clear_pd_entry(pd, idx, scratch); + __gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl); + start += (u64)I915_PDES << gen8_pd_shift(lvl); + continue; + } - atomic_sub(num_entries, &pt->used); -} + if (lvl) { + start = __gen8_ppgtt_clear(vm, as_pd(pt), + start, end, lvl); + } else { + unsigned int count; + u64 *vaddr; -static void gen8_ppgtt_clear_pd(struct i915_address_space *vm, - struct i915_page_directory *pd, - u64 start, u64 length) -{ - struct i915_page_table *pt; - u32 pde; + count = gen8_pt_count(start, end); + DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d} removing pte\n", + __func__, vm, lvl, start, end, + gen8_pd_index(start, 0), count, + atomic_read(&pt->used)); + GEM_BUG_ON(!count || count >= atomic_read(&pt->used)); - gen8_for_each_pde(pt, pd, start, length, pde) { - atomic_inc(&pt->used); - gen8_ppgtt_clear_pt(vm, pt, start, length); - if (release_pd_entry(pd, pde, pt, &vm->scratch[1])) + vaddr = kmap_atomic_px(pt); + memset64(vaddr + gen8_pd_index(start, 0), + vm->scratch[0].encode, + count); + kunmap_atomic(vaddr); + + atomic_sub(count, &pt->used); + start += count; + } + + if (release_pd_entry(pd, idx, pt, scratch)) free_px(vm, pt); - } + } while (idx++, --len); + + return start; } -/* Removes entries from a single page dir pointer, releasing it if it's empty. 
- * Caller can use the return value to update higher-level entries - */ -static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm, - struct i915_page_directory * const pdp, - u64 start, u64 length) +static void gen8_ppgtt_clear(struct i915_address_space *vm, + u64 start, u64 length) { - struct i915_page_directory *pd; - unsigned int pdpe; + GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT))); + GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT))); - gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { - atomic_inc(px_used(pd)); - gen8_ppgtt_clear_pd(vm, pd, start, length); - if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch[2])) - free_px(vm, pd); - } + start >>= GEN8_PTE_SHIFT; + length >>= GEN8_PTE_SHIFT; + GEM_BUG_ON(length == 0); + + __gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd, + start, start + length, vm->top); } -static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm, - u64 start, u64 length) +static void gen8_ppgtt_clear_pd(struct i915_address_space *vm, + struct i915_page_directory *pd, + u64 start, u64 length) { - gen8_ppgtt_clear_pdp(vm, i915_vm_to_ppgtt(vm)->pd, start, length); + GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT))); + GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT))); + + start >>= GEN8_PTE_SHIFT; + length >>= GEN8_PTE_SHIFT; + + __gen8_ppgtt_clear(vm, pd, start, start + length, 1); } -/* Removes entries from a single pml4. - * This is the top-level structure in 4-level page tables used on gen8+. - * Empty entries are always scratch pml4e. - */ -static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm, - u64 start, u64 length) +static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm, + struct i915_page_directory * const pdp, + u64 start, u64 length) { - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - struct i915_page_directory * const pml4 = ppgtt->pd; - struct i915_page_directory *pdp; - unsigned int pml4e; + GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT))); + GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT))); - GEM_BUG_ON(!i915_vm_is_4lvl(vm)); + start >>= GEN8_PTE_SHIFT; + length >>= GEN8_PTE_SHIFT; - gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { - atomic_inc(px_used(pdp)); - gen8_ppgtt_clear_pdp(vm, pdp, start, length); - if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch[3])) - free_px(vm, pdp); - } + __gen8_ppgtt_clear(vm, pdp, start, start + length, 2); } static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm, @@ -1168,7 +1192,7 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm, if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch[3])) free_px(vm, pdp); unwind: - gen8_ppgtt_clear_4lvl(vm, from, start - from); + gen8_ppgtt_clear(vm, from, start - from); out: if (alloc) free_px(vm, alloc); @@ -1481,6 +1505,7 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt) fill_px(pd, vm->scratch[1].encode); set_pd_entry(pdp, pdpe, pd); + atomic_inc(px_used(pd)); /* keep pinned */ } return 0; @@ -1523,6 +1548,7 @@ gen8_alloc_top_pd(struct i915_address_space *vm) } fill_page_dma(px_base(pd), vm->scratch[vm->top].encode, count); + atomic_inc(px_used(pd)); /* mark as pinned */ return pd; } @@ -1571,7 +1597,6 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915) if (i915_vm_is_4lvl(&ppgtt->vm)) { ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_4lvl; ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl; - ppgtt->vm.clear_range = gen8_ppgtt_clear_4lvl; } else { if (intel_vgpu_active(i915)) { err = 
gen8_preallocate_top_level_pdp(ppgtt); @@ -1581,9 +1606,10 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915) ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_3lvl; ppgtt->vm.insert_entries = gen8_ppgtt_insert_3lvl; - ppgtt->vm.clear_range = gen8_ppgtt_clear_3lvl; } + ppgtt->vm.clear_range = gen8_ppgtt_clear; + if (intel_vgpu_active(i915)) gen8_ppgtt_notify_vgt(ppgtt, true); From patchwork Sun Jul 7 21:00:23 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11034329 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id DC23114DB for ; Sun, 7 Jul 2019 21:02:23 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id CC9AD28179 for ; Sun, 7 Jul 2019 21:02:23 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id C12C4281DB; Sun, 7 Jul 2019 21:02:23 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-5.2 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_MED autolearn=ham version=3.3.1 Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher DHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.wl.linuxfoundation.org (Postfix) with ESMTPS id 2AED728179 for ; Sun, 7 Jul 2019 21:02:23 +0000 (UTC) Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 9442C89AB7; Sun, 7 Jul 2019 21:02:22 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id 32BE189AB7 for ; Sun, 7 Jul 2019 21:02:20 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from haswell.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 17163714-1500050 for multiple; Sun, 07 Jul 2019 22:00:28 +0100 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Sun, 7 Jul 2019 22:00:23 +0100 Message-Id: <20190707210024.26192-11-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190707210024.26192-1-chris@chris-wilson.co.uk> References: <20190707210024.26192-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 10/11] drm/i915/gtt: Recursive ppgtt alloc for gen8 X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" X-Virus-Scanned: ClamAV using ClamSMTP Refactor the separate allocation routines into a single recursive function. 
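For orientation, here is a standalone sketch in plain C of the shape such a single recursive allocator takes (not the driver code: alloc_range and FANOUT are made-up names, the per-level [start, end) splitting done by gen8_pd_range() is reduced to a full fan-out, and locking, scratch fill and vgpu handling are omitted):

#include <stdio.h>
#include <stdlib.h>

#define FANOUT 8

struct node {
        struct node *child[FANOUT];
};

/* One allocator for every level: descend while lvl > 1, so the children
 * of a level-1 directory are the leaf page tables. */
static int alloc_range(struct node *pd, unsigned int first,
                       unsigned int count, int lvl)
{
        unsigned int idx;

        for (idx = first; idx < first + count; idx++) {
                if (!pd->child[idx]) {
                        pd->child[idx] = calloc(1, sizeof(*pd));
                        if (!pd->child[idx])
                                return -1;      /* caller unwinds */
                }

                if (lvl > 1 &&
                    alloc_range(pd->child[idx], 0, FANOUT, lvl - 1))
                        return -1;
        }
        return 0;
}

int main(void)
{
        struct node top = { { 0 } };    /* leaked at exit; fine for a demo */

        if (alloc_range(&top, 2, 3, 3)) /* entries 2..4, three levels deep */
                return 1;

        printf("populated: %p\n", (void *)top.child[2]->child[0]);
        return 0;
}

The real __gen8_ppgtt_alloc() in the diff below additionally narrows the index range at each level from the [start, end) VA window via gen8_pd_range(), and on failure releases the entry it just populated (release_pd_entry()/free_px()) before returning.
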
Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem_gtt.c | 272 ++++++++++------------------ 1 file changed, 97 insertions(+), 175 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index e2d6169a8a66..83f03f8f4d2c 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1004,199 +1004,119 @@ static void gen8_ppgtt_clear(struct i915_address_space *vm, start, start + length, vm->top); } -static void gen8_ppgtt_clear_pd(struct i915_address_space *vm, - struct i915_page_directory *pd, - u64 start, u64 length) -{ - GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT))); - GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT))); - - start >>= GEN8_PTE_SHIFT; - length >>= GEN8_PTE_SHIFT; - - __gen8_ppgtt_clear(vm, pd, start, start + length, 1); -} - -static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm, - struct i915_page_directory * const pdp, - u64 start, u64 length) -{ - GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT))); - GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT))); - - start >>= GEN8_PTE_SHIFT; - length >>= GEN8_PTE_SHIFT; - - __gen8_ppgtt_clear(vm, pdp, start, start + length, 2); -} - -static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm, - struct i915_page_directory *pd, - u64 start, u64 length) +static int __gen8_ppgtt_alloc(struct i915_address_space * const vm, + struct i915_page_directory * const pd, + u64 * const start, u64 end, int lvl) { - struct i915_page_table *pt, *alloc = NULL; - u64 from = start; - unsigned int pde; + const struct i915_page_scratch * const scratch = &vm->scratch[lvl]; + struct i915_page_table *alloc = NULL; + unsigned int idx, len; int ret = 0; + len = gen8_pd_range(*start, end, lvl--, &idx); + DBG("%s(%p):{lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d}\n", + __func__, vm, lvl + 1, *start, end, + idx, len, atomic_read(px_used(pd))); + GEM_BUG_ON(!len || (idx + len - 1) >> gen8_pd_shift(1)); + spin_lock(&pd->lock); - gen8_for_each_pde(pt, pd, start, length, pde) { - const int count = gen8_pte_count(start, length); + GEM_BUG_ON(!atomic_read(px_used(pd))); /* Must be pinned! 
*/ + do { + struct i915_page_table *pt = pd->entry[idx]; if (!pt) { spin_unlock(&pd->lock); - pt = fetch_and_zero(&alloc); - if (!pt) - pt = alloc_pt(vm); - if (IS_ERR(pt)) { - ret = PTR_ERR(pt); - goto unwind; - } + DBG("%s(%p):{ lvl:%d, idx:%d } allocating new tree\n", + __func__, vm, lvl + 1, idx); - if (count < GEN8_PTES || intel_vgpu_active(vm->i915)) - fill_px(pt, vm->scratch[0].encode); + pt = fetch_and_zero(&alloc); + if (lvl) { + if (!pt) { + pt = &alloc_pd(vm)->pt; + if (IS_ERR(pt)) { + ret = PTR_ERR(pt); + break; + } + } - spin_lock(&pd->lock); - if (!pd->entry[pde]) { - set_pd_entry(pd, pde, pt); + fill_px(pt, vm->scratch[lvl].encode); } else { - alloc = pt; - pt = pd->entry[pde]; - } - } - - atomic_add(count, &pt->used); - } - spin_unlock(&pd->lock); - goto out; - -unwind: - gen8_ppgtt_clear_pd(vm, pd, from, start - from); -out: - if (alloc) - free_px(vm, alloc); - return ret; -} - -static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm, - struct i915_page_directory *pdp, - u64 start, u64 length) -{ - struct i915_page_directory *pd, *alloc = NULL; - u64 from = start; - unsigned int pdpe; - int ret = 0; + if (!pt) { + pt = alloc_pt(vm); + if (IS_ERR(pt)) { + ret = PTR_ERR(pt); + break; + } + } - spin_lock(&pdp->lock); - gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { - if (!pd) { - spin_unlock(&pdp->lock); - - pd = fetch_and_zero(&alloc); - if (!pd) - pd = alloc_pd(vm); - if (IS_ERR(pd)) { - ret = PTR_ERR(pd); - goto unwind; + if (intel_vgpu_active(vm->i915) || + gen8_pt_count(*start, end) < I915_PDES) + fill_px(pt, vm->scratch[lvl].encode); } - fill_px(pd, vm->scratch[1].encode); + spin_lock(&pd->lock); + if (likely(!pd->entry[idx])) + set_pd_entry(pd, idx, pt); + else + alloc = pt, pt = pd->entry[idx]; + } - spin_lock(&pdp->lock); - if (!pdp->entry[pdpe]) { - set_pd_entry(pdp, pdpe, pd); - } else { - alloc = pd; - pd = pdp->entry[pdpe]; + if (lvl) { + atomic_inc(&pt->used); + spin_unlock(&pd->lock); + + ret = __gen8_ppgtt_alloc(vm, as_pd(pt), + start, end, lvl); + if (unlikely(ret)) { + if (release_pd_entry(pd, idx, pt, scratch)) + free_px(vm, pt); + goto out; } - } - atomic_inc(px_used(pd)); - spin_unlock(&pdp->lock); - ret = gen8_ppgtt_alloc_pd(vm, pd, start, length); - if (unlikely(ret)) - goto unwind_pd; + spin_lock(&pd->lock); + atomic_dec(&pt->used); + GEM_BUG_ON(!atomic_read(&pt->used)); + } else { + unsigned int count = gen8_pt_count(*start, end); - spin_lock(&pdp->lock); - atomic_dec(px_used(pd)); - } - spin_unlock(&pdp->lock); - goto out; + DBG("%s(%p):{lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d} inserting pte\n", + __func__, vm, lvl, *start, end, + gen8_pd_index(*start, 0), count, + atomic_read(&pt->used)); -unwind_pd: - if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch[2])) - free_px(vm, pd); -unwind: - gen8_ppgtt_clear_pdp(vm, pdp, from, start - from); + atomic_add(count, &pt->used); + GEM_BUG_ON(atomic_read(&pt->used) > I915_PDES); + *start += count; + } + } while (idx++, --len); + spin_unlock(&pd->lock); out: if (alloc) free_px(vm, alloc); return ret; } -static int gen8_ppgtt_alloc_3lvl(struct i915_address_space *vm, - u64 start, u64 length) -{ - return gen8_ppgtt_alloc_pdp(vm, - i915_vm_to_ppgtt(vm)->pd, start, length); -} - -static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm, - u64 start, u64 length) +static int gen8_ppgtt_alloc(struct i915_address_space *vm, + u64 start, u64 length) { - struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - struct i915_page_directory * const pml4 = ppgtt->pd; - struct i915_page_directory 
*pdp, *alloc = NULL; u64 from = start; - int ret = 0; - u32 pml4e; - - spin_lock(&pml4->lock); - gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { - if (!pdp) { - spin_unlock(&pml4->lock); - - pdp = fetch_and_zero(&alloc); - if (!pdp) - pdp = alloc_pd(vm); - if (IS_ERR(pdp)) { - ret = PTR_ERR(pdp); - goto unwind; - } - - fill_px(pdp, vm->scratch[2].encode); + int err; - spin_lock(&pml4->lock); - if (!pml4->entry[pml4e]) { - set_pd_entry(pml4, pml4e, pdp); - } else { - alloc = pdp; - pdp = pml4->entry[pml4e]; - } - } - atomic_inc(px_used(pdp)); - spin_unlock(&pml4->lock); + GEM_BUG_ON(!IS_ALIGNED(start, BIT_ULL(GEN8_PTE_SHIFT))); + GEM_BUG_ON(!IS_ALIGNED(length, BIT_ULL(GEN8_PTE_SHIFT))); - ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length); - if (unlikely(ret)) - goto unwind_pdp; + start >>= GEN8_PTE_SHIFT; + length >>= GEN8_PTE_SHIFT; + GEM_BUG_ON(length == 0); - spin_lock(&pml4->lock); - atomic_dec(px_used(pdp)); - } - spin_unlock(&pml4->lock); - goto out; + err = __gen8_ppgtt_alloc(vm, i915_vm_to_ppgtt(vm)->pd, + &start, start + length, vm->top); + if (unlikely(err)) + __gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd, + from, start, vm->top); -unwind_pdp: - if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch[3])) - free_px(vm, pdp); -unwind: - gen8_ppgtt_clear(vm, from, start - from); -out: - if (alloc) - free_px(vm, alloc); - return ret; + return err; } static inline struct sgt_dma { @@ -1493,19 +1413,22 @@ static int gen8_init_scratch(struct i915_address_space *vm) static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt) { struct i915_address_space *vm = &ppgtt->vm; - struct i915_page_directory *pdp = ppgtt->pd; - struct i915_page_directory *pd; - u64 start = 0, length = ppgtt->vm.total; - unsigned int pdpe; + struct i915_page_directory *pd = ppgtt->pd; + unsigned int idx; + + GEM_BUG_ON(vm->top != 2); + GEM_BUG_ON((vm->total >> __gen8_pte_shift(2)) != GEN8_3LVL_PDPES); + + for (idx = 0; idx < GEN8_3LVL_PDPES; idx++) { + struct i915_page_directory *pde; - gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { - pd = alloc_pd(vm); - if (IS_ERR(pd)) - return PTR_ERR(pd); + pde = alloc_pd(vm); + if (IS_ERR(pde)) + return PTR_ERR(pde); - fill_px(pd, vm->scratch[1].encode); - set_pd_entry(pdp, pdpe, pd); - atomic_inc(px_used(pd)); /* keep pinned */ + fill_px(pde, vm->scratch[1].encode); + set_pd_entry(pd, idx, pde); + atomic_inc(px_used(pde)); /* keep pinned */ } return 0; @@ -1595,7 +1518,6 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915) } if (i915_vm_is_4lvl(&ppgtt->vm)) { - ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_4lvl; ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl; } else { if (intel_vgpu_active(i915)) { @@ -1604,10 +1526,10 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915) goto err_free_pd; } - ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_3lvl; ppgtt->vm.insert_entries = gen8_ppgtt_insert_3lvl; } + ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc; ppgtt->vm.clear_range = gen8_ppgtt_clear; if (intel_vgpu_active(i915)) From patchwork Sun Jul 7 21:00:24 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Chris Wilson X-Patchwork-Id: 11034331 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 2FE6514DB for ; Sun, 7 Jul 2019 21:02:26 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by 
mail.wl.linuxfoundation.org (Postfix) with ESMTP id 1E90A28179 for ; Sun, 7 Jul 2019 21:02:26 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 12FAA281DB; Sun, 7 Jul 2019 21:02:26 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-5.2 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_MED autolearn=ham version=3.3.1 Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher DHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.wl.linuxfoundation.org (Postfix) with ESMTPS id 586A128179 for ; Sun, 7 Jul 2019 21:02:25 +0000 (UTC) Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id D059E89AC6; Sun, 7 Jul 2019 21:02:24 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from fireflyinternet.com (mail.fireflyinternet.com [109.228.58.192]) by gabe.freedesktop.org (Postfix) with ESMTPS id 3ABFF89AC6 for ; Sun, 7 Jul 2019 21:02:20 +0000 (UTC) X-Default-Received-SPF: pass (skip=forwardok (res=PASS)) x-ip-name=78.156.65.138; Received: from haswell.alporthouse.com (unverified [78.156.65.138]) by fireflyinternet.com (Firefly Internet (M1)) with ESMTP id 17163715-1500050 for multiple; Sun, 07 Jul 2019 22:00:29 +0100 From: Chris Wilson To: intel-gfx@lists.freedesktop.org Date: Sun, 7 Jul 2019 22:00:24 +0100 Message-Id: <20190707210024.26192-12-chris@chris-wilson.co.uk> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190707210024.26192-1-chris@chris-wilson.co.uk> References: <20190707210024.26192-1-chris@chris-wilson.co.uk> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 11/11] drm/i915/gtt: Tidy up ppgtt insertion for gen8 X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" X-Virus-Scanned: ClamAV using ClamSMTP Apply the new radix shift helpers to extract the multi-level indices cleanly when inserting pte into the gtt tree. 
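The helpers introduced earlier in the series (gen8_pd_index(), __gen8_pte_index()) derive the index for any level from a single shift count, which is what lets this patch drop the per-level GEN8_*_SHIFT/MASK defines and the struct gen8_insert_pte quadruple in favour of one u64 PTE index. A minimal standalone sketch of that idea, assuming gen8-like geometry (4 KiB pages, 512 entries per level); demo_pd_index() and demo_pte_index() are invented names standing in for the real macros:

#include <assert.h>
#include <stdint.h>

#define DEMO_PTE_SHIFT	12			/* 4 KiB page offset bits */
#define DEMO_PD_SHIFT	9			/* log2(512) entries per level */

/* Index into page-table level @lvl for a PTE index @idx. */
static inline unsigned int demo_pd_index(uint64_t idx, int lvl)
{
	return (idx >> (lvl * DEMO_PD_SHIFT)) & ((1u << DEMO_PD_SHIFT) - 1);
}

/* Same, but starting from a byte address rather than a PTE index. */
static inline unsigned int demo_pte_index(uint64_t addr, int lvl)
{
	return demo_pd_index(addr >> DEMO_PTE_SHIFT, lvl);
}

int main(void)
{
	uint64_t addr = 0x0000000123456000ull;

	/* 48b address decomposes as PML4E | PDPE | PDE | PTE | offset */
	assert(demo_pte_index(addr, 0) == ((addr >> 12) & 0x1ff));	/* PTE   */
	assert(demo_pte_index(addr, 1) == ((addr >> 21) & 0x1ff));	/* PDE   */
	assert(demo_pte_index(addr, 2) == ((addr >> 30) & 0x1ff));	/* PDPE  */
	assert(demo_pte_index(addr, 3) == ((addr >> 39) & 0x1ff));	/* PML4E */
	return 0;
}

With the index derived on demand, gen8_ppgtt_insert_pte_entries() only needs to increment one u64 and check when each 9-bit field wraps to zero to know it has crossed into the next page table or directory.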
Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem_gtt.c | 115 +++++++++++----------------- drivers/gpu/drm/i915/i915_gem_gtt.h | 92 ++-------------------- 2 files changed, 49 insertions(+), 158 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 83f03f8f4d2c..e2c3c4288edb 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1128,47 +1128,28 @@ static inline struct sgt_dma { return (struct sgt_dma) { sg, addr, addr + sg->length }; } -struct gen8_insert_pte { - u16 pml4e; - u16 pdpe; - u16 pde; - u16 pte; -}; - -static __always_inline struct gen8_insert_pte gen8_insert_pte(u64 start) -{ - return (struct gen8_insert_pte) { - gen8_pml4e_index(start), - gen8_pdpe_index(start), - gen8_pde_index(start), - gen8_pte_index(start), - }; -} - -static __always_inline bool +static __always_inline u64 gen8_ppgtt_insert_pte_entries(struct i915_ppgtt *ppgtt, struct i915_page_directory *pdp, struct sgt_dma *iter, - struct gen8_insert_pte *idx, + u64 idx, enum i915_cache_level cache_level, u32 flags) { struct i915_page_directory *pd; const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags); gen8_pte_t *vaddr; - bool ret; - GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm)); - pd = i915_pd_entry(pdp, idx->pdpe); - vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde)); + pd = i915_pd_entry(pdp, gen8_pd_index(idx, 2)); + vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1))); do { - vaddr[idx->pte] = pte_encode | iter->dma; + vaddr[gen8_pd_index(idx, 0)] = pte_encode | iter->dma; iter->dma += I915_GTT_PAGE_SIZE; if (iter->dma >= iter->max) { iter->sg = __sg_next(iter->sg); if (!iter->sg) { - ret = false; + idx = 0; break; } @@ -1176,30 +1157,22 @@ gen8_ppgtt_insert_pte_entries(struct i915_ppgtt *ppgtt, iter->max = iter->dma + iter->sg->length; } - if (++idx->pte == GEN8_PTES) { - idx->pte = 0; - - if (++idx->pde == I915_PDES) { - idx->pde = 0; - + if (gen8_pd_index(++idx, 0) == 0) { + if (gen8_pd_index(idx, 1) == 0) { /* Limited by sg length for 3lvl */ - if (++idx->pdpe == GEN8_PML4ES_PER_PML4) { - idx->pdpe = 0; - ret = true; + if (gen8_pd_index(idx, 2) == 0) break; - } - GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm)); - pd = pdp->entry[idx->pdpe]; + pd = pdp->entry[gen8_pd_index(idx, 2)]; } kunmap_atomic(vaddr); - vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde)); + vaddr = kmap_atomic_px(i915_pt_entry(pd, gen8_pd_index(idx, 1))); } } while (1); kunmap_atomic(vaddr); - return ret; + return idx; } static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm, @@ -1209,9 +1182,9 @@ static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm, { struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct sgt_dma iter = sgt_dma(vma); - struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start); - gen8_ppgtt_insert_pte_entries(ppgtt, ppgtt->pd, &iter, &idx, + gen8_ppgtt_insert_pte_entries(ppgtt, ppgtt->pd, &iter, + vma->node.start >> GEN8_PTE_SHIFT, cache_level, flags); vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; @@ -1228,39 +1201,38 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, dma_addr_t rem = iter->sg->length; do { - struct gen8_insert_pte idx = gen8_insert_pte(start); struct i915_page_directory *pdp = - i915_pdp_entry(pml4, idx.pml4e); - struct i915_page_directory *pd = i915_pd_entry(pdp, idx.pdpe); - unsigned int page_size; - bool maybe_64K = false; + i915_pd_entry(pml4, __gen8_pte_index(start, 3)); + struct i915_page_directory *pd = + 
i915_pd_entry(pdp, __gen8_pte_index(start, 2)); gen8_pte_t encode = pte_encode; + unsigned int maybe_64K = -1; + unsigned int page_size; gen8_pte_t *vaddr; - u16 index, max; + u16 index; if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_2M && IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_2M) && - rem >= I915_GTT_PAGE_SIZE_2M && !idx.pte) { - index = idx.pde; - max = I915_PDES; - page_size = I915_GTT_PAGE_SIZE_2M; - + rem >= I915_GTT_PAGE_SIZE_2M && + !__gen8_pte_index(start, 0)) { + index = __gen8_pte_index(start, 1); encode |= GEN8_PDE_PS_2M; + page_size = I915_GTT_PAGE_SIZE_2M; vaddr = kmap_atomic_px(pd); } else { - struct i915_page_table *pt = i915_pt_entry(pd, idx.pde); + struct i915_page_table *pt = + i915_pt_entry(pd, __gen8_pte_index(start, 1)); - index = idx.pte; - max = GEN8_PTES; + index = __gen8_pte_index(start, 0); page_size = I915_GTT_PAGE_SIZE; if (!index && vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K && IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) || - rem >= (max - index) * I915_GTT_PAGE_SIZE)) - maybe_64K = true; + rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE)) + maybe_64K = __gen8_pte_index(start, 1); vaddr = kmap_atomic_px(pt); } @@ -1281,16 +1253,16 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, iter->dma = sg_dma_address(iter->sg); iter->max = iter->dma + rem; - if (maybe_64K && index < max && + if (maybe_64K != -1 && index < I915_PDES && !(IS_ALIGNED(iter->dma, I915_GTT_PAGE_SIZE_64K) && (IS_ALIGNED(rem, I915_GTT_PAGE_SIZE_64K) || - rem >= (max - index) * I915_GTT_PAGE_SIZE))) - maybe_64K = false; + rem >= (I915_PDES - index) * I915_GTT_PAGE_SIZE))) + maybe_64K = -1; if (unlikely(!IS_ALIGNED(iter->dma, page_size))) break; } - } while (rem >= page_size && index < max); + } while (rem >= page_size && index < I915_PDES); kunmap_atomic(vaddr); @@ -1300,14 +1272,14 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, * it and have reached the end of the sg table and we have * enough padding. 
*/ - if (maybe_64K && - (index == max || + if (maybe_64K != -1 && + (index == I915_PDES || (i915_vm_has_scratch_64K(vma->vm) && !iter->sg && IS_ALIGNED(vma->node.start + vma->node.size, I915_GTT_PAGE_SIZE_2M)))) { vaddr = kmap_atomic_px(pd); - vaddr[idx.pde] |= GEN8_PDE_IPS_64K; + vaddr[maybe_64K] |= GEN8_PDE_IPS_64K; kunmap_atomic(vaddr); page_size = I915_GTT_PAGE_SIZE_64K; @@ -1324,8 +1296,7 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, u16 i; encode = vma->vm->scratch[0].encode; - vaddr = kmap_atomic_px(i915_pt_entry(pd, - idx.pde)); + vaddr = kmap_atomic_px(i915_pt_entry(pd, maybe_64K)); for (i = 1; i < index; i += 16) memset64(vaddr + i, encode, 15); @@ -1351,13 +1322,13 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, gen8_ppgtt_insert_huge_entries(vma, pml4, &iter, cache_level, flags); } else { - struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start); + u64 idx = vma->node.start >> GEN8_PTE_SHIFT; - while (gen8_ppgtt_insert_pte_entries(ppgtt, - i915_pdp_entry(pml4, idx.pml4e++), - &iter, &idx, cache_level, - flags)) - GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4); + while ((idx = gen8_ppgtt_insert_pte_entries(ppgtt, + i915_pd_entry(pml4, gen8_pd_index(idx, 3)), + &iter, idx, cache_level, + flags))) + ; vma->page_sizes.gtt = I915_GTT_PAGE_SIZE; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index d0128f52184b..4bcf61e48570 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -115,30 +115,19 @@ typedef u64 gen8_pte_t; #define HSW_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0x7f0)) #define HSW_PTE_ADDR_ENCODE(addr) HSW_GTT_ADDR_ENCODE(addr) -/* GEN8 32b style address is defined as a 3 level page table: +/* + * GEN8 32b style address is defined as a 3 level page table: * 31:30 | 29:21 | 20:12 | 11:0 * PDPE | PDE | PTE | offset * The difference as compared to normal x86 3 level page table is the PDPEs are * programmed via register. 
- */ -#define GEN8_3LVL_PDPES 4 -#define GEN8_PDE_SHIFT 21 -#define GEN8_PDE_MASK 0x1ff -#define GEN8_PTE_SHIFT 12 -#define GEN8_PTE_MASK 0x1ff -#define GEN8_PTES I915_PTES(sizeof(gen8_pte_t)) - -/* GEN8 48b style address is defined as a 4 level page table: + * + * GEN8 48b style address is defined as a 4 level page table: * 47:39 | 38:30 | 29:21 | 20:12 | 11:0 * PML4E | PDPE | PDE | PTE | offset */ -#define GEN8_PML4ES_PER_PML4 512 -#define GEN8_PML4E_SHIFT 39 -#define GEN8_PML4E_MASK (GEN8_PML4ES_PER_PML4 - 1) -#define GEN8_PDPE_SHIFT 30 -/* NB: GEN8_PDPE_MASK is untrue for 32b platforms, but it has no impact on 32b page - * tables */ -#define GEN8_PDPE_MASK 0x1ff +#define GEN8_3LVL_PDPES 4 +#define GEN8_PTE_SHIFT 12 #define PPAT_UNCACHED (_PAGE_PWT | _PAGE_PCD) #define PPAT_CACHED_PDE 0 /* WB LLC */ @@ -522,15 +511,6 @@ static inline u32 gen6_pde_index(u32 addr) return i915_pde_index(addr, GEN6_PDE_SHIFT); } -static inline unsigned int -i915_pdpes_per_pdp(const struct i915_address_space *vm) -{ - if (i915_vm_is_4lvl(vm)) - return GEN8_PML4ES_PER_PML4; - - return GEN8_3LVL_PDPES; -} - static inline struct i915_page_table * i915_pt_entry(const struct i915_page_directory * const pd, const unsigned short n) @@ -545,66 +525,6 @@ i915_pd_entry(const struct i915_page_directory * const pdp, return pdp->entry[n]; } -static inline struct i915_page_directory * -i915_pdp_entry(const struct i915_page_directory * const pml4, - const unsigned short n) -{ - return pml4->entry[n]; -} - -/* Equivalent to the gen6 version, For each pde iterates over every pde - * between from start until start + length. On gen8+ it simply iterates - * over every page directory entry in a page directory. - */ -#define gen8_for_each_pde(pt, pd, start, length, iter) \ - for (iter = gen8_pde_index(start); \ - length > 0 && iter < I915_PDES && \ - (pt = i915_pt_entry(pd, iter), true); \ - ({ u64 temp = ALIGN(start+1, 1 << GEN8_PDE_SHIFT); \ - temp = min(temp - start, length); \ - start += temp, length -= temp; }), ++iter) - -#define gen8_for_each_pdpe(pd, pdp, start, length, iter) \ - for (iter = gen8_pdpe_index(start); \ - length > 0 && iter < i915_pdpes_per_pdp(vm) && \ - (pd = i915_pd_entry(pdp, iter), true); \ - ({ u64 temp = ALIGN(start+1, 1 << GEN8_PDPE_SHIFT); \ - temp = min(temp - start, length); \ - start += temp, length -= temp; }), ++iter) - -#define gen8_for_each_pml4e(pdp, pml4, start, length, iter) \ - for (iter = gen8_pml4e_index(start); \ - length > 0 && iter < GEN8_PML4ES_PER_PML4 && \ - (pdp = i915_pdp_entry(pml4, iter), true); \ - ({ u64 temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT); \ - temp = min(temp - start, length); \ - start += temp, length -= temp; }), ++iter) - -static inline u32 gen8_pte_index(u64 address) -{ - return i915_pte_index(address, GEN8_PDE_SHIFT); -} - -static inline u32 gen8_pde_index(u64 address) -{ - return i915_pde_index(address, GEN8_PDE_SHIFT); -} - -static inline u32 gen8_pdpe_index(u64 address) -{ - return (address >> GEN8_PDPE_SHIFT) & GEN8_PDPE_MASK; -} - -static inline u32 gen8_pml4e_index(u64 address) -{ - return (address >> GEN8_PML4E_SHIFT) & GEN8_PML4E_MASK; -} - -static inline u64 gen8_pte_count(u64 address, u64 length) -{ - return i915_pte_count(address, length, GEN8_PDE_SHIFT); -} - static inline dma_addr_t i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n) {