From patchwork Fri Jun 14 16:43:50 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Mika Kuoppala X-Patchwork-Id: 10995983 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 3A19076 for ; Fri, 14 Jun 2019 16:44:06 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 2ADD626538 for ; Fri, 14 Jun 2019 16:44:06 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 1F78E28390; Fri, 14 Jun 2019 16:44:06 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-5.2 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_MED autolearn=ham version=3.3.1 Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher DHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.wl.linuxfoundation.org (Postfix) with ESMTPS id A1CA426538 for ; Fri, 14 Jun 2019 16:44:05 +0000 (UTC) Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 0FFA489B03; Fri, 14 Jun 2019 16:44:05 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from mga04.intel.com (mga04.intel.com [192.55.52.120]) by gabe.freedesktop.org (Postfix) with ESMTPS id 8F69589AF3 for ; Fri, 14 Jun 2019 16:43:56 +0000 (UTC) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga006.fm.intel.com ([10.253.24.20]) by fmsmga104.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 14 Jun 2019 09:43:56 -0700 X-ExtLoop1: 1 Received: from rosetta.fi.intel.com ([10.237.72.186]) by fmsmga006.fm.intel.com with ESMTP; 14 Jun 2019 
09:43:55 -0700 Received: by rosetta.fi.intel.com (Postfix, from userid 1000) id E665784068C; Fri, 14 Jun 2019 19:43:50 +0300 (EEST) From: Mika Kuoppala To: intel-gfx@lists.freedesktop.org Date: Fri, 14 Jun 2019 19:43:50 +0300 Message-Id: <20190614164350.30415-10-mika.kuoppala@linux.intel.com> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20190614164350.30415-1-mika.kuoppala@linux.intel.com> References: <20190614164350.30415-1-mika.kuoppala@linux.intel.com> Subject: [Intel-gfx] [PATCH 10/10] drm/i915/gtt: Setup phys pages for 3lvl pdps X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , MIME-Version: 1.0 Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" X-Virus-Scanned: ClamAV using ClamSMTP If we set up backing phys pages for 3lvl pdps, even though they are not used, we lose 5 pages per ppgtt. Trading this memory on bsw, we gain more common code paths for all gen8+ directory manipulation. Those paths are now free of checks for the page directory type, making the hot paths faster. 
Signed-off-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_gem_gtt.c | 112 +++++++++++++++++----------- 1 file changed, 68 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index ba2802c25d13..c76c92072d54 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -714,22 +714,14 @@ static struct i915_page_directory *alloc_pd(struct i915_address_space *vm) return pd; } -static inline bool pd_has_phys_page(const struct i915_page_directory * const pd) -{ - return pd->base.page; -} - static void free_pd(struct i915_address_space *vm, struct i915_page_directory *pd) { - if (likely(pd_has_phys_page(pd))) - cleanup_page_dma(vm, &pd->base); - + cleanup_page_dma(vm, &pd->base); kfree(pd); } #define init_pd(vm, pd, to) { \ - GEM_DEBUG_BUG_ON(!pd_has_phys_page(pd)); \ fill_px((vm), (pd), gen8_pde_encode(px_dma(to), I915_CACHE_LLC)); \ memset_p((pd)->entry, (to), 512); \ } @@ -747,8 +739,7 @@ static void __set_pd_entry(struct i915_page_directory * const pd, #define set_pd_entry(pd, pde, to) ({ \ (pd)->entry[(pde)] = (to); \ - if (likely(pd_has_phys_page(pd))) \ - __set_pd_entry((pd), (pde), \ + __set_pd_entry((pd), (pde), \ gen8_pde_encode(px_dma(to), I915_CACHE_LLC)); \ }) @@ -764,8 +755,7 @@ __swap_pd_entry(struct i915_page_directory * const pd, if (likely(old == old_val)) { atomic_inc(&pd->used); - if (likely(pd_has_phys_page(pd))) - __set_pd_entry(pd, pde, encode(daddr, I915_CACHE_LLC)); + __set_pd_entry(pd, pde, encode(daddr, I915_CACHE_LLC)); } return old; @@ -1539,6 +1529,50 @@ static void ppgtt_init(struct drm_i915_private *i915, ppgtt->vm.vma_ops.clear_pages = clear_pages; } +static void init_pd_n(struct i915_address_space *vm, + struct i915_page_directory *pd, + struct i915_page_directory *to, + const unsigned int entries) +{ + const u64 daddr = gen8_pde_encode(px_dma(to), I915_CACHE_LLC); + u64 * const vaddr = kmap_atomic(pd->base.page); + + memset64(vaddr, daddr, 
entries); + kunmap_atomic(vaddr); + + memset_p(pd->entry, to, entries); +} + +static struct i915_page_directory * +gen8_alloc_top_pd(struct i915_address_space *vm) +{ + struct i915_page_directory *pd; + + if (i915_vm_is_4lvl(vm)) { + pd = alloc_pd(vm); + if (!IS_ERR(pd)) + init_pd(vm, pd, vm->scratch_pdp); + + return pd; + } + + /* 3lvl */ + pd = __alloc_pd(); + if (!pd) + return ERR_PTR(-ENOMEM); + + pd->entry[GEN8_3LVL_PDPES] = NULL; + + if (unlikely(setup_page_dma(vm, &pd->base))) { + kfree(pd); + return ERR_PTR(-ENOMEM); + } + + init_pd_n(vm, pd, vm->scratch_pd, GEN8_3LVL_PDPES); + + return pd; +} + /* * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers * with a net effect resembling a 2-level page table in normal x86 terms. Each @@ -1548,6 +1582,7 @@ static void ppgtt_init(struct drm_i915_private *i915, */ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915) { + struct i915_address_space *vm; struct i915_ppgtt *ppgtt; int err; @@ -1557,70 +1592,59 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915) ppgtt_init(i915, ppgtt); + vm = &ppgtt->vm; + /* * From bdw, there is hw support for read-only pages in the PPGTT. * * Gen11 has HSDES#:1807136187 unresolved. Disable ro support * for now. */ - ppgtt->vm.has_read_only = INTEL_GEN(i915) != 11; + vm->has_read_only = INTEL_GEN(i915) != 11; /* There are only few exceptions for gen >=6. chv and bxt. * And we are not sure about the latter so play safe for now. 
*/ if (IS_CHERRYVIEW(i915) || IS_BROXTON(i915)) - ppgtt->vm.pt_kmap_wc = true; + vm->pt_kmap_wc = true; - err = gen8_init_scratch(&ppgtt->vm); + err = gen8_init_scratch(vm); if (err) goto err_free; - ppgtt->pd = __alloc_pd(); - if (!ppgtt->pd) { - err = -ENOMEM; + ppgtt->pd = gen8_alloc_top_pd(vm); + if (IS_ERR(ppgtt->pd)) { + err = PTR_ERR(ppgtt->pd); goto err_free_scratch; } - if (i915_vm_is_4lvl(&ppgtt->vm)) { - err = setup_page_dma(&ppgtt->vm, &ppgtt->pd->base); - if (err) - goto err_free_pdp; - - init_pd(&ppgtt->vm, ppgtt->pd, ppgtt->vm.scratch_pdp); - - ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_4lvl; - ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl; - ppgtt->vm.clear_range = gen8_ppgtt_clear_4lvl; + if (i915_vm_is_4lvl(vm)) { + vm->allocate_va_range = gen8_ppgtt_alloc_4lvl; + vm->insert_entries = gen8_ppgtt_insert_4lvl; + vm->clear_range = gen8_ppgtt_clear_4lvl; } else { - /* - * We don't need to setup dma for top level pdp, only - * for entries. So point entries to scratch. - */ - memset_p(ppgtt->pd->entry, ppgtt->vm.scratch_pd, - GEN8_3LVL_PDPES); - if (intel_vgpu_active(i915)) { err = gen8_preallocate_top_level_pdp(ppgtt); if (err) - goto err_free_pdp; + goto err_free_pd; } - ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_3lvl; - ppgtt->vm.insert_entries = gen8_ppgtt_insert_3lvl; - ppgtt->vm.clear_range = gen8_ppgtt_clear_3lvl; + vm->allocate_va_range = gen8_ppgtt_alloc_3lvl; + vm->insert_entries = gen8_ppgtt_insert_3lvl; + vm->clear_range = gen8_ppgtt_clear_3lvl; } if (intel_vgpu_active(i915)) gen8_ppgtt_notify_vgt(ppgtt, true); - ppgtt->vm.cleanup = gen8_ppgtt_cleanup; + vm->cleanup = gen8_ppgtt_cleanup; return ppgtt; -err_free_pdp: - free_pd(&ppgtt->vm, ppgtt->pd); +err_free_pd: + free_pd(vm, ppgtt->pd); err_free_scratch: - gen8_free_scratch(&ppgtt->vm); + gen8_free_scratch(vm); err_free: kfree(ppgtt); return ERR_PTR(err);