From patchwork Fri Aug 22 03:12:18 2014
X-Patchwork-Submitter: Ben Widawsky
X-Patchwork-Id: 4760911
From: Ben Widawsky
To: Intel GFX
Cc: Ben Widawsky
Date: Thu, 21 Aug 2014 20:12:18 -0700
Message-Id: <1408677155-1840-56-git-send-email-benjamin.widawsky@intel.com>
In-Reply-To: <1408677155-1840-1-git-send-email-benjamin.widawsky@intel.com>
References: <1408677155-1840-1-git-send-email-benjamin.widawsky@intel.com>
Subject: [Intel-gfx] [PATCH 55/68] drm/i915/bdw: Make pdp allocation more dynamic

This transitional patch doesn't do much for the existing code. However,
it should make the upcoming patches that use the full 48b address space a
bit easier to swallow. The patch also introduces the PML4, i.e. the new
top-level structure of the page tables.
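For review purposes, here is the address geometry the PML4 implies; this
sketch is illustrative only and not part of the patch. With four levels
the layout matches x86-64: 9 bits of index per level plus a 12 bit page
offset (9 + 9 + 9 + 9 + 12 = 48). The helper name below is hypothetical;
the shifts and entry counts come from the i915_gem_gtt.h hunk in this
patch.

/* Hypothetical sketch: splitting a 48b PPGTT address with the shifts this
 * patch defines (GEN8_PML4E_SHIFT, GEN8_PDPE_SHIFT, GEN8_PDE_SHIFT). */
static inline void example_decompose_48b_addr(uint64_t addr)
{
	uint32_t pml4e = (addr >> 39) & 0x1ff; /* 512 PML4Es per PML4 */
	uint32_t pdpe  = (addr >> 30) & 0x1ff; /* 512 PDPEs per PDP (4 legacy) */
	uint32_t pde   = (addr >> 21) & 0x1ff; /* 512 PDEs per PD */
	uint32_t pte   = (addr >> 12) & 0x1ff; /* 512 PTEs per PT */

	(void)pml4e; (void)pdpe; (void)pde; (void)pte;
}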
Signed-off-by: Ben Widawsky
---
 drivers/gpu/drm/i915/i915_drv.h     |   5 ++
 drivers/gpu/drm/i915/i915_gem_gtt.c | 114 +++++++++++++++++++++++++++++-------
 drivers/gpu/drm/i915/i915_gem_gtt.h |  40 +++++++++----
 3 files changed, 128 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index beb9a66..ff921e6 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2078,6 +2078,11 @@ struct drm_i915_cmd_table {
 #define HAS_PPGTT(dev)		(INTEL_INFO(dev)->gen >= 7 && !IS_CHERRYVIEW(dev))
 #define USES_PPGTT(dev)		(i915.enable_ppgtt)
 #define USES_FULL_PPGTT(dev)	(i915.enable_ppgtt == 2)
+#ifdef CONFIG_64BIT
+# define HAS_48B_PPGTT(dev)	(IS_BROADWELL(dev) && false)
+#else
+# define HAS_48B_PPGTT(dev)	false
+#endif
 
 #define HAS_OVERLAY(dev)		(INTEL_INFO(dev)->has_overlay)
 #define OVERLAY_NEEDS_PHYSICAL(dev)	(INTEL_INFO(dev)->overlay_needs_physical)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 84e139d..8e15842 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -394,6 +394,45 @@ free_pd:
 	return ERR_PTR(ret);
 }
 
+static void __pdp_fini(struct i915_pagedirpo *pdp)
+{
+	kfree(pdp->used_pdpes);
+	kfree(pdp->pagedirs);
+	/* HACK */
+	pdp->pagedirs = NULL;
+}
+
+static void free_pdp_single(struct i915_pagedirpo *pdp,
+			    struct drm_device *dev)
+{
+	__pdp_fini(pdp);
+	if (HAS_48B_PPGTT(dev))
+		kfree(pdp);
+}
+
+static int __pdp_init(struct i915_pagedirpo *pdp,
+		      struct drm_device *dev)
+{
+	size_t pdpes = I915_PDPES_PER_PDP(dev);
+
+	pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes),
+				  sizeof(unsigned long),
+				  GFP_KERNEL);
+	if (!pdp->used_pdpes)
+		return -ENOMEM;
+
+	pdp->pagedirs = kcalloc(pdpes, sizeof(*pdp->pagedirs), GFP_KERNEL);
+	if (!pdp->pagedirs) {
+		kfree(pdp->used_pdpes);
+		/* the PDP might be the statically allocated top level. Keep it
+		 * as clean as possible */
+		pdp->used_pdpes = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
 /* Broadwell Page Directory Pointer Descriptors */
 static int gen8_write_pdp(struct intel_engine_cs *ring,
 			  unsigned entry,
@@ -432,7 +471,7 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
 {
 	int i, ret;
 
-	for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) {
+	for (i = 3; i >= 0; i--) {
 		struct i915_pagedir *pd = ppgtt->pdp.pagedirs[i];
 		dma_addr_t pd_daddr = pd ? pd->daddr : ppgtt->scratch_pd->daddr;

 		/* The page directory might be NULL, but we need to clear out
@@ -506,9 +545,6 @@ static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
 	pt_vaddr = NULL;
 
 	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
-		if (WARN_ON(pdpe >= GEN8_LEGACY_PDPES))
-			break;
-
 		if (pt_vaddr == NULL) {
 			struct i915_pagedir *pd = ppgtt->pdp.pagedirs[pdpe];
 			struct i915_pagetab *pt = pd->page_tables[pde];
@@ -574,11 +610,17 @@ static void __gen8_teardown_va_range(struct i915_address_space *vm,
 {
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
+	struct drm_device *dev = vm->dev;
 	struct i915_pagedir *pd;
 	struct i915_pagetab *pt;
 	uint64_t temp;
 	uint32_t pdpe, pde;
 
+	if (!ppgtt->pdp.pagedirs) {
+		/* If pagedirs are already free, there is nothing to do. */
+		return;
+	}
+
 	gen8_for_each_pdpe(pd, &ppgtt->pdp, start, length, temp, pdpe) {
 		uint64_t pd_len = gen8_clamp_pd(start, length);
 		uint64_t pd_start = start;
@@ -633,9 +675,8 @@ static void __gen8_teardown_va_range(struct i915_address_space *vm,
 				pt->zombie = 1;
 				continue;
 			}
-			free_pt_single(pt, vm->dev);
+			free_pt_single(pt, dev);
 			pd->page_tables[pde] = NULL;
-
 		}
 	}
 
@@ -649,10 +690,15 @@ static void __gen8_teardown_va_range(struct i915_address_space *vm,
 				pd->zombie = 1;
 				continue;
 			}
-			free_pd_single(pd, vm->dev);
+			free_pd_single(pd, dev);
 			ppgtt->pdp.pagedirs[pdpe] = NULL;
 		}
 	}
+
+	if (bitmap_empty(ppgtt->pdp.used_pdpes, I915_PDPES_PER_PDP(dev))) {
+		/* TODO: When pagetables are fully dynamic:
+		 * free_pdp_single(&ppgtt->pdp, dev); */
+	}
 }
 
 static void gen8_teardown_va_range(struct i915_address_space *vm,
@@ -669,6 +715,9 @@ static void gen8_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
 	__gen8_teardown_va_range(&ppgtt->base, ppgtt->base.start,
 				 ppgtt->base.total, true);
+	WARN_ON(!bitmap_empty(ppgtt->pdp.used_pdpes,
+			      I915_PDPES_PER_PDP(ppgtt->base.dev)));
+	free_pdp_single(&ppgtt->pdp, ppgtt->base.dev);
 }
 
 static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
@@ -769,11 +818,13 @@ static int gen8_ppgtt_alloc_pagedirs(struct i915_hw_ppgtt *ppgtt,
 				     uint64_t length,
 				     unsigned long *new_pds)
 {
+	struct drm_device *dev = ppgtt->base.dev;
 	struct i915_pagedir *pd;
 	uint64_t temp;
 	uint32_t pdpe;
+	size_t pdpes = I915_PDPES_PER_PDP(ppgtt->base.dev);
 
-	BUG_ON(!bitmap_empty(new_pds, GEN8_LEGACY_PDPES));
+	BUG_ON(!bitmap_empty(new_pds, pdpes));
 
 	/* FIXME: PPGTT container_of won't work for 64b */
 	BUG_ON((start + length) > 0x800000000ULL);
@@ -793,17 +844,18 @@ static int gen8_ppgtt_alloc_pagedirs(struct i915_hw_ppgtt *ppgtt,
 	return 0;
 
 unwind_out:
-	for_each_set_bit(pdpe, new_pds, GEN8_LEGACY_PDPES)
+	for_each_set_bit(pdpe, new_pds, pdpes)
 		free_pd_single(pdp->pagedirs[pdpe], ppgtt->base.dev);
 
 	return -ENOMEM;
 }
 
 static inline void
-free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long **new_pts)
+free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long **new_pts,
+		       size_t pdpes)
 {
 	int i;
 
-	for (i = 0; i < GEN8_LEGACY_PDPES; i++)
+	for (i = 0; i < pdpes; i++)
 		kfree(new_pts[i]);
 	kfree(new_pts);
 	kfree(new_pds);
@@ -813,13 +865,14 @@ free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long **new_pts)
  * of these are based on the number of PDPEs in the system.
  */
 int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
-					 unsigned long ***new_pts)
+					 unsigned long ***new_pts,
+					 size_t pdpes)
 {
 	int i;
 	unsigned long *pds;
 	unsigned long **pts;
 
-	pds = kcalloc(BITS_TO_LONGS(GEN8_LEGACY_PDPES), sizeof(unsigned long), GFP_KERNEL);
+	pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_KERNEL);
 	if (!pds)
 		return -ENOMEM;
@@ -829,7 +882,7 @@ int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
 		return -ENOMEM;
 	}
 
-	for (i = 0; i < GEN8_LEGACY_PDPES; i++) {
+	for (i = 0; i < pdpes; i++) {
 		pts[i] = kcalloc(BITS_TO_LONGS(I915_PDES_PER_PD),
 				 sizeof(unsigned long), GFP_KERNEL);
 		if (!pts[i])
@@ -842,7 +895,7 @@ int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
 	return 0;
 
 err_out:
-	free_gen8_temp_bitmaps(pds, pts);
+	free_gen8_temp_bitmaps(pds, pts, pdpes);
 	return -ENOMEM;
 }
@@ -853,11 +906,13 @@ static int gen8_alloc_va_range(struct i915_address_space *vm,
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
 	unsigned long *new_page_dirs, **new_page_tables;
+	struct drm_device *dev = vm->dev;
 	struct i915_pagedir *pd;
 	const uint64_t orig_start = start;
 	const uint64_t orig_length = length;
 	uint64_t temp;
 	uint32_t pdpe;
+	size_t pdpes = I915_PDPES_PER_PDP(dev);
 	int ret;
 
 #ifndef CONFIG_64BIT
@@ -874,7 +929,7 @@ static int gen8_alloc_va_range(struct i915_address_space *vm,
 	if (WARN_ON(start + length < start))
 		return -ERANGE;
 
-	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables);
+	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
 	if (ret)
 		return ret;
@@ -882,7 +937,7 @@ static int gen8_alloc_va_range(struct i915_address_space *vm,
 	ret = gen8_ppgtt_alloc_pagedirs(ppgtt, &ppgtt->pdp, start, length,
 					new_page_dirs);
 	if (ret) {
-		free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
+		free_gen8_temp_bitmaps(new_page_dirs, new_page_tables, pdpes);
 		return ret;
 	}
@@ -944,7 +999,7 @@ static int gen8_alloc_va_range(struct i915_address_space *vm,
 		ppgtt->pdp.pagedirs[pdpe]->zombie = 0;
 	}
 
-	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
+	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables, pdpes);
 	return 0;
 
 err_out:
@@ -953,13 +1008,19 @@ err_out:
 			free_pt_single(pd->page_tables[temp], vm->dev);
 	}
 
-	for_each_set_bit(pdpe, new_page_dirs, GEN8_LEGACY_PDPES)
+	for_each_set_bit(pdpe, new_page_dirs, pdpes)
 		free_pd_single(ppgtt->pdp.pagedirs[pdpe], vm->dev);
 
-	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
+	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables, pdpes);
 	return ret;
 }
 
+static void gen8_ppgtt_fini_common(struct i915_hw_ppgtt *ppgtt)
+{
+	free_pt_scratch(ppgtt->scratch_pd, ppgtt->base.dev);
+	free_pdp_single(&ppgtt->pdp, ppgtt->base.dev);
+}
+
 /**
  * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
  * with a net effect resembling a 2-level page table in normal x86 terms. Each
@@ -981,6 +1042,17 @@ static int gen8_ppgtt_init_common(struct i915_hw_ppgtt *ppgtt, uint64_t size)
 	ppgtt->enable = gen8_ppgtt_enable;
 	ppgtt->switch_mm = gen8_mm_switch;
 
+	if (!HAS_48B_PPGTT(ppgtt->base.dev)) {
+		int ret = __pdp_init(&ppgtt->pdp, ppgtt->base.dev);
+		if (ret) {
+			free_pt_scratch(ppgtt->scratch_pd, ppgtt->base.dev);
+			return ret;
+		}
+
+		ppgtt->switch_mm = gen8_mm_switch;
+	} else
+		BUG(); /* Not yet implemented */
+
 	return 0;
 }
@@ -1002,7 +1074,7 @@ static int gen8_aliasing_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 	 * eventually.
	 */
 	ret = gen8_alloc_va_range(&ppgtt->base, start, size);
 	if (ret) {
-		free_pt_scratch(ppgtt->scratch_pd, ppgtt->base.dev);
+		gen8_ppgtt_fini_common(ppgtt);
 		return ret;
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index d9759c7..95b5d16 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -88,7 +88,6 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
 #define PPAT_CACHED_INDEX		_PAGE_PAT /* WB LLCeLLC */
 #define PPAT_DISPLAY_ELLC_INDEX		_PAGE_PCD /* WT eLLC */
 
-#define GEN8_LEGACY_PDPES		4
 #define GEN8_PTES_PER_PT		(PAGE_SIZE / sizeof(gen8_gtt_pte_t))
 
 /* GEN8 legacy style address is defined as a 3 level page table:
@@ -97,8 +96,17 @@ typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
  * The difference as compared to normal x86 3 level page table is the PDPEs are
  * programmed via register.
  */
+#ifdef CONFIG_64BIT
+# define I915_PDPES_PER_PDP(dev) (HAS_48B_PPGTT(dev) ? 512 : 4)
+#else
+# define I915_PDPES_PER_PDP(dev) 4
+#endif
+#define GEN8_PML4ES_PER_PML4		512
+#define GEN8_PML4E_SHIFT		39
 #define GEN8_PDPE_SHIFT			30
-#define GEN8_PDPE_MASK			0x3
+/* NB: GEN8_PDPE_MASK is untrue for 32b platforms, but it has no impact on
+ * 32b page tables. */
+#define GEN8_PDPE_MASK			0x1ff
 #define GEN8_PDE_SHIFT			21
 
 #define PPAT_UNCACHED_INDEX		(_PAGE_PWT | _PAGE_PCD)
@@ -214,9 +222,17 @@ struct i915_pagedir {
 };
 
 struct i915_pagedirpo {
-	/* struct page *page; */
-	DECLARE_BITMAP(used_pdpes, GEN8_LEGACY_PDPES);
-	struct i915_pagedir *pagedirs[GEN8_LEGACY_PDPES];
+	struct page *page;
+	dma_addr_t daddr;
+	unsigned long *used_pdpes;
+	struct i915_pagedir **pagedirs;
+};
+
+struct i915_pml4 {
+	struct page *page;
+	dma_addr_t daddr;
+	DECLARE_BITMAP(used_pml4es, GEN8_PML4ES_PER_PML4);
+	struct i915_pagedirpo *pdps[GEN8_PML4ES_PER_PML4];
 };
 
 struct i915_address_space {
@@ -282,8 +298,9 @@ struct i915_hw_ppgtt {
 	struct kref ref;
 	struct drm_mm_node node;
 	union {
-		struct i915_pagedirpo pdp;
-		struct i915_pagedir pd;
+		struct i915_pml4 pml4;		/* GEN8+ & 64b PPGTT */
+		struct i915_pagedirpo pdp;	/* GEN8+ */
+		struct i915_pagedir pd;		/* GEN6-7 */
 	};
 
 	union {
@@ -430,14 +447,17 @@ static inline size_t gen6_pde_count(uint32_t addr, uint32_t length)
 	     temp = min(temp, length),					\
 	     start += temp, length -= temp)
 
-#define gen8_for_each_pdpe(pd, pdp, start, length, temp, iter)		\
-	for (iter = gen8_pdpe_index(start), pd = (pdp)->pagedirs[iter];	\
-	     length > 0 && iter < GEN8_LEGACY_PDPES;			\
+#define gen8_for_each_pdpe_e(pd, pdp, start, length, temp, iter, b)	\
+	for (iter = gen8_pdpe_index(start), pd = (pdp)->pagedirs[iter];	\
+	     length > 0 && (iter < b);					\
 	     pd = (pdp)->pagedirs[++iter],				\
 	     temp = ALIGN(start+1, 1 << GEN8_PDPE_SHIFT) - start,	\
 	     temp = min(temp, length),					\
 	     start += temp, length -= temp)
 
+#define gen8_for_each_pdpe(pd, pdp, start, length, temp, iter)		\
+	gen8_for_each_pdpe_e(pd, pdp, start, length, temp, iter, I915_PDPES_PER_PDP(dev))
+
 /* Clamp length to the next pagetab boundary */
 static inline uint64_t gen8_clamp_pt(uint64_t start, uint64_t length)
 {
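A subtlety worth flagging for reviewers: the rewritten gen8_for_each_pdpe
wrapper expands to I915_PDPES_PER_PDP(dev), so it implicitly requires a
variable named dev in the caller's scope; that is why the .c hunks above add
local "struct drm_device *dev" declarations to __gen8_teardown_va_range and
gen8_alloc_va_range. A hypothetical caller (sketch only; walk_pdpes and its
body are illustrative, not part of the patch):

/* Hypothetical sketch: a caller of gen8_for_each_pdpe must declare a
 * local named 'dev', because the macro references it by name. */
static void walk_pdpes(struct i915_address_space *vm,
		       struct i915_pagedirpo *pdp,
		       uint64_t start, uint64_t length)
{
	struct drm_device *dev = vm->dev;	/* required by the macro */
	struct i915_pagedir *pd;
	uint64_t temp;
	uint32_t pdpe;

	gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
		/* pd may be NULL for ranges that never got a page directory */
	}
}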