diff mbox

drm/i915: Use drm_mm for PPGTT PDEs

Message ID 1365648219-10221-1-git-send-email-ben@bwidawsk.net (mailing list archive)
State New, archived
Headers show

Commit Message

Ben Widawsky April 11, 2013, 2:43 a.m. UTC
I think this is a nice generalization on it's own, but it's primarily
prep work for my PPGTT support. Does this bother anyone?

The only down side I can see is we waste 2k of cpu unmappable space
(unless we have something else that is <= 2k and doesn't need to be page
aligned).

It's RFC because I have only hacked this up and haven't thoroughly
tested a bunch of error and suspend/resume paths.

Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_drv.h     |  1 +
 drivers/gpu/drm/i915/i915_gem_gtt.c | 56 ++++++++++++++++++++++++-------------
 2 files changed, 38 insertions(+), 19 deletions(-)

Comments

Ben Widawsky April 11, 2013, 2:42 a.m. UTC | #1
Oops forget the RFC tag in the subject.

[snip]
Chris Wilson April 11, 2013, 6:10 a.m. UTC | #2
On Wed, Apr 10, 2013 at 07:43:39PM -0700, Ben Widawsky wrote:
> I think this is a nice generalization on it's own, but it's primarily
> prep work for my PPGTT support. Does this bother anyone?
> 
> The only down side I can see is we waste 2k of cpu unmappable space
> (unless we have something else that is <= 2k and doesn't need to be page
> aligned).
> 
> It's RFC because I have only hacked this up and haven't thoroughly
> tested a bunch of error and suspend/resume paths.

Ugh. Fragmentation, you need a top down allocator.
-Chris
Ben Widawsky April 11, 2013, 3:36 p.m. UTC | #3
On Thu, Apr 11, 2013 at 07:10:50AM +0100, Chris Wilson wrote:
> On Wed, Apr 10, 2013 at 07:43:39PM -0700, Ben Widawsky wrote:
> > I think this is a nice generalization on it's own, but it's primarily
> > prep work for my PPGTT support. Does this bother anyone?
> > 
> > The only down side I can see is we waste 2k of cpu unmappable space
> > (unless we have something else that is <= 2k and doesn't need to be page
> > aligned).
> > 
> > It's RFC because I have only hacked this up and haven't thoroughly
> > tested a bunch of error and suspend/resume paths.
> 
> Ugh. Fragmentation, you need a top down allocator.
> -Chris

For the current case we can easily address this by setting the range to
be the old range and keep the existing allocator but limit it to the
same portion it used before. This isn't very interesting though and the
previous code is probably better suited for this case anyway since it's
inherently much more robust against accidentally overwriting the PDEs.

Would you agree that on GEN7 + multiple PPGTTs; the fragmentation is a
"don't care"?
Chris Wilson April 12, 2013, 9:43 a.m. UTC | #4
On Thu, Apr 11, 2013 at 08:36:26AM -0700, Ben Widawsky wrote:
> On Thu, Apr 11, 2013 at 07:10:50AM +0100, Chris Wilson wrote:
> > On Wed, Apr 10, 2013 at 07:43:39PM -0700, Ben Widawsky wrote:
> > > I think this is a nice generalization on it's own, but it's primarily
> > > prep work for my PPGTT support. Does this bother anyone?
> > > 
> > > The only down side I can see is we waste 2k of cpu unmappable space
> > > (unless we have something else that is <= 2k and doesn't need to be page
> > > aligned).
> > > 
> > > It's RFC because I have only hacked this up and haven't thoroughly
> > > tested a bunch of error and suspend/resume paths.
> > 
> > Ugh. Fragmentation, you need a top down allocator.
> > -Chris
> 
> For the current case we can easily address this by setting the range to
> be the old range and keep the existing allocator but limit it to the
> same portion it used before. This isn't very interesting though and the
> previous code is probably better suited for this case anyway since it's
> inherently much more robust against accidentally overwriting the PDEs.

As discussed, my concern here is that we are limiting the maximum
object size by placing very long lived and unmovable objects right in
the middle of the GTT.
 
> Would you agree that on GEN7 + multiple PPGTTs; the fragmentation is a
> "don't care"?

Yes. Given true ppggt, then we mostly only care about fragmentation in
the mappable region.
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3877c45..27573d8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -440,6 +440,7 @@  struct i915_hw_ppgtt {
 	uint32_t pd_offset;
 	dma_addr_t *pt_dma_addr;
 	dma_addr_t scratch_page_dma_addr;
+	struct drm_mm_node node;
 
 	/* pte functions, mirroring the interface of the global gtt. */
 	void (*clear_range)(struct i915_hw_ppgtt *ppgtt,
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 50df194..b474afb 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -75,12 +75,9 @@  static inline gen6_gtt_pte_t gen6_pte_encode(struct drm_device *dev,
 	return pte;
 }
 
-static int gen6_ppgtt_enable(struct drm_device *dev)
+static void gen6_write_pdes(struct drm_i915_private *dev_priv,
+			    struct i915_hw_ppgtt *ppgtt)
 {
-	drm_i915_private_t *dev_priv = dev->dev_private;
-	uint32_t pd_offset;
-	struct intel_ring_buffer *ring;
-	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
 	gen6_gtt_pte_t __iomem *pd_addr;
 	uint32_t pd_entry;
 	int i;
@@ -97,6 +94,17 @@  static int gen6_ppgtt_enable(struct drm_device *dev)
 		writel(pd_entry, pd_addr + i);
 	}
 	readl(pd_addr);
+}
+
+static int gen6_ppgtt_enable(struct drm_device *dev)
+{
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	uint32_t pd_offset;
+	struct intel_ring_buffer *ring;
+	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
+	int i;
+
+	gen6_write_pdes(dev_priv, ppgtt);
 
 	pd_offset = ppgtt->pd_offset;
 	pd_offset /= 64; /* in cachelines, */
@@ -208,6 +216,8 @@  static void gen6_ppgtt_cleanup(struct i915_hw_ppgtt *ppgtt)
 {
 	int i;
 
+	drm_mm_put_block(&ppgtt->node);
+
 	if (ppgtt->pt_dma_addr) {
 		for (i = 0; i < ppgtt->num_pd_entries; i++)
 			pci_unmap_page(ppgtt->dev->pdev,
@@ -226,15 +236,22 @@  static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 {
 	struct drm_device *dev = ppgtt->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	unsigned first_pd_entry_in_global_pt;
+	const u32 pde_size = I915_PPGTT_PD_ENTRIES * sizeof(gen6_gtt_pte_t);
 	int i;
 	int ret = -ENOMEM;
 
 	/* ppgtt PDEs reside in the global gtt pagetable, which has 512*1024
-	 * entries. For aliasing ppgtt support we just steal them at the end for
-	 * now. */
-	first_pd_entry_in_global_pt =
-		gtt_total_entries(dev_priv->gtt) - I915_PPGTT_PD_ENTRIES;
+	 * entries. Carve out enough space in the cpu unmappable area.
+	 */
+	BUG_ON(!drm_mm_initialized(&dev_priv->mm.gtt_space));
+	ret = drm_mm_insert_node_in_range_generic(&dev_priv->mm.gtt_space,
+						  &ppgtt->node, pde_size,
+						  PAGE_SIZE, 0,
+						  dev_priv->gtt.mappable_end,
+						  dev_priv->gtt.total);
+	if (ret)
+		return ret;
+
 
 	ppgtt->num_pd_entries = I915_PPGTT_PD_ENTRIES;
 	ppgtt->enable = gen6_ppgtt_enable;
@@ -243,8 +260,10 @@  static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 	ppgtt->cleanup = gen6_ppgtt_cleanup;
 	ppgtt->pt_pages = kzalloc(sizeof(struct page *)*ppgtt->num_pd_entries,
 				  GFP_KERNEL);
-	if (!ppgtt->pt_pages)
+	if (!ppgtt->pt_pages) {
+		drm_mm_put_block(&ppgtt->node);
 		return -ENOMEM;
+	}
 
 	for (i = 0; i < ppgtt->num_pd_entries; i++) {
 		ppgtt->pt_pages[i] = alloc_page(GFP_KERNEL);
@@ -274,7 +293,9 @@  static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 	ppgtt->clear_range(ppgtt, 0,
 			   ppgtt->num_pd_entries*I915_PPGTT_PT_ENTRIES);
 
-	ppgtt->pd_offset = first_pd_entry_in_global_pt * sizeof(gen6_gtt_pte_t);
+	DRM_DEBUG_DRIVER("Allocated pde space at GSM offset: %lx\n",
+			 ppgtt->node.start / PAGE_SIZE);
+	ppgtt->pd_offset = ppgtt->node.start / PAGE_SIZE;
 
 	return 0;
 
@@ -291,6 +312,7 @@  err_pt_alloc:
 			__free_page(ppgtt->pt_pages[i]);
 	}
 	kfree(ppgtt->pt_pages);
+	drm_mm_put_block(&ppgtt->node);
 
 	return ret;
 }
@@ -397,6 +419,9 @@  void i915_gem_restore_gtt_mappings(struct drm_device *dev)
 	dev_priv->gtt.gtt_clear_range(dev, dev_priv->gtt.start / PAGE_SIZE,
 				      dev_priv->gtt.total / PAGE_SIZE);
 
+	if (dev_priv->mm.aliasing_ppgtt)
+		gen6_write_pdes(dev_priv, dev_priv->mm.aliasing_ppgtt);
+
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list) {
 		i915_gem_clflush_object(obj);
 		i915_gem_gtt_bind_object(obj, obj->cache_level);
@@ -641,12 +666,6 @@  void i915_gem_init_global_gtt(struct drm_device *dev)
 	if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) {
 		int ret;
 
-		if (INTEL_INFO(dev)->gen <= 7) {
-			/* PPGTT pdes are stolen from global gtt ptes, so shrink the
-			 * aperture accordingly when using aliasing ppgtt. */
-			gtt_size -= I915_PPGTT_PD_ENTRIES*PAGE_SIZE;
-		}
-
 		i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
 
 		ret = i915_gem_init_aliasing_ppgtt(dev);
@@ -655,7 +674,6 @@  void i915_gem_init_global_gtt(struct drm_device *dev)
 
 		DRM_ERROR("Aliased PPGTT setup failed %d\n", ret);
 		drm_mm_takedown(&dev_priv->mm.gtt_space);
-		gtt_size += I915_PPGTT_PD_ENTRIES*PAGE_SIZE;
 	}
 	i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
 }