diff mbox

[10/53] drm/i915/bdw: A bit more advanced context init/fini

Message ID 1402673891-14618-11-git-send-email-oscar.mateo@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

oscar.mateo@intel.com June 13, 2014, 3:37 p.m. UTC
From: Oscar Mateo <oscar.mateo@intel.com>

There are a few big differences between context init and fini with the
previous implementation of hardware contexts. One of them is
demonstrated in this patch: we must allocate a ctx backing object for
each engine.

Regarding the context size, reading the register to calculate the sizes
can work, I think, however the docs are very clear about the actual
context sizes on GEN8, so just hardcode that and use it.

v2: Rebased on top of the Full PPGTT series. It is important to notice
that at this point we have one global default context per engine, all
of them using the aliasing PPGTT (as opposed to the single global
default context we have with legacy HW contexts).

v3:
- Go back to one single global default context, this time with multiple
  backing objects inside.
- Use different context sizes for non-render engines, as suggested by
  Damien (still hardcoded, since the information about the context size
  registers in the BSpec is, well, *lacking*).
- Render ctx size is 20 (or 19) pages, but not 21 (caught by Damien).
- Move default context backing object creation to intel_init_ring (so
  that we don't waste memory in rings that might not get initialized).

v4:
- Reuse the HW legacy context init/fini.
- Create a separate free function.
- Rename the functions with an intel_ preffix.

Signed-off-by: Ben Widawsky <ben@bwidawsk.net> (v1)
Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h         |  3 ++
 drivers/gpu/drm/i915/i915_gem_context.c | 19 +++++++--
 drivers/gpu/drm/i915/intel_lrc.c        | 70 +++++++++++++++++++++++++++++++++
 3 files changed, 88 insertions(+), 4 deletions(-)

Comments

bradley.d.volkin@intel.com June 18, 2014, 10:13 p.m. UTC | #1
On Fri, Jun 13, 2014 at 08:37:28AM -0700, oscar.mateo@intel.com wrote:
> From: Oscar Mateo <oscar.mateo@intel.com>
> 
> There are a few big differences between context init and fini with the
> previous implementation of hardware contexts. One of them is
> demonstrated in this patch: we must allocate a ctx backing object for
> each engine.
> 
> Regarding the context size, reading the register to calculate the sizes
> can work, I think, however the docs are very clear about the actual
> context sizes on GEN8, so just hardcode that and use it.
> 
> v2: Rebased on top of the Full PPGTT series. It is important to notice
> that at this point we have one global default context per engine, all
> of them using the aliasing PPGTT (as opposed to the single global
> default context we have with legacy HW contexts).
> 
> v3:
> - Go back to one single global default context, this time with multiple
>   backing objects inside.
> - Use different context sizes for non-render engines, as suggested by
>   Damien (still hardcoded, since the information about the context size
>   registers in the BSpec is, well, *lacking*).
> - Render ctx size is 20 (or 19) pages, but not 21 (caught by Damien).
> - Move default context backing object creation to intel_init_ring (so
>   that we don't waste memory in rings that might not get initialized).
> 
> v4:
> - Reuse the HW legacy context init/fini.
> - Create a separate free function.
> - Rename the functions with an intel_ preffix.
> 
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net> (v1)
> Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h         |  3 ++
>  drivers/gpu/drm/i915/i915_gem_context.c | 19 +++++++--
>  drivers/gpu/drm/i915/intel_lrc.c        | 70 +++++++++++++++++++++++++++++++++
>  3 files changed, 88 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index dac0db1..347308e 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2425,6 +2425,9 @@ i915_gem_context_validate(struct drm_device *dev, struct drm_file *file,
>  
>  /* intel_lrc.c */
>  bool intel_enable_execlists(struct drm_device *dev);
> +void intel_lr_context_free(struct intel_context *ctx);
> +int intel_lr_context_deferred_create(struct intel_context *ctx,
> +				     struct intel_engine_cs *ring);
>  
>  /* i915_gem_render_state.c */
>  int i915_gem_render_state_init(struct intel_engine_cs *ring);
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 3f3fb36..1fb4592 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -180,8 +180,11 @@ void i915_gem_context_free(struct kref *ctx_ref)
>  {
>  	struct intel_context *ctx = container_of(ctx_ref, typeof(*ctx), ref);
>  	struct drm_device *dev = ctx->dev;
> +	struct drm_i915_private *dev_priv = dev->dev_private;
>  
> -	if (ctx->render_obj) {
> +	if (dev_priv->lrc_enabled)
> +		intel_lr_context_free(ctx);
> +	else if (ctx->render_obj) {
>  		/* XXX: Free up the object before tearing down the address space, in
>  		 * case we're bound in the PPGTT */

Does this comment apply to both cases?

>  		drm_gem_object_unreference(&ctx->render_obj->base);
> @@ -438,9 +441,17 @@ int i915_gem_context_init(struct drm_device *dev)
>  		return PTR_ERR(ctx);
>  	}
>  
> -	/* NB: RCS will hold a ref for all rings */
> -	for (i = 0; i < I915_NUM_RINGS; i++)
> -		dev_priv->ring[i].default_context = ctx;
> +	for (i = 0; i < I915_NUM_RINGS; i++) {
> +		struct intel_engine_cs *ring = &dev_priv->ring[i];
> +
> +		/* NB: RCS will hold a ref for all rings */
> +		ring->default_context = ctx;
> +
> +		/* FIXME: we only want to do this for initialized rings, but for that
> +		 * we first need the new logical ring stuff */
> +		if (dev_priv->lrc_enabled)
> +			intel_lr_context_deferred_create(ctx, ring);
> +	}
>  
>  	DRM_DEBUG_DRIVER("%s context support initialized\n",
>  			dev_priv->lrc_enabled ? "LR" :
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 58cead1..952212f 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -41,6 +41,11 @@
>  #include <drm/i915_drm.h>
>  #include "i915_drv.h"
>  
> +#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE)
> +#define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE)
> +
> +#define GEN8_LR_CONTEXT_ALIGN 4096
> +
>  bool intel_enable_execlists(struct drm_device *dev)
>  {
>  	if (!i915.enable_execlists)
> @@ -48,3 +53,68 @@ bool intel_enable_execlists(struct drm_device *dev)
>  
>  	return HAS_LOGICAL_RING_CONTEXTS(dev) && USES_PPGTT(dev);
>  }
> +
> +void intel_lr_context_free(struct intel_context *ctx)
> +{
> +	int i;
> +
> +	for (i = 0; i < I915_NUM_RINGS; i++) {
> +		struct drm_i915_gem_object *ctx_obj = ctx->engine[i].obj;
> +		if (ctx_obj) {
> +			i915_gem_object_ggtt_unpin(ctx_obj);

I suspect that leaving the backing objects pinned in ggtt for their entire
lifetimes is going to eventually cause memory related issues. We might need
to look at managing the binding more dynamically - similar to what the
legacy context code already does.

Brad

> +			drm_gem_object_unreference(&ctx_obj->base);
> +		}
> +	}
> +}
> +
> +static uint32_t get_lr_context_size(struct intel_engine_cs *ring)
> +{
> +	int ret = 0;
> +
> +	WARN_ON(INTEL_INFO(ring->dev)->gen != 8);
> +
> +	switch (ring->id) {
> +	case RCS:
> +		ret = GEN8_LR_CONTEXT_RENDER_SIZE;
> +		break;
> +	case VCS:
> +	case BCS:
> +	case VECS:
> +	case VCS2:
> +		ret = GEN8_LR_CONTEXT_OTHER_SIZE;
> +		break;
> +	}
> +
> +	return ret;
> +}
> +
> +int intel_lr_context_deferred_create(struct intel_context *ctx,
> +				     struct intel_engine_cs *ring)
> +{
> +	struct drm_device *dev = ring->dev;
> +	struct drm_i915_gem_object *ctx_obj;
> +	uint32_t context_size;
> +	int ret;
> +
> +	WARN_ON(ctx->render_obj != NULL);
> +
> +	context_size = round_up(get_lr_context_size(ring), 4096);
> +
> +	ctx_obj = i915_gem_alloc_context_obj(dev, context_size);
> +	if (IS_ERR(ctx_obj)) {
> +		ret = PTR_ERR(ctx_obj);
> +		DRM_DEBUG_DRIVER("Alloc LRC backing obj failed: %d\n", ret);
> +		return ret;
> +	}
> +
> +	ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, 0);
> +	if (ret) {
> +		DRM_DEBUG_DRIVER("Pin LRC backing obj failed: %d\n", ret);
> +		drm_gem_object_unreference(&ctx_obj->base);
> +		return ret;
> +	}
> +
> +	ctx->engine[ring->id].obj = ctx_obj;
> +
> +	return 0;
> +}
> -- 
> 1.9.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Daniel Vetter June 19, 2014, 6:13 a.m. UTC | #2
On Thu, Jun 19, 2014 at 12:13 AM, Volkin, Bradley D
<bradley.d.volkin@intel.com> wrote:
>> +void intel_lr_context_free(struct intel_context *ctx)
>> +{
>> +     int i;
>> +
>> +     for (i = 0; i < I915_NUM_RINGS; i++) {
>> +             struct drm_i915_gem_object *ctx_obj = ctx->engine[i].obj;
>> +             if (ctx_obj) {
>> +                     i915_gem_object_ggtt_unpin(ctx_obj);
>
> I suspect that leaving the backing objects pinned in ggtt for their entire
> lifetimes is going to eventually cause memory related issues. We might need
> to look at managing the binding more dynamically - similar to what the
> legacy context code already does.

Oh, I didin't spot this. We definitely need the same handling as for
legacy ring handling, so:
- Only pin while a ctx is used, shoveling the old context through the
active list for timeline unbinding.
- Last-ditch effort in evict_something to switch to the default context.

Without that we'll fragment the global gtt badly even before we
exhaust it, and that means we can't pin scanout buffers any more and
can't handle gtt mmap faults any more. There should be a nasty
testcase somewhere even to exercise the last-ditch context evict code.
-Daniel
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index dac0db1..347308e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2425,6 +2425,9 @@  i915_gem_context_validate(struct drm_device *dev, struct drm_file *file,
 
 /* intel_lrc.c */
 bool intel_enable_execlists(struct drm_device *dev);
+void intel_lr_context_free(struct intel_context *ctx);
+int intel_lr_context_deferred_create(struct intel_context *ctx,
+				     struct intel_engine_cs *ring);
 
 /* i915_gem_render_state.c */
 int i915_gem_render_state_init(struct intel_engine_cs *ring);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 3f3fb36..1fb4592 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -180,8 +180,11 @@  void i915_gem_context_free(struct kref *ctx_ref)
 {
 	struct intel_context *ctx = container_of(ctx_ref, typeof(*ctx), ref);
 	struct drm_device *dev = ctx->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	if (ctx->render_obj) {
+	if (dev_priv->lrc_enabled)
+		intel_lr_context_free(ctx);
+	else if (ctx->render_obj) {
 		/* XXX: Free up the object before tearing down the address space, in
 		 * case we're bound in the PPGTT */
 		drm_gem_object_unreference(&ctx->render_obj->base);
@@ -438,9 +441,17 @@  int i915_gem_context_init(struct drm_device *dev)
 		return PTR_ERR(ctx);
 	}
 
-	/* NB: RCS will hold a ref for all rings */
-	for (i = 0; i < I915_NUM_RINGS; i++)
-		dev_priv->ring[i].default_context = ctx;
+	for (i = 0; i < I915_NUM_RINGS; i++) {
+		struct intel_engine_cs *ring = &dev_priv->ring[i];
+
+		/* NB: RCS will hold a ref for all rings */
+		ring->default_context = ctx;
+
+		/* FIXME: we only want to do this for initialized rings, but for that
+		 * we first need the new logical ring stuff */
+		if (dev_priv->lrc_enabled)
+			intel_lr_context_deferred_create(ctx, ring);
+	}
 
 	DRM_DEBUG_DRIVER("%s context support initialized\n",
 			dev_priv->lrc_enabled ? "LR" :
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 58cead1..952212f 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -41,6 +41,11 @@ 
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
 
+#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE)
+#define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE)
+
+#define GEN8_LR_CONTEXT_ALIGN 4096
+
 bool intel_enable_execlists(struct drm_device *dev)
 {
 	if (!i915.enable_execlists)
@@ -48,3 +53,68 @@  bool intel_enable_execlists(struct drm_device *dev)
 
 	return HAS_LOGICAL_RING_CONTEXTS(dev) && USES_PPGTT(dev);
 }
+
+void intel_lr_context_free(struct intel_context *ctx)
+{
+	int i;
+
+	for (i = 0; i < I915_NUM_RINGS; i++) {
+		struct drm_i915_gem_object *ctx_obj = ctx->engine[i].obj;
+		if (ctx_obj) {
+			i915_gem_object_ggtt_unpin(ctx_obj);
+			drm_gem_object_unreference(&ctx_obj->base);
+		}
+	}
+}
+
+static uint32_t get_lr_context_size(struct intel_engine_cs *ring)
+{
+	int ret = 0;
+
+	WARN_ON(INTEL_INFO(ring->dev)->gen != 8);
+
+	switch (ring->id) {
+	case RCS:
+		ret = GEN8_LR_CONTEXT_RENDER_SIZE;
+		break;
+	case VCS:
+	case BCS:
+	case VECS:
+	case VCS2:
+		ret = GEN8_LR_CONTEXT_OTHER_SIZE;
+		break;
+	}
+
+	return ret;
+}
+
+int intel_lr_context_deferred_create(struct intel_context *ctx,
+				     struct intel_engine_cs *ring)
+{
+	struct drm_device *dev = ring->dev;
+	struct drm_i915_gem_object *ctx_obj;
+	uint32_t context_size;
+	int ret;
+
+	WARN_ON(ctx->render_obj != NULL);
+
+	context_size = round_up(get_lr_context_size(ring), 4096);
+
+	ctx_obj = i915_gem_alloc_context_obj(dev, context_size);
+	if (IS_ERR(ctx_obj)) {
+		ret = PTR_ERR(ctx_obj);
+		DRM_DEBUG_DRIVER("Alloc LRC backing obj failed: %d\n", ret);
+		return ret;
+	}
+
+	ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, 0);
+	if (ret) {
+		DRM_DEBUG_DRIVER("Pin LRC backing obj failed: %d\n", ret);
+		drm_gem_object_unreference(&ctx_obj->base);
+		return ret;
+	}
+
+	ctx->engine[ring->id].obj = ctx_obj;
+
+	return 0;
+}