diff mbox

[CI,17/25] drm/i915: Assign every HW context a unique ID

Message ID 1461833819-3991-17-git-send-email-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson April 28, 2016, 8:56 a.m. UTC
The hardware tracks contexts and expects all live contexts (those active
on the hardware) to have a unique identifier. This is used by the
hardware to assign pagefaults and the like to a particular context.

v2: Reorder to make sure ctx->link is not left dangling if the
assignment of a hw_id fails (Mika).

v3: We have 21bits of context space, not 20.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c     |  2 +-
 drivers/gpu/drm/i915/i915_drv.h         | 10 +++++++++
 drivers/gpu/drm/i915/i915_gem_context.c | 36 +++++++++++++++++++++++++++++++++
 3 files changed, 47 insertions(+), 1 deletion(-)

Comments

Dave Gordon April 28, 2016, 2:55 p.m. UTC | #1
On 28/04/16 09:56, Chris Wilson wrote:
> The hardware tracks contexts and expects all live contexts (those active
> on the hardware) to have a unique identifier. This is used by the
> hardware to assign pagefaults and the like to a particular context.
>
> v2: Reorder to make sure ctx->link is not left dangling if the
> assignment of a hw_id fails (Mika).
>
> v3: We have 21bits of context space, not 20.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c     |  2 +-
>   drivers/gpu/drm/i915/i915_drv.h         | 10 +++++++++
>   drivers/gpu/drm/i915/i915_gem_context.c | 36 +++++++++++++++++++++++++++++++++
>   3 files changed, 47 insertions(+), 1 deletion(-)

[snip]

> +static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out)
> +{
> +	int ret;
> +
> +	ret = ida_simple_get(&dev_priv->context_hw_ida,
> +			     0, MAX_CONTEXT_HW_ID, GFP_KERNEL);
> +	if (ret < 0) {
> +		/* Contexts are only released when no longer active.
> +		 * Flush any pending retires to hopefully release some
> +		 * stale contexts and try again.
> +		 */
> +		i915_gem_retire_requests(dev_priv->dev);
> +		ret = ida_simple_get(&dev_priv->context_hw_ida,
> +				     0, MAX_CONTEXT_HW_ID, GFP_KERNEL);

Should we once again reserve ID 0 to make it easier to identify things 
that have not yet been initialised properly?

.Dave.
Chris Wilson April 28, 2016, 3:32 p.m. UTC | #2
On Thu, Apr 28, 2016 at 03:55:44PM +0100, Dave Gordon wrote:
> On 28/04/16 09:56, Chris Wilson wrote:
> >The hardware tracks contexts and expects all live contexts (those active
> >on the hardware) to have a unique identifier. This is used by the
> >hardware to assign pagefaults and the like to a particular context.
> >
> >v2: Reorder to make sure ctx->link is not left dangling if the
> >assignment of a hw_id fails (Mika).
> >
> >v3: We have 21bits of context space, not 20.
> >
> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >---
> >  drivers/gpu/drm/i915/i915_debugfs.c     |  2 +-
> >  drivers/gpu/drm/i915/i915_drv.h         | 10 +++++++++
> >  drivers/gpu/drm/i915/i915_gem_context.c | 36 +++++++++++++++++++++++++++++++++
> >  3 files changed, 47 insertions(+), 1 deletion(-)
> 
> [snip]
> 
> >+static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out)
> >+{
> >+	int ret;
> >+
> >+	ret = ida_simple_get(&dev_priv->context_hw_ida,
> >+			     0, MAX_CONTEXT_HW_ID, GFP_KERNEL);
> >+	if (ret < 0) {
> >+		/* Contexts are only released when no longer active.
> >+		 * Flush any pending retires to hopefully release some
> >+		 * stale contexts and try again.
> >+		 */
> >+		i915_gem_retire_requests(dev_priv->dev);
> >+		ret = ida_simple_get(&dev_priv->context_hw_ida,
> >+				     0, MAX_CONTEXT_HW_ID, GFP_KERNEL);
> 
> Should we once again reserve ID 0 to make it easier to identify
> things that have not yet been initialised properly?

Otoh, 0 currently is the kernel context. Since we can only get an
uninitialised value for a use-after-free, I don't see an advantage here
above and beyond using poisoning, kasan or kmemcheck.
-Chris
Dave Gordon April 28, 2016, 6:08 p.m. UTC | #3
On 28/04/16 16:32, Chris Wilson wrote:
> On Thu, Apr 28, 2016 at 03:55:44PM +0100, Dave Gordon wrote:
>> On 28/04/16 09:56, Chris Wilson wrote:
>>> The hardware tracks contexts and expects all live contexts (those active
>>> on the hardware) to have a unique identifier. This is used by the
>>> hardware to assign pagefaults and the like to a particular context.
>>>
>>> v2: Reorder to make sure ctx->link is not left dangling if the
>>> assignment of a hw_id fails (Mika).
>>>
>>> v3: We have 21bits of context space, not 20.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/i915_debugfs.c     |  2 +-
>>>   drivers/gpu/drm/i915/i915_drv.h         | 10 +++++++++
>>>   drivers/gpu/drm/i915/i915_gem_context.c | 36 +++++++++++++++++++++++++++++++++
>>>   3 files changed, 47 insertions(+), 1 deletion(-)
>>
>> [snip]
>>
>>> +static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out)
>>> +{
>>> +	int ret;
>>> +
>>> +	ret = ida_simple_get(&dev_priv->context_hw_ida,
>>> +			     0, MAX_CONTEXT_HW_ID, GFP_KERNEL);
>>> +	if (ret < 0) {
>>> +		/* Contexts are only released when no longer active.
>>> +		 * Flush any pending retires to hopefully release some
>>> +		 * stale contexts and try again.
>>> +		 */
>>> +		i915_gem_retire_requests(dev_priv->dev);
>>> +		ret = ida_simple_get(&dev_priv->context_hw_ida,
>>> +				     0, MAX_CONTEXT_HW_ID, GFP_KERNEL);
>>
>> Should we once again reserve ID 0 to make it easier to identify
>> things that have not yet been initialised properly?
>
> Otoh, 0 currently is the kernel context. Since we can only get an
> uninitialised value for a use-after-free, I don't see an advantage here
> above and beyond using poisoning, kasan or kmemcheck.
> -Chris

I was thinking more of a use-before-complete-initialisation case, rather 
than a use-after-free. Obviously shouldn't happen, but it does (or used 
to) with the list-head in the request structure.

.Dave.
Chris Wilson April 28, 2016, 6:35 p.m. UTC | #4
On Thu, Apr 28, 2016 at 07:08:18PM +0100, Dave Gordon wrote:
> On 28/04/16 16:32, Chris Wilson wrote:
> >On Thu, Apr 28, 2016 at 03:55:44PM +0100, Dave Gordon wrote:
> >>On 28/04/16 09:56, Chris Wilson wrote:
> >>>The hardware tracks contexts and expects all live contexts (those active
> >>>on the hardware) to have a unique identifier. This is used by the
> >>>hardware to assign pagefaults and the like to a particular context.
> >>>
> >>>v2: Reorder to make sure ctx->link is not left dangling if the
> >>>assignment of a hw_id fails (Mika).
> >>>
> >>>v3: We have 21bits of context space, not 20.
> >>>
> >>>Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>>Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>>---
> >>>  drivers/gpu/drm/i915/i915_debugfs.c     |  2 +-
> >>>  drivers/gpu/drm/i915/i915_drv.h         | 10 +++++++++
> >>>  drivers/gpu/drm/i915/i915_gem_context.c | 36 +++++++++++++++++++++++++++++++++
> >>>  3 files changed, 47 insertions(+), 1 deletion(-)
> >>
> >>[snip]
> >>
> >>>+static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out)
> >>>+{
> >>>+	int ret;
> >>>+
> >>>+	ret = ida_simple_get(&dev_priv->context_hw_ida,
> >>>+			     0, MAX_CONTEXT_HW_ID, GFP_KERNEL);
> >>>+	if (ret < 0) {
> >>>+		/* Contexts are only released when no longer active.
> >>>+		 * Flush any pending retires to hopefully release some
> >>>+		 * stale contexts and try again.
> >>>+		 */
> >>>+		i915_gem_retire_requests(dev_priv->dev);
> >>>+		ret = ida_simple_get(&dev_priv->context_hw_ida,
> >>>+				     0, MAX_CONTEXT_HW_ID, GFP_KERNEL);
> >>
> >>Should we once again reserve ID 0 to make it easier to identify
> >>things that have not yet been initialised properly?
> >
> >Otoh, 0 currently is the kernel context. Since we can only get an
> >uninitialised value for a use-after-free, I don't see an advantage here
> >above and beyond using poisoning, kasan or kmemcheck.
> >-Chris
> 
> I was thinking more of a use-before-complete-initialisation case,
> rather than a use-after-free. Obviously shouldn't happen, but it
> does (or used to) with the list-head in the request structure.

It's assigned to the context before use - that's a window we have
control over.
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 8b8d6f07d7aa..98fac85217cb 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2004,7 +2004,7 @@  static int i915_context_status(struct seq_file *m, void *unused)
 		    ctx->legacy_hw_ctx.rcs_state == NULL)
 			continue;
 
-		seq_puts(m, "HW context ");
+		seq_printf(m, "HW context %u ", ctx->hw_id);
 		describe_ctx(m, ctx);
 		if (ctx == dev_priv->kernel_context)
 			seq_printf(m, "(kernel context) ");
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 831da9f43324..0f7575252c6e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -851,6 +851,9 @@  struct intel_context {
 	struct i915_ctx_hang_stats hang_stats;
 	struct i915_hw_ppgtt *ppgtt;
 
+	/* Unique identifier for this context, used by the hw for tracking */
+	unsigned hw_id;
+
 	/* Legacy ring buffer submission */
 	struct {
 		struct drm_i915_gem_object *rcs_state;
@@ -1838,6 +1841,13 @@  struct drm_i915_private {
 	DECLARE_HASHTABLE(mm_structs, 7);
 	struct mutex mm_lock;
 
+	/* The hw wants to have a stable context identifier for the lifetime
+	 * of the context (for OA, PASID, faults, etc). This is limited
+	 * in execlists to 21 bits.
+	 */
+	struct ida context_hw_ida;
+#define MAX_CONTEXT_HW_ID (1<<21) /* exclusive */
+
 	/* Kernel Modesetting */
 
 	struct drm_crtc *plane_to_crtc_mapping[I915_MAX_PIPES];
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 4071a0ec134a..9477af0fcb33 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -171,6 +171,8 @@  void i915_gem_context_free(struct kref *ctx_ref)
 	if (ctx->legacy_hw_ctx.rcs_state)
 		drm_gem_object_unreference(&ctx->legacy_hw_ctx.rcs_state->base);
 	list_del(&ctx->link);
+
+	ida_simple_remove(&ctx->i915->context_hw_ida, ctx->hw_id);
 	kfree(ctx);
 }
 
@@ -211,6 +213,28 @@  i915_gem_alloc_context_obj(struct drm_device *dev, size_t size)
 	return obj;
 }
 
+static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out)
+{
+	int ret;
+
+	ret = ida_simple_get(&dev_priv->context_hw_ida,
+			     0, MAX_CONTEXT_HW_ID, GFP_KERNEL);
+	if (ret < 0) {
+		/* Contexts are only released when no longer active.
+		 * Flush any pending retires to hopefully release some
+		 * stale contexts and try again.
+		 */
+		i915_gem_retire_requests(dev_priv->dev);
+		ret = ida_simple_get(&dev_priv->context_hw_ida,
+				     0, MAX_CONTEXT_HW_ID, GFP_KERNEL);
+		if (ret < 0)
+			return ret;
+	}
+
+	*out = ret;
+	return 0;
+}
+
 static struct intel_context *
 __create_hw_context(struct drm_device *dev,
 		    struct drm_i915_file_private *file_priv)
@@ -223,6 +247,12 @@  __create_hw_context(struct drm_device *dev,
 	if (ctx == NULL)
 		return ERR_PTR(-ENOMEM);
 
+	ret = assign_hw_id(dev_priv, &ctx->hw_id);
+	if (ret) {
+		kfree(ctx);
+		return ERR_PTR(ret);
+	}
+
 	kref_init(&ctx->ref);
 	list_add_tail(&ctx->link, &dev_priv->context_list);
 	ctx->i915 = dev_priv;
@@ -366,6 +396,10 @@  int i915_gem_context_init(struct drm_device *dev)
 		}
 	}
 
+	/* Using the simple ida interface, the max is limited by sizeof(int) */
+	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX);
+	ida_init(&dev_priv->context_hw_ida);
+
 	if (i915.enable_execlists) {
 		/* NB: intentionally left blank. We will allocate our own
 		 * backing objects as we need them, thank you very much */
@@ -429,6 +463,8 @@  void i915_gem_context_fini(struct drm_device *dev)
 
 	i915_gem_context_unreference(dctx);
 	dev_priv->kernel_context = NULL;
+
+	ida_destroy(&dev_priv->context_hw_ida);
 }
 
 static int context_idr_cleanup(int id, void *p, void *data)