diff mbox series

[33/38] drm/i915: Remove last traces of exec-id (GEM_BUSY)

Message ID 20190118140109.25261-34-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [01/38] drm/i915/execlists: Store the highest priority context | expand

Commit Message

Chris Wilson Jan. 18, 2019, 2:01 p.m. UTC
As we allow per-context engine allows the legacy concept of
I915_EXEC_RING no longer applies universally. We are still exposing the
unrelated exec-id in GEM_BUSY, so transition this ioctl (once more
slightly changing its ABI, but no one cares) over to only reporting the
uabi-class (not instance as we can not foreseeably fit those into the
small bitmask).

The only user of the extended ring information from GEM_BUSY is ddx/sna,
which tries to use the non-rcs business information to guide which
engine to use for subsequent operations on foreign bo. All that matters
for it is the decision between rcs and !rcs, so it is unaffected by the
change in higher bits.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c         | 32 +++++++++++++------------
 drivers/gpu/drm/i915/intel_engine_cs.c  | 10 --------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  1 -
 include/uapi/drm/i915_drm.h             | 32 +++++++++++++------------
 4 files changed, 34 insertions(+), 41 deletions(-)
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9a78e1c9b323..4c8691533e9b 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3939,20 +3939,17 @@  i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
 
 static __always_inline unsigned int __busy_read_flag(unsigned int id)
 {
-	/* Note that we could alias engines in the execbuf API, but
-	 * that would be very unwise as it prevents userspace from
-	 * fine control over engine selection. Ahem.
-	 *
-	 * This should be something like EXEC_MAX_ENGINE instead of
-	 * I915_NUM_ENGINES.
-	 */
-	BUILD_BUG_ON(I915_NUM_ENGINES > 16);
+	if (id == I915_ENGINE_CLASS_INVALID)
+		return 0xffff0000;
+
+	GEM_BUG_ON(id >= 16);
 	return 0x10000 << id;
 }
 
 static __always_inline unsigned int __busy_write_id(unsigned int id)
 {
-	/* The uABI guarantees an active writer is also amongst the read
+	/*
+	 * The uABI guarantees an active writer is also amongst the read
 	 * engines. This would be true if we accessed the activity tracking
 	 * under the lock, but as we perform the lookup of the object and
 	 * its activity locklessly we can not guarantee that the last_write
@@ -3960,16 +3957,20 @@  static __always_inline unsigned int __busy_write_id(unsigned int id)
 	 * last_read - hence we always set both read and write busy for
 	 * last_write.
 	 */
-	return id | __busy_read_flag(id);
+	if (id == I915_ENGINE_CLASS_INVALID)
+		return 0xffffffff;
+
+	return (id + 1) | __busy_read_flag(id);
 }
 
 static __always_inline unsigned int
 __busy_set_if_active(const struct dma_fence *fence,
 		     unsigned int (*flag)(unsigned int id))
 {
-	struct i915_request *rq;
+	const struct i915_request *rq;
 
-	/* We have to check the current hw status of the fence as the uABI
+	/*
+	 * We have to check the current hw status of the fence as the uABI
 	 * guarantees forward progress. We could rely on the idle worker
 	 * to eventually flush us, but to minimise latency just ask the
 	 * hardware.
@@ -3980,11 +3981,11 @@  __busy_set_if_active(const struct dma_fence *fence,
 		return 0;
 
 	/* opencode to_request() in order to avoid const warnings */
-	rq = container_of(fence, struct i915_request, fence);
+	rq = container_of(fence, const struct i915_request, fence);
 	if (i915_request_completed(rq))
 		return 0;
 
-	return flag(rq->engine->uabi_id);
+	return flag(rq->engine->uabi_class);
 }
 
 static __always_inline unsigned int
@@ -4018,7 +4019,8 @@  i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 	if (!obj)
 		goto out;
 
-	/* A discrepancy here is that we do not report the status of
+	/*
+	 * A discrepancy here is that we do not report the status of
 	 * non-i915 fences, i.e. even though we may report the object as idle,
 	 * a call to set-domain may still stall waiting for foreign rendering.
 	 * This also means that wait-ioctl may report an object as busy,
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 47f3cad6e861..53f68c983334 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -84,7 +84,6 @@  static const struct engine_class_info intel_engine_classes[] = {
 #define MAX_MMIO_BASES 3
 struct engine_info {
 	unsigned int hw_id;
-	unsigned int uabi_id;
 	u8 class;
 	u8 instance;
 	/* mmio bases table *must* be sorted in reverse gen order */
@@ -97,7 +96,6 @@  struct engine_info {
 static const struct engine_info intel_engines[] = {
 	[RCS] = {
 		.hw_id = RCS_HW,
-		.uabi_id = I915_EXEC_RENDER,
 		.class = RENDER_CLASS,
 		.instance = 0,
 		.mmio_bases = {
@@ -106,7 +104,6 @@  static const struct engine_info intel_engines[] = {
 	},
 	[BCS] = {
 		.hw_id = BCS_HW,
-		.uabi_id = I915_EXEC_BLT,
 		.class = COPY_ENGINE_CLASS,
 		.instance = 0,
 		.mmio_bases = {
@@ -115,7 +112,6 @@  static const struct engine_info intel_engines[] = {
 	},
 	[VCS] = {
 		.hw_id = VCS_HW,
-		.uabi_id = I915_EXEC_BSD,
 		.class = VIDEO_DECODE_CLASS,
 		.instance = 0,
 		.mmio_bases = {
@@ -126,7 +122,6 @@  static const struct engine_info intel_engines[] = {
 	},
 	[VCS2] = {
 		.hw_id = VCS2_HW,
-		.uabi_id = I915_EXEC_BSD,
 		.class = VIDEO_DECODE_CLASS,
 		.instance = 1,
 		.mmio_bases = {
@@ -136,7 +131,6 @@  static const struct engine_info intel_engines[] = {
 	},
 	[VCS3] = {
 		.hw_id = VCS3_HW,
-		.uabi_id = I915_EXEC_BSD,
 		.class = VIDEO_DECODE_CLASS,
 		.instance = 2,
 		.mmio_bases = {
@@ -145,7 +139,6 @@  static const struct engine_info intel_engines[] = {
 	},
 	[VCS4] = {
 		.hw_id = VCS4_HW,
-		.uabi_id = I915_EXEC_BSD,
 		.class = VIDEO_DECODE_CLASS,
 		.instance = 3,
 		.mmio_bases = {
@@ -154,7 +147,6 @@  static const struct engine_info intel_engines[] = {
 	},
 	[VECS] = {
 		.hw_id = VECS_HW,
-		.uabi_id = I915_EXEC_VEBOX,
 		.class = VIDEO_ENHANCEMENT_CLASS,
 		.instance = 0,
 		.mmio_bases = {
@@ -164,7 +156,6 @@  static const struct engine_info intel_engines[] = {
 	},
 	[VECS2] = {
 		.hw_id = VECS2_HW,
-		.uabi_id = I915_EXEC_VEBOX,
 		.class = VIDEO_ENHANCEMENT_CLASS,
 		.instance = 1,
 		.mmio_bases = {
@@ -321,7 +312,6 @@  intel_engine_setup(struct drm_i915_private *dev_priv,
 	engine->class = info->class;
 	engine->instance = info->instance;
 
-	engine->uabi_id = info->uabi_id;
 	engine->uabi_class = intel_engine_classes[info->class].uabi_class;
 
 	engine->context_size = __intel_engine_context_size(dev_priv,
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 9ca8f5ff5dd4..f37c5d1468be 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -355,7 +355,6 @@  struct intel_engine_cs {
 	unsigned int hw_id;
 	unsigned int guc_id;
 
-	u8 uabi_id;
 	u8 uabi_class;
 
 	u8 class;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index ca25ca28d75f..9db459b19d4e 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1145,32 +1145,34 @@  struct drm_i915_gem_busy {
 	 * as busy may become idle before the ioctl is completed.
 	 *
 	 * Furthermore, if the object is busy, which engine is busy is only
-	 * provided as a guide. There are race conditions which prevent the
-	 * report of which engines are busy from being always accurate.
-	 * However, the converse is not true. If the object is idle, the
-	 * result of the ioctl, that all engines are idle, is accurate.
+	 * provided as a guide and only indirectly by reporting its class
+	 * (there may be more than one engine in each class). There are race
+	 * conditions which prevent the report of which engines are busy from
+	 * being always accurate.  However, the converse is not true. If the
+	 * object is idle, the result of the ioctl, that all engines are idle,
+	 * is accurate.
 	 *
 	 * The returned dword is split into two fields to indicate both
-	 * the engines on which the object is being read, and the
-	 * engine on which it is currently being written (if any).
+	 * the engine classess on which the object is being read, and the
+	 * engine class on which it is currently being written (if any).
 	 *
 	 * The low word (bits 0:15) indicate if the object is being written
 	 * to by any engine (there can only be one, as the GEM implicit
 	 * synchronisation rules force writes to be serialised). Only the
-	 * engine for the last write is reported.
+	 * engine class (offset by 1, I915_ENGINE_CLASS_RENDER is reported as
+	 * 1 not 0 etc) for the last write is reported.
 	 *
-	 * The high word (bits 16:31) are a bitmask of which engines are
-	 * currently reading from the object. Multiple engines may be
+	 * The high word (bits 16:31) are a bitmask of which engines classes
+	 * are currently reading from the object. Multiple engines may be
 	 * reading from the object simultaneously.
 	 *
-	 * The value of each engine is the same as specified in the
-	 * EXECBUFFER2 ioctl, i.e. I915_EXEC_RENDER, I915_EXEC_BSD etc.
-	 * Note I915_EXEC_DEFAULT is a symbolic value and is mapped to
-	 * the I915_EXEC_RENDER engine for execution, and so it is never
+	 * The value of each engine class is the same as specified in the
+	 * I915_CONTEXT_SET_ENGINES parameter and via perf, i.e.
+	 * I915_ENGINE_CLASS_RENDER, I915_ENGINE_CLASS_COPY, etc.
 	 * reported as active itself. Some hardware may have parallel
 	 * execution engines, e.g. multiple media engines, which are
-	 * mapped to the same identifier in the EXECBUFFER2 ioctl and
-	 * so are not separately reported for busyness.
+	 * mapped to the same class identifier and so are not separately
+	 * reported for busyness.
 	 *
 	 * Caveat emptor:
 	 * Only the boolean result of this query is reliable; that is whether