diff mbox series

[3/4] drm/i915: Drop the CONTEXT_CLONE API

Message ID 20210319223856.2983244-4-jason@jlekstrand.net (mailing list archive)
State New, archived
Headers show
Series drm/i915: uAPI clean-ups part 2 | expand

Commit Message

Jason Ekstrand March 19, 2021, 10:38 p.m. UTC
This API allows one context to grab bits out of another context upon
creation.  It can be used as a short-cut for setparam(getparam()) for
things like I915_CONTEXT_PARAM_VM.  However, it's never been used by any
real userspace.  It's used by a few IGT tests and that's it.  Since it
doesn't add any real value (most of the stuff you can CLONE you can copy
in other ways), drop it.
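
As a rough sketch (not part of the patch itself; the fd and both
context ids are assumed to already exist), the setparam(getparam())
shortcut for I915_CONTEXT_PARAM_VM looks something like this from
userspace:

#include <errno.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Sketch only: copy the VM of src_ctx into dst_ctx, the
 * getparam/setparam equivalent of I915_CONTEXT_CLONE_VM.
 * Error handling is minimal.
 */
static int copy_vm(int fd, __u32 src_ctx, __u32 dst_ctx)
{
	struct drm_i915_gem_context_param p = {
		.ctx_id = src_ctx,
		.param = I915_CONTEXT_PARAM_VM,
	};

	/* Read the VM handle out of the source context... */
	if (ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &p))
		return -errno;

	/* ...and install the same VM into the destination context. */
	p.ctx_id = dst_ctx;
	if (ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p))
		return -errno;

	return 0;
}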

There is one thing that this API allows you to clone which you cannot
clone via getparam/setparam: timelines.  However, timelines are an
implementation detail of i915 and not really something that needs to be
exposed to userspace.  Also, sharing timelines between contexts isn't
obviously useful and supporting it has the potential to complicate i915
internally.  It also doesn't add any functionality that the client can't
get in other ways.  If a client really wants a shared timeline, they can
use a syncobj and set it as an in and out fence on every submit.
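
A minimal sketch of that syncobj approach (hypothetical helper, not
from this series; assumes an open fd, a filled-in execbuffer2 and a
syncobj created with DRM_IOCTL_SYNCOBJ_CREATE, with the syncobj already
carrying a signalled fence before the first submit):

#include <errno.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Sketch only: every submission waits on and then re-signals the same
 * syncobj, which gives the same in-order behaviour as a shared timeline.
 */
static int submit_in_order(int fd, struct drm_i915_gem_execbuffer2 *eb,
			   uint32_t timeline_syncobj)
{
	struct drm_i915_gem_exec_fence fence = {
		.handle = timeline_syncobj,
		/* wait for the previous submit, signal for the next one */
		.flags = I915_EXEC_FENCE_WAIT | I915_EXEC_FENCE_SIGNAL,
	};

	/* With I915_EXEC_FENCE_ARRAY the cliprect fields carry the fences. */
	eb->flags |= I915_EXEC_FENCE_ARRAY;
	eb->cliprects_ptr = (uintptr_t)&fence;
	eb->num_cliprects = 1;

	return ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, eb) ? -errno : 0;
}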

Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c | 199 +-------------------
 include/uapi/drm/i915_drm.h                 |  16 +-
 2 files changed, 6 insertions(+), 209 deletions(-)

Comments

Tvrtko Ursulin March 22, 2021, 11:22 a.m. UTC | #1
On 19/03/2021 22:38, Jason Ekstrand wrote:
> This API allows one context to grab bits out of another context upon
> creation.  It can be used as a short-cut for setparam(getparam()) for
> things like I915_CONTEXT_PARAM_VM.  However, it's never been used by any
> real userspace.  It's used by a few IGT tests and that's it.  Since it
> doesn't add any real value (most of the stuff you can CLONE you can copy
> in other ways), drop it.

No complaints about removing it if it ended up unused outside IGT. The
latter is a _big_ problem though, since it is much more than a few IGT
tests. So I really think there needs to be an evaluation and a plan for
that (we don't want to lose 50% of the coverage overnight).

> There is one thing that this API allows you to clone which you cannot
> clone via getparam/setparam: timelines.  However, timelines are an
> implementation detail of i915 and not really something that needs to be

It's not really true that timelines are an i915 implementation detail.
They are in fact a dma-fence context:seqno concept, nothing more than
that. I think you are probably confusing struct intel_timeline with the
timeline wording in the uapi. The former is an i915 implementation
detail, but context:seqno are truly userspace timelines.

But again, no objection to removing unused uapi in principle. The
narrative has to be accurate and test coverage must not be lost, though.

Regards,

Tvrtko

> exposed to userspace.  Also, sharing timelines between contexts isn't
> obviously useful and supporting it has the potential to complicate i915
> internally.  It also doesn't add any functionality that the client can't
> get in other ways.  If a client really wants a shared timeline, they can
> use a syncobj and set it as an in and out fence on every submit.
> 
> Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   drivers/gpu/drm/i915/gem/i915_gem_context.c | 199 +-------------------
>   include/uapi/drm/i915_drm.h                 |  16 +-
>   2 files changed, 6 insertions(+), 209 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index d28ac79de7573..f88bac19333ec 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> @@ -1983,207 +1983,14 @@ static int create_setparam(struct i915_user_extension __user *ext, void *data)
>   	return ctx_setparam(arg->fpriv, arg->ctx, &local.param);
>   }
>   
> -static int clone_engines(struct i915_gem_context *dst,
> -			 struct i915_gem_context *src)
> +static int invalid_ext(struct i915_user_extension __user *ext, void *data)
>   {
> -	struct i915_gem_engines *clone, *e;
> -	bool user_engines;
> -	unsigned long n;
> -
> -	e = __context_engines_await(src, &user_engines);
> -	if (!e)
> -		return -ENOENT;
> -
> -	clone = alloc_engines(e->num_engines);
> -	if (!clone)
> -		goto err_unlock;
> -
> -	for (n = 0; n < e->num_engines; n++) {
> -		struct intel_engine_cs *engine;
> -
> -		if (!e->engines[n]) {
> -			clone->engines[n] = NULL;
> -			continue;
> -		}
> -		engine = e->engines[n]->engine;
> -
> -		/*
> -		 * Virtual engines are singletons; they can only exist
> -		 * inside a single context, because they embed their
> -		 * HW context... As each virtual context implies a single
> -		 * timeline (each engine can only dequeue a single request
> -		 * at any time), it would be surprising for two contexts
> -		 * to use the same engine. So let's create a copy of
> -		 * the virtual engine instead.
> -		 */
> -		if (intel_engine_is_virtual(engine))
> -			clone->engines[n] =
> -				intel_execlists_clone_virtual(engine);
> -		else
> -			clone->engines[n] = intel_context_create(engine);
> -		if (IS_ERR_OR_NULL(clone->engines[n])) {
> -			__free_engines(clone, n);
> -			goto err_unlock;
> -		}
> -
> -		intel_context_set_gem(clone->engines[n], dst);
> -	}
> -	clone->num_engines = n;
> -	i915_sw_fence_complete(&e->fence);
> -
> -	/* Serialised by constructor */
> -	engines_idle_release(dst, rcu_replace_pointer(dst->engines, clone, 1));
> -	if (user_engines)
> -		i915_gem_context_set_user_engines(dst);
> -	else
> -		i915_gem_context_clear_user_engines(dst);
> -	return 0;
> -
> -err_unlock:
> -	i915_sw_fence_complete(&e->fence);
> -	return -ENOMEM;
> -}
> -
> -static int clone_flags(struct i915_gem_context *dst,
> -		       struct i915_gem_context *src)
> -{
> -	dst->user_flags = src->user_flags;
> -	return 0;
> -}
> -
> -static int clone_schedattr(struct i915_gem_context *dst,
> -			   struct i915_gem_context *src)
> -{
> -	dst->sched = src->sched;
> -	return 0;
> -}
> -
> -static int clone_sseu(struct i915_gem_context *dst,
> -		      struct i915_gem_context *src)
> -{
> -	struct i915_gem_engines *e = i915_gem_context_lock_engines(src);
> -	struct i915_gem_engines *clone;
> -	unsigned long n;
> -	int err;
> -
> -	/* no locking required; sole access under constructor*/
> -	clone = __context_engines_static(dst);
> -	if (e->num_engines != clone->num_engines) {
> -		err = -EINVAL;
> -		goto unlock;
> -	}
> -
> -	for (n = 0; n < e->num_engines; n++) {
> -		struct intel_context *ce = e->engines[n];
> -
> -		if (clone->engines[n]->engine->class != ce->engine->class) {
> -			/* Must have compatible engine maps! */
> -			err = -EINVAL;
> -			goto unlock;
> -		}
> -
> -		/* serialises with set_sseu */
> -		err = intel_context_lock_pinned(ce);
> -		if (err)
> -			goto unlock;
> -
> -		clone->engines[n]->sseu = ce->sseu;
> -		intel_context_unlock_pinned(ce);
> -	}
> -
> -	err = 0;
> -unlock:
> -	i915_gem_context_unlock_engines(src);
> -	return err;
> -}
> -
> -static int clone_timeline(struct i915_gem_context *dst,
> -			  struct i915_gem_context *src)
> -{
> -	if (src->timeline)
> -		__assign_timeline(dst, src->timeline);
> -
> -	return 0;
> -}
> -
> -static int clone_vm(struct i915_gem_context *dst,
> -		    struct i915_gem_context *src)
> -{
> -	struct i915_address_space *vm;
> -	int err = 0;
> -
> -	if (!rcu_access_pointer(src->vm))
> -		return 0;
> -
> -	rcu_read_lock();
> -	vm = context_get_vm_rcu(src);
> -	rcu_read_unlock();
> -
> -	if (!mutex_lock_interruptible(&dst->mutex)) {
> -		__assign_ppgtt(dst, vm);
> -		mutex_unlock(&dst->mutex);
> -	} else {
> -		err = -EINTR;
> -	}
> -
> -	i915_vm_put(vm);
> -	return err;
> -}
> -
> -static int create_clone(struct i915_user_extension __user *ext, void *data)
> -{
> -	static int (* const fn[])(struct i915_gem_context *dst,
> -				  struct i915_gem_context *src) = {
> -#define MAP(x, y) [ilog2(I915_CONTEXT_CLONE_##x)] = y
> -		MAP(ENGINES, clone_engines),
> -		MAP(FLAGS, clone_flags),
> -		MAP(SCHEDATTR, clone_schedattr),
> -		MAP(SSEU, clone_sseu),
> -		MAP(TIMELINE, clone_timeline),
> -		MAP(VM, clone_vm),
> -#undef MAP
> -	};
> -	struct drm_i915_gem_context_create_ext_clone local;
> -	const struct create_ext *arg = data;
> -	struct i915_gem_context *dst = arg->ctx;
> -	struct i915_gem_context *src;
> -	int err, bit;
> -
> -	if (copy_from_user(&local, ext, sizeof(local)))
> -		return -EFAULT;
> -
> -	BUILD_BUG_ON(GENMASK(BITS_PER_TYPE(local.flags) - 1, ARRAY_SIZE(fn)) !=
> -		     I915_CONTEXT_CLONE_UNKNOWN);
> -
> -	if (local.flags & I915_CONTEXT_CLONE_UNKNOWN)
> -		return -EINVAL;
> -
> -	if (local.rsvd)
> -		return -EINVAL;
> -
> -	rcu_read_lock();
> -	src = __i915_gem_context_lookup_rcu(arg->fpriv, local.clone_id);
> -	rcu_read_unlock();
> -	if (!src)
> -		return -ENOENT;
> -
> -	GEM_BUG_ON(src == dst);
> -
> -	for (bit = 0; bit < ARRAY_SIZE(fn); bit++) {
> -		if (!(local.flags & BIT(bit)))
> -			continue;
> -
> -		err = fn[bit](dst, src);
> -		if (err)
> -			return err;
> -	}
> -
> -	return 0;
> +	return -EINVAL;
>   }
>   
>   static const i915_user_extension_fn create_extensions[] = {
>   	[I915_CONTEXT_CREATE_EXT_SETPARAM] = create_setparam,
> -	[I915_CONTEXT_CREATE_EXT_CLONE] = create_clone,
> +	[I915_CONTEXT_CREATE_EXT_CLONE] = invalid_ext,
>   };
>   
>   static bool client_is_banned(struct drm_i915_file_private *file_priv)
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 4c4b9254def1b..33ef78cb1deb7 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1841,20 +1841,10 @@ struct drm_i915_gem_context_create_ext_setparam {
>   	struct drm_i915_gem_context_param param;
>   };
>   
> -struct drm_i915_gem_context_create_ext_clone {
> +/* This API has been removed.  On the off chance someone somewhere has
> + * attempted to use it, never re-use this extension number.
> + */
>   #define I915_CONTEXT_CREATE_EXT_CLONE 1
> -	struct i915_user_extension base;
> -	__u32 clone_id;
> -	__u32 flags;
> -#define I915_CONTEXT_CLONE_ENGINES	(1u << 0)
> -#define I915_CONTEXT_CLONE_FLAGS	(1u << 1)
> -#define I915_CONTEXT_CLONE_SCHEDATTR	(1u << 2)
> -#define I915_CONTEXT_CLONE_SSEU		(1u << 3)
> -#define I915_CONTEXT_CLONE_TIMELINE	(1u << 4)
> -#define I915_CONTEXT_CLONE_VM		(1u << 5)
> -#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_VM << 1)
> -	__u64 rsvd;
> -};
>   
>   struct drm_i915_gem_context_destroy {
>   	__u32 ctx_id;
>
Daniel Vetter March 22, 2021, 2:09 p.m. UTC | #2
On Mon, Mar 22, 2021 at 11:22:01AM +0000, Tvrtko Ursulin wrote:
> 
> On 19/03/2021 22:38, Jason Ekstrand wrote:
> > This API allows one context to grab bits out of another context upon
> > creation.  It can be used as a short-cut for setparam(getparam()) for
> > things like I915_CONTEXT_PARAM_VM.  However, it's never been used by any
> > real userspace.  It's used by a few IGT tests and that's it.  Since it
> > doesn't add any real value (most of the stuff you can CLONE you can copy
> > in other ways), drop it.
> 
> No complaints to remove if it ended up unused outside IGT. Latter is a _big_
> problem though, since it is much more that a few IGT tests. So I really
> think there really needs to be an evaluation and a plan for that (we don't
> want to lose 50% of the coverage over night).
> 
> > There is one thing that this API allows you to clone which you cannot
> > clone via getparam/setparam: timelines.  However, timelines are an
> > implementation detail of i915 and not really something that needs to be
> 
> Not really true timelines are i915 implementation detail. They are in fact a
> dma-fence context:seqno concept, nothing more that than. I think you are
> probably confusing struct intel_timeline with the timeline wording in the
> uapi. Former is i915 implementation detail, but context:seqno are truly
> userspace timelines.

I think you're both saying the same thing and talking a bit past each
other.

Yes, the timeline is just a string of dma_fence, that's correct. Now
usually if you submit batches with execbuf, we have 3 ways to
synchronize concurrent submission: implicit sync, sync_file and
drm_syncobj. They all map to different needs in different
protocols/render apis.

Now in one additional case the kernel makes sure that batchbuffers are
ordered, and that's when you submit them to the same hw ctx. Because
there's only 1 hw context and you really can't have batchbuffers run on
that single hw context out of order. That's what the timeline object we
talk about here is. But that largely is an internal implementation detail,
which happens to also use most/all the same infrastructure as the
dma_fence uapi pieces above.

Now the internal implementation detail leaking here is that we exposed
this to userspace, without there being any need for this. What Jason
implements with syncobj in the next patch is essentially what userspace
should have been using for cross-engine sync. Media userspace doesn't
care about interop with winsys/client apis, so they equally could have
used implicit sync or sync_file here (which I think is the solution now
for the new uapi prepped internally), since they all are about equally
powerful for stringing batchbuffers together.
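
To make the sync_file option concrete, here is a hedged sketch
(hypothetical helper, not from this series; assumes an open fd and a
filled-in execbuffer2): each submit takes the fence fd produced by the
previous one as its in-fence and returns a fresh out-fence fd.

#include <errno.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Sketch only: string batchbuffers together with sync_file fds.  Pass -1
 * as in_fence for the first submission.  Returns the new out-fence fd or
 * a negative errno.
 */
static int submit_chained(int fd, struct drm_i915_gem_execbuffer2 *eb,
			  int in_fence)
{
	eb->flags |= I915_EXEC_FENCE_OUT;
	eb->rsvd2 = 0;

	if (in_fence >= 0) {
		eb->flags |= I915_EXEC_FENCE_IN;
		eb->rsvd2 = (__u32)in_fence;	/* in-fence fd, low half */
	}

	/* The _WR ioctl variant is needed for the out-fence to be returned. */
	if (ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, eb))
		return -errno;

	if (in_fence >= 0)
		close(in_fence);

	return eb->rsvd2 >> 32;			/* out-fence fd, high half */
}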

So I do think the assessment is accurate, albeit a bit on the terse side.
Maybe we could quote just the entire thing here in the commit message.
-Daniel

> 
> But again, no objection to removing unused uapi in principle. Narrative has
> to be accurate and test coverage not lost though.
> 
> Regards,
> 
> Tvrtko
> 
> > exposed to userspace.  Also, sharing timelines between contexts isn't
> > obviously useful and supporting it has the potential to complicate i915
> > internally.  It also doesn't add any functionality that the client can't
> > get in other ways.  If a client really wants a shared timeline, they can
> > use a syncobj and set it as an in and out fence on every submit.
> > 
> > Signed-off-by: Jason Ekstrand <jason@jlekstrand.net>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >   drivers/gpu/drm/i915/gem/i915_gem_context.c | 199 +-------------------
> >   include/uapi/drm/i915_drm.h                 |  16 +-
> >   2 files changed, 6 insertions(+), 209 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > index d28ac79de7573..f88bac19333ec 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> > @@ -1983,207 +1983,14 @@ static int create_setparam(struct i915_user_extension __user *ext, void *data)
> >   	return ctx_setparam(arg->fpriv, arg->ctx, &local.param);
> >   }
> > -static int clone_engines(struct i915_gem_context *dst,
> > -			 struct i915_gem_context *src)
> > +static int invalid_ext(struct i915_user_extension __user *ext, void *data)
> >   {
> > -	struct i915_gem_engines *clone, *e;
> > -	bool user_engines;
> > -	unsigned long n;
> > -
> > -	e = __context_engines_await(src, &user_engines);
> > -	if (!e)
> > -		return -ENOENT;
> > -
> > -	clone = alloc_engines(e->num_engines);
> > -	if (!clone)
> > -		goto err_unlock;
> > -
> > -	for (n = 0; n < e->num_engines; n++) {
> > -		struct intel_engine_cs *engine;
> > -
> > -		if (!e->engines[n]) {
> > -			clone->engines[n] = NULL;
> > -			continue;
> > -		}
> > -		engine = e->engines[n]->engine;
> > -
> > -		/*
> > -		 * Virtual engines are singletons; they can only exist
> > -		 * inside a single context, because they embed their
> > -		 * HW context... As each virtual context implies a single
> > -		 * timeline (each engine can only dequeue a single request
> > -		 * at any time), it would be surprising for two contexts
> > -		 * to use the same engine. So let's create a copy of
> > -		 * the virtual engine instead.
> > -		 */
> > -		if (intel_engine_is_virtual(engine))
> > -			clone->engines[n] =
> > -				intel_execlists_clone_virtual(engine);
> > -		else
> > -			clone->engines[n] = intel_context_create(engine);
> > -		if (IS_ERR_OR_NULL(clone->engines[n])) {
> > -			__free_engines(clone, n);
> > -			goto err_unlock;
> > -		}
> > -
> > -		intel_context_set_gem(clone->engines[n], dst);
> > -	}
> > -	clone->num_engines = n;
> > -	i915_sw_fence_complete(&e->fence);
> > -
> > -	/* Serialised by constructor */
> > -	engines_idle_release(dst, rcu_replace_pointer(dst->engines, clone, 1));
> > -	if (user_engines)
> > -		i915_gem_context_set_user_engines(dst);
> > -	else
> > -		i915_gem_context_clear_user_engines(dst);
> > -	return 0;
> > -
> > -err_unlock:
> > -	i915_sw_fence_complete(&e->fence);
> > -	return -ENOMEM;
> > -}
> > -
> > -static int clone_flags(struct i915_gem_context *dst,
> > -		       struct i915_gem_context *src)
> > -{
> > -	dst->user_flags = src->user_flags;
> > -	return 0;
> > -}
> > -
> > -static int clone_schedattr(struct i915_gem_context *dst,
> > -			   struct i915_gem_context *src)
> > -{
> > -	dst->sched = src->sched;
> > -	return 0;
> > -}
> > -
> > -static int clone_sseu(struct i915_gem_context *dst,
> > -		      struct i915_gem_context *src)
> > -{
> > -	struct i915_gem_engines *e = i915_gem_context_lock_engines(src);
> > -	struct i915_gem_engines *clone;
> > -	unsigned long n;
> > -	int err;
> > -
> > -	/* no locking required; sole access under constructor*/
> > -	clone = __context_engines_static(dst);
> > -	if (e->num_engines != clone->num_engines) {
> > -		err = -EINVAL;
> > -		goto unlock;
> > -	}
> > -
> > -	for (n = 0; n < e->num_engines; n++) {
> > -		struct intel_context *ce = e->engines[n];
> > -
> > -		if (clone->engines[n]->engine->class != ce->engine->class) {
> > -			/* Must have compatible engine maps! */
> > -			err = -EINVAL;
> > -			goto unlock;
> > -		}
> > -
> > -		/* serialises with set_sseu */
> > -		err = intel_context_lock_pinned(ce);
> > -		if (err)
> > -			goto unlock;
> > -
> > -		clone->engines[n]->sseu = ce->sseu;
> > -		intel_context_unlock_pinned(ce);
> > -	}
> > -
> > -	err = 0;
> > -unlock:
> > -	i915_gem_context_unlock_engines(src);
> > -	return err;
> > -}
> > -
> > -static int clone_timeline(struct i915_gem_context *dst,
> > -			  struct i915_gem_context *src)
> > -{
> > -	if (src->timeline)
> > -		__assign_timeline(dst, src->timeline);
> > -
> > -	return 0;
> > -}
> > -
> > -static int clone_vm(struct i915_gem_context *dst,
> > -		    struct i915_gem_context *src)
> > -{
> > -	struct i915_address_space *vm;
> > -	int err = 0;
> > -
> > -	if (!rcu_access_pointer(src->vm))
> > -		return 0;
> > -
> > -	rcu_read_lock();
> > -	vm = context_get_vm_rcu(src);
> > -	rcu_read_unlock();
> > -
> > -	if (!mutex_lock_interruptible(&dst->mutex)) {
> > -		__assign_ppgtt(dst, vm);
> > -		mutex_unlock(&dst->mutex);
> > -	} else {
> > -		err = -EINTR;
> > -	}
> > -
> > -	i915_vm_put(vm);
> > -	return err;
> > -}
> > -
> > -static int create_clone(struct i915_user_extension __user *ext, void *data)
> > -{
> > -	static int (* const fn[])(struct i915_gem_context *dst,
> > -				  struct i915_gem_context *src) = {
> > -#define MAP(x, y) [ilog2(I915_CONTEXT_CLONE_##x)] = y
> > -		MAP(ENGINES, clone_engines),
> > -		MAP(FLAGS, clone_flags),
> > -		MAP(SCHEDATTR, clone_schedattr),
> > -		MAP(SSEU, clone_sseu),
> > -		MAP(TIMELINE, clone_timeline),
> > -		MAP(VM, clone_vm),
> > -#undef MAP
> > -	};
> > -	struct drm_i915_gem_context_create_ext_clone local;
> > -	const struct create_ext *arg = data;
> > -	struct i915_gem_context *dst = arg->ctx;
> > -	struct i915_gem_context *src;
> > -	int err, bit;
> > -
> > -	if (copy_from_user(&local, ext, sizeof(local)))
> > -		return -EFAULT;
> > -
> > -	BUILD_BUG_ON(GENMASK(BITS_PER_TYPE(local.flags) - 1, ARRAY_SIZE(fn)) !=
> > -		     I915_CONTEXT_CLONE_UNKNOWN);
> > -
> > -	if (local.flags & I915_CONTEXT_CLONE_UNKNOWN)
> > -		return -EINVAL;
> > -
> > -	if (local.rsvd)
> > -		return -EINVAL;
> > -
> > -	rcu_read_lock();
> > -	src = __i915_gem_context_lookup_rcu(arg->fpriv, local.clone_id);
> > -	rcu_read_unlock();
> > -	if (!src)
> > -		return -ENOENT;
> > -
> > -	GEM_BUG_ON(src == dst);
> > -
> > -	for (bit = 0; bit < ARRAY_SIZE(fn); bit++) {
> > -		if (!(local.flags & BIT(bit)))
> > -			continue;
> > -
> > -		err = fn[bit](dst, src);
> > -		if (err)
> > -			return err;
> > -	}
> > -
> > -	return 0;
> > +	return -EINVAL;
> >   }
> >   static const i915_user_extension_fn create_extensions[] = {
> >   	[I915_CONTEXT_CREATE_EXT_SETPARAM] = create_setparam,
> > -	[I915_CONTEXT_CREATE_EXT_CLONE] = create_clone,
> > +	[I915_CONTEXT_CREATE_EXT_CLONE] = invalid_ext,
> >   };
> >   static bool client_is_banned(struct drm_i915_file_private *file_priv)
> > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> > index 4c4b9254def1b..33ef78cb1deb7 100644
> > --- a/include/uapi/drm/i915_drm.h
> > +++ b/include/uapi/drm/i915_drm.h
> > @@ -1841,20 +1841,10 @@ struct drm_i915_gem_context_create_ext_setparam {
> >   	struct drm_i915_gem_context_param param;
> >   };
> > -struct drm_i915_gem_context_create_ext_clone {
> > +/* This API has been removed.  On the off chance someone somewhere has
> > + * attempted to use it, never re-use this extension number.
> > + */
> >   #define I915_CONTEXT_CREATE_EXT_CLONE 1
> > -	struct i915_user_extension base;
> > -	__u32 clone_id;
> > -	__u32 flags;
> > -#define I915_CONTEXT_CLONE_ENGINES	(1u << 0)
> > -#define I915_CONTEXT_CLONE_FLAGS	(1u << 1)
> > -#define I915_CONTEXT_CLONE_SCHEDATTR	(1u << 2)
> > -#define I915_CONTEXT_CLONE_SSEU		(1u << 3)
> > -#define I915_CONTEXT_CLONE_TIMELINE	(1u << 4)
> > -#define I915_CONTEXT_CLONE_VM		(1u << 5)
> > -#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_VM << 1)
> > -	__u64 rsvd;
> > -};
> >   struct drm_i915_gem_context_destroy {
> >   	__u32 ctx_id;
> > 
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
Tvrtko Ursulin March 22, 2021, 2:32 p.m. UTC | #3
On 22/03/2021 14:09, Daniel Vetter wrote:
> On Mon, Mar 22, 2021 at 11:22:01AM +0000, Tvrtko Ursulin wrote:
>>
>> On 19/03/2021 22:38, Jason Ekstrand wrote:
>>> This API allows one context to grab bits out of another context upon
>>> creation.  It can be used as a short-cut for setparam(getparam()) for
>>> things like I915_CONTEXT_PARAM_VM.  However, it's never been used by any
>>> real userspace.  It's used by a few IGT tests and that's it.  Since it
>>> doesn't add any real value (most of the stuff you can CLONE you can copy
>>> in other ways), drop it.
>>
>> No complaints to remove if it ended up unused outside IGT. Latter is a _big_
>> problem though, since it is much more that a few IGT tests. So I really
>> think there really needs to be an evaluation and a plan for that (we don't
>> want to lose 50% of the coverage over night).
>>
>>> There is one thing that this API allows you to clone which you cannot
>>> clone via getparam/setparam: timelines.  However, timelines are an
>>> implementation detail of i915 and not really something that needs to be
>>
>> Not really true timelines are i915 implementation detail. They are in fact a
>> dma-fence context:seqno concept, nothing more that than. I think you are
>> probably confusing struct intel_timeline with the timeline wording in the
>> uapi. Former is i915 implementation detail, but context:seqno are truly
>> userspace timelines.
> 
> I think you're both saying the same thing and talking a bit past each
> another.
> 
> Yes the timeline is just a string of dma_fence, that's correct. Now
> usually if you submit batches with execbuf, we have 3 ways to synchronize
> concurrent submission: implicit sync, sync_file and drm_syncob. They all
> map to different needs in different protocols/render apis.
> 
> Now in one additional case the kernel makes sure that batchbuffers are
> ordered, and that's when you submit them to the same hw ctx. Because
> there's only 1 hw context and you really can't have batchbuffers run on
> that single hw context out of order. That's what the timeline object we
> talk about here is. But that largely is an internal implementation detail,
> which happens to also use most/all the same infrastructure as the
> dma_fence uapi pieces above.
> 
> Now the internal implementation detail leaking here is that we exposed
> this to userspace, without there being any need for this. What Jason
> implements with syncobj in the next patch is essentially what userspace
> should have been using for cross-engine sync. media userspace doesn't care
> about interop with winsys/client apis, so they equally could have used
> implicit sync or sync_file here (which I think is the solution now for the
> new uapi prepped internally), since they all are about equally powerful
> for stringing batchbuffers together.

Are you saying we exposed a single timeline of execution per hw context 
via the single timeline flag?!

Timelines of execution were always exposed. Any "engine" (ring 
previously) in I915_EXEC_RING_MASK was a single timeline of execution. 
It is completely the same with engine map engines, which are also 
different indices into I915_EXEC_RING_MASK space.

Userspace was aware of these timelines forever as well. Media was
creating multiple contexts to have multiple timelines (so parallelism).
Everyone knew that engine-hopping submissions need to be either
implicitly or explicitly synchronised, etc.

So I really don't see that we have leaked timelines as a concept *now*. 
What the patch has exposed to userspace is a new way to sync between 
timelines and nothing more.

Regards,

Tvrtko

> So I do think the assessment is accurate, albeit a bit on the terse side.
> Maybe we could quote just the entire thing here in the commit message.
Daniel Vetter March 22, 2021, 2:57 p.m. UTC | #4
On Mon, Mar 22, 2021 at 3:33 PM Tvrtko Ursulin
<tvrtko.ursulin@linux.intel.com> wrote:
>
>
> On 22/03/2021 14:09, Daniel Vetter wrote:
> > On Mon, Mar 22, 2021 at 11:22:01AM +0000, Tvrtko Ursulin wrote:
> >>
> >> On 19/03/2021 22:38, Jason Ekstrand wrote:
> >>> This API allows one context to grab bits out of another context upon
> >>> creation.  It can be used as a short-cut for setparam(getparam()) for
> >>> things like I915_CONTEXT_PARAM_VM.  However, it's never been used by any
> >>> real userspace.  It's used by a few IGT tests and that's it.  Since it
> >>> doesn't add any real value (most of the stuff you can CLONE you can copy
> >>> in other ways), drop it.
> >>
> >> No complaints to remove if it ended up unused outside IGT. Latter is a _big_
> >> problem though, since it is much more that a few IGT tests. So I really
> >> think there really needs to be an evaluation and a plan for that (we don't
> >> want to lose 50% of the coverage over night).
> >>
> >>> There is one thing that this API allows you to clone which you cannot
> >>> clone via getparam/setparam: timelines.  However, timelines are an
> >>> implementation detail of i915 and not really something that needs to be
> >>
> >> Not really true timelines are i915 implementation detail. They are in fact a
> >> dma-fence context:seqno concept, nothing more that than. I think you are
> >> probably confusing struct intel_timeline with the timeline wording in the
> >> uapi. Former is i915 implementation detail, but context:seqno are truly
> >> userspace timelines.
> >
> > I think you're both saying the same thing and talking a bit past each
> > another.
> >
> > Yes the timeline is just a string of dma_fence, that's correct. Now
> > usually if you submit batches with execbuf, we have 3 ways to synchronize
> > concurrent submission: implicit sync, sync_file and drm_syncob. They all
> > map to different needs in different protocols/render apis.
> >
> > Now in one additional case the kernel makes sure that batchbuffers are
> > ordered, and that's when you submit them to the same hw ctx. Because
> > there's only 1 hw context and you really can't have batchbuffers run on
> > that single hw context out of order. That's what the timeline object we
> > talk about here is. But that largely is an internal implementation detail,
> > which happens to also use most/all the same infrastructure as the
> > dma_fence uapi pieces above.
> >
> > Now the internal implementation detail leaking here is that we exposed
> > this to userspace, without there being any need for this. What Jason
> > implements with syncobj in the next patch is essentially what userspace
> > should have been using for cross-engine sync. media userspace doesn't care
> > about interop with winsys/client apis, so they equally could have used
> > implicit sync or sync_file here (which I think is the solution now for the
> > new uapi prepped internally), since they all are about equally powerful
> > for stringing batchbuffers together.
>
> Are you saying we exposed a single timeline of execution per hw context
> via the single timeline flag?!

Nope.

> Timelines of execution were always exposed. Any "engine" (ring
> previously) in I915_EXEC_RING_MASK was a single timeline of execution.
> It is completely the same with engine map engines, which are also
> different indices into I915_EXEC_RING_MASK space.
>
> Userspace was aware of these timelines forever as well. Media was
> creating multiple contexts to have multiple timelines (so parallelism).
> Everyone knew that engine-hopping submissions needs to be either
> implicitly or explicitly synchronised, etc.

Yup, I think we're saying the same thing here.

> So I really don't see that we have leaked timelines as a concept *now*.
> What the patch has exposed to userspace is a new way to sync between
> timelines and nothing more.

We've leaked it as something you can now share across hw contexts,
which is possible because of how it's internally implemented (I think
the load balancer relies on that), but it's not really a synchronization
primitive we want to export as such to userspace. We have other
interfaces and concepts for that.
-Daniel
Tvrtko Ursulin March 22, 2021, 3:31 p.m. UTC | #5
On 22/03/2021 14:57, Daniel Vetter wrote:
> On Mon, Mar 22, 2021 at 3:33 PM Tvrtko Ursulin
> <tvrtko.ursulin@linux.intel.com> wrote:
>>
>>
>> On 22/03/2021 14:09, Daniel Vetter wrote:
>>> On Mon, Mar 22, 2021 at 11:22:01AM +0000, Tvrtko Ursulin wrote:
>>>>
>>>> On 19/03/2021 22:38, Jason Ekstrand wrote:
>>>>> This API allows one context to grab bits out of another context upon
>>>>> creation.  It can be used as a short-cut for setparam(getparam()) for
>>>>> things like I915_CONTEXT_PARAM_VM.  However, it's never been used by any
>>>>> real userspace.  It's used by a few IGT tests and that's it.  Since it
>>>>> doesn't add any real value (most of the stuff you can CLONE you can copy
>>>>> in other ways), drop it.
>>>>
>>>> No complaints to remove if it ended up unused outside IGT. Latter is a _big_
>>>> problem though, since it is much more that a few IGT tests. So I really
>>>> think there really needs to be an evaluation and a plan for that (we don't
>>>> want to lose 50% of the coverage over night).
>>>>
>>>>> There is one thing that this API allows you to clone which you cannot
>>>>> clone via getparam/setparam: timelines.  However, timelines are an
>>>>> implementation detail of i915 and not really something that needs to be
>>>>
>>>> Not really true timelines are i915 implementation detail. They are in fact a
>>>> dma-fence context:seqno concept, nothing more that than. I think you are
>>>> probably confusing struct intel_timeline with the timeline wording in the
>>>> uapi. Former is i915 implementation detail, but context:seqno are truly
>>>> userspace timelines.
>>>
>>> I think you're both saying the same thing and talking a bit past each
>>> another.
>>>
>>> Yes the timeline is just a string of dma_fence, that's correct. Now
>>> usually if you submit batches with execbuf, we have 3 ways to synchronize
>>> concurrent submission: implicit sync, sync_file and drm_syncob. They all
>>> map to different needs in different protocols/render apis.
>>>
>>> Now in one additional case the kernel makes sure that batchbuffers are
>>> ordered, and that's when you submit them to the same hw ctx. Because
>>> there's only 1 hw context and you really can't have batchbuffers run on
>>> that single hw context out of order. That's what the timeline object we
>>> talk about here is. But that largely is an internal implementation detail,
>>> which happens to also use most/all the same infrastructure as the
>>> dma_fence uapi pieces above.
>>>
>>> Now the internal implementation detail leaking here is that we exposed
>>> this to userspace, without there being any need for this. What Jason
>>> implements with syncobj in the next patch is essentially what userspace
>>> should have been using for cross-engine sync. media userspace doesn't care
>>> about interop with winsys/client apis, so they equally could have used
>>> implicit sync or sync_file here (which I think is the solution now for the
>>> new uapi prepped internally), since they all are about equally powerful
>>> for stringing batchbuffers together.
>>
>> Are you saying we exposed a single timeline of execution per hw context
>> via the single timeline flag?!
> 
> Nope.
> 
>> Timelines of execution were always exposed. Any "engine" (ring
>> previously) in I915_EXEC_RING_MASK was a single timeline of execution.
>> It is completely the same with engine map engines, which are also
>> different indices into I915_EXEC_RING_MASK space.
>>
>> Userspace was aware of these timelines forever as well. Media was
>> creating multiple contexts to have multiple timelines (so parallelism).
>> Everyone knew that engine-hopping submissions needs to be either
>> implicitly or explicitly synchronised, etc.
> 
> Yup, I think we're saying the same thing here.
> 
>> So I really don't see that we have leaked timelines as a concept *now*.
>> What the patch has exposed to userspace is a new way to sync between
>> timelines and nothing more.
> 
> We've leaked it as something you can now share across hw context.

Okay so we agree on most things but apparently have different 
definitions of what it means to leak internal implementation details.

At the same time, proof that we haven't leaked the internal
implementation details is that Jason was able to implement the single
timeline flag with a drm syncobj at the execbuf top level. (Well,
mostly, ignoring the probably inconsequential difference of one vs
multiple fence contexts.)

> Which is possible because of how it's internally implemented (I think
> load balancer relies on that), but not really a synchronization

A virtual engine is a single timeline by definition, and it remains so
regardless of the implementation details (execlists or GuC; in both
cases it is a single hardware context and a single timeline).

> primitive we want to export as such to userspace. We have other
> interfaces and concepts for that.

Yes, that is the only point to argue IMO. We can say it wasn't needed
and should have been avoided, but I still maintain we can't really say
we leaked anything backend-specific to userspace via it.

Regards,

Tvrtko
Jason Ekstrand March 22, 2021, 4:24 p.m. UTC | #6
Ugh... timezones.

On Mon, Mar 22, 2021 at 10:31 AM Tvrtko Ursulin
<tvrtko.ursulin@linux.intel.com> wrote:
>
>
> On 22/03/2021 14:57, Daniel Vetter wrote:
> > On Mon, Mar 22, 2021 at 3:33 PM Tvrtko Ursulin
> > <tvrtko.ursulin@linux.intel.com> wrote:
> >>
> >>
> >> On 22/03/2021 14:09, Daniel Vetter wrote:
> >>> On Mon, Mar 22, 2021 at 11:22:01AM +0000, Tvrtko Ursulin wrote:
> >>>>
> >>>> On 19/03/2021 22:38, Jason Ekstrand wrote:
> >>>>> This API allows one context to grab bits out of another context upon
> >>>>> creation.  It can be used as a short-cut for setparam(getparam()) for
> >>>>> things like I915_CONTEXT_PARAM_VM.  However, it's never been used by any
> >>>>> real userspace.  It's used by a few IGT tests and that's it.  Since it
> >>>>> doesn't add any real value (most of the stuff you can CLONE you can copy
> >>>>> in other ways), drop it.
> >>>>
> >>>> No complaints to remove if it ended up unused outside IGT. Latter is a _big_
> >>>> problem though, since it is much more that a few IGT tests. So I really
> >>>> think there really needs to be an evaluation and a plan for that (we don't
> >>>> want to lose 50% of the coverage over night).

You should look at my IGT patch set.  I'm not deleting any tests
except those that explicitly test the clone API.  All the other tests
which use cloning to save a few lines when constructing new contexts
are updated to not require the cloning API.

> >>>>> There is one thing that this API allows you to clone which you cannot
> >>>>> clone via getparam/setparam: timelines.  However, timelines are an
> >>>>> implementation detail of i915 and not really something that needs to be
> >>>>
> >>>> Not really true timelines are i915 implementation detail. They are in fact a
> >>>> dma-fence context:seqno concept, nothing more that than. I think you are
> >>>> probably confusing struct intel_timeline with the timeline wording in the
> >>>> uapi. Former is i915 implementation detail, but context:seqno are truly
> >>>> userspace timelines.
> >>>
> >>> I think you're both saying the same thing and talking a bit past each
> >>> another.
> >>>
> >>> Yes the timeline is just a string of dma_fence, that's correct. Now
> >>> usually if you submit batches with execbuf, we have 3 ways to synchronize
> >>> concurrent submission: implicit sync, sync_file and drm_syncob. They all
> >>> map to different needs in different protocols/render apis.

Right.  We've always had the concept that everything submitted to a
given HW context happens in-order.  As Daniel said below, allowing
out-of-order execution on a single HW context would be a bit nuts
because HW contexts are, by definition, stateful.  What this API adds
is a way to do in-order synchronization across multiple HW contexts,
which is both new and unnecessary given the other primitives
available.

> >>> Now in one additional case the kernel makes sure that batchbuffers are
> >>> ordered, and that's when you submit them to the same hw ctx. Because
> >>> there's only 1 hw context and you really can't have batchbuffers run on
> >>> that single hw context out of order. That's what the timeline object we
> >>> talk about here is. But that largely is an internal implementation detail,
> >>> which happens to also use most/all the same infrastructure as the
> >>> dma_fence uapi pieces above.
> >>>
> >>> Now the internal implementation detail leaking here is that we exposed
> >>> this to userspace, without there being any need for this. What Jason
> >>> implements with syncobj in the next patch is essentially what userspace
> >>> should have been using for cross-engine sync. media userspace doesn't care
> >>> about interop with winsys/client apis, so they equally could have used
> >>> implicit sync or sync_file here (which I think is the solution now for the
> >>> new uapi prepped internally), since they all are about equally powerful
> >>> for stringing batchbuffers together.
> >>
> >> Are you saying we exposed a single timeline of execution per hw context
> >> via the single timeline flag?!
> >
> > Nope.
> >
> >> Timelines of execution were always exposed. Any "engine" (ring
> >> previously) in I915_EXEC_RING_MASK was a single timeline of execution.
> >> It is completely the same with engine map engines, which are also
> >> different indices into I915_EXEC_RING_MASK space.
> >>
> >> Userspace was aware of these timelines forever as well. Media was
> >> creating multiple contexts to have multiple timelines (so parallelism).
> >> Everyone knew that engine-hopping submissions needs to be either
> >> implicitly or explicitly synchronised, etc.
> >
> > Yup, I think we're saying the same thing here.
> >
> >> So I really don't see that we have leaked timelines as a concept *now*.
> >> What the patch has exposed to userspace is a new way to sync between
> >> timelines and nothing more.
> >
> > We've leaked it as something you can now share across hw context.
>
> Okay so we agree on most things but apparently have different
> definitions of what it means to leak internal implementation details.

I said it was a "leak" because, from my git archeology, the best I
could find for justification of doing it this way was that we already
have a timeline object so why not expose it.  Same for the
SINGLE_TIMELINE flag.  Is a "timeline" really an internal concept?
No, not really.  It's pretty standard.  But intel_timeline is an
internal thing and, while this doesn't give userspace an actual handle
to it, it gives it more visibility than needed, IMO.

--Jason


> While at the same time proof that we haven't leaked the internal
> implementation details is that Jason was able to implement the single
> timeline flag with a drm syncobj at the execbuf top level. (Well mostly,
> ignoring the probably inconsequential difference of one vs multiple
> fence contexts.)
>
> > Which is possible because of how it's internally implemented (I think
> > load balancer relies on that), but not really a synchronization
>
> Virtual engine is a single timeline by definition and it is still that
> regardless of the implementation details (execlists or GuC, in both
> cases it is a single hardware context and a single timeline).
>
> > primitive we want to export as such to userspace. We have other
> > interfaces and concepts for that.
>
> Yes, that is the only point to argue IMO. We can say it wasn't needed
> and should have been avoided, but I still maintain we can't really say
> we leaked anything backend specific to userspace via it.
>
> Regards,
>
> Tvrtko
Daniel Vetter March 22, 2021, 4:43 p.m. UTC | #7
On Mon, Mar 22, 2021 at 4:31 PM Tvrtko Ursulin
<tvrtko.ursulin@linux.intel.com> wrote:
>
>
> On 22/03/2021 14:57, Daniel Vetter wrote:
> > On Mon, Mar 22, 2021 at 3:33 PM Tvrtko Ursulin
> > <tvrtko.ursulin@linux.intel.com> wrote:
> >>
> >>
> >> On 22/03/2021 14:09, Daniel Vetter wrote:
> >>> On Mon, Mar 22, 2021 at 11:22:01AM +0000, Tvrtko Ursulin wrote:
> >>>>
> >>>> On 19/03/2021 22:38, Jason Ekstrand wrote:
> >>>>> This API allows one context to grab bits out of another context upon
> >>>>> creation.  It can be used as a short-cut for setparam(getparam()) for
> >>>>> things like I915_CONTEXT_PARAM_VM.  However, it's never been used by any
> >>>>> real userspace.  It's used by a few IGT tests and that's it.  Since it
> >>>>> doesn't add any real value (most of the stuff you can CLONE you can copy
> >>>>> in other ways), drop it.
> >>>>
> >>>> No complaints to remove if it ended up unused outside IGT. Latter is a _big_
> >>>> problem though, since it is much more that a few IGT tests. So I really
> >>>> think there really needs to be an evaluation and a plan for that (we don't
> >>>> want to lose 50% of the coverage over night).
> >>>>
> >>>>> There is one thing that this API allows you to clone which you cannot
> >>>>> clone via getparam/setparam: timelines.  However, timelines are an
> >>>>> implementation detail of i915 and not really something that needs to be
> >>>>
> >>>> Not really true timelines are i915 implementation detail. They are in fact a
> >>>> dma-fence context:seqno concept, nothing more that than. I think you are
> >>>> probably confusing struct intel_timeline with the timeline wording in the
> >>>> uapi. Former is i915 implementation detail, but context:seqno are truly
> >>>> userspace timelines.
> >>>
> >>> I think you're both saying the same thing and talking a bit past each
> >>> another.
> >>>
> >>> Yes the timeline is just a string of dma_fence, that's correct. Now
> >>> usually if you submit batches with execbuf, we have 3 ways to synchronize
> >>> concurrent submission: implicit sync, sync_file and drm_syncob. They all
> >>> map to different needs in different protocols/render apis.
> >>>
> >>> Now in one additional case the kernel makes sure that batchbuffers are
> >>> ordered, and that's when you submit them to the same hw ctx. Because
> >>> there's only 1 hw context and you really can't have batchbuffers run on
> >>> that single hw context out of order. That's what the timeline object we
> >>> talk about here is. But that largely is an internal implementation detail,
> >>> which happens to also use most/all the same infrastructure as the
> >>> dma_fence uapi pieces above.
> >>>
> >>> Now the internal implementation detail leaking here is that we exposed
> >>> this to userspace, without there being any need for this. What Jason
> >>> implements with syncobj in the next patch is essentially what userspace
> >>> should have been using for cross-engine sync. media userspace doesn't care
> >>> about interop with winsys/client apis, so they equally could have used
> >>> implicit sync or sync_file here (which I think is the solution now for the
> >>> new uapi prepped internally), since they all are about equally powerful
> >>> for stringing batchbuffers together.
> >>
> >> Are you saying we exposed a single timeline of execution per hw context
> >> via the single timeline flag?!
> >
> > Nope.
> >
> >> Timelines of execution were always exposed. Any "engine" (ring
> >> previously) in I915_EXEC_RING_MASK was a single timeline of execution.
> >> It is completely the same with engine map engines, which are also
> >> different indices into I915_EXEC_RING_MASK space.
> >>
> >> Userspace was aware of these timelines forever as well. Media was
> >> creating multiple contexts to have multiple timelines (so parallelism).
> >> Everyone knew that engine-hopping submissions needs to be either
> >> implicitly or explicitly synchronised, etc.
> >
> > Yup, I think we're saying the same thing here.
> >
> >> So I really don't see that we have leaked timelines as a concept *now*.
> >> What the patch has exposed to userspace is a new way to sync between
> >> timelines and nothing more.
> >
> > We've leaked it as something you can now share across hw context.
>
> Okay so we agree on most things but apparently have different
> definitions of what it means to leak internal implementation details.
>
> While at the same time proof that we haven't leaked the internal
> implementation details is that Jason was able to implement the single
> timeline flag with a drm syncobj at the execbuf top level. (Well mostly,
> ignoring the probably inconsequential difference of one vs multiple
> fence contexts.)

It's not a matching implementation. It's only good enough for what
media needs, and essentially what media should have done to begin
with.

There's substantially different behaviour between SINGLE_TIMELINE and
what Jason has done here when you race concurrent execbuf calls: the
former guarantees total ordering, the latter doesn't even try. They
are not the same thing, but luckily userspace doesn't care about that
difference.

Aside, just to make sure this won't get lost: I do agree that we should
only allow this up to maybe ADL, and reject it on anything new (maybe
including dg1 while we're at it, since the pci ids for that aren't
even close to upstream yet).
-Daniel

> > Which is possible because of how it's internally implemented (I think
> > load balancer relies on that), but not really a synchronization
>
> Virtual engine is a single timeline by definition and it is still that
> regardless of the implementation details (execlists or GuC, in both
> cases it is a single hardware context and a single timeline).
>
> > primitive we want to export as such to userspace. We have other
> > interfaces and concepts for that.
>
> Yes, that is the only point to argue IMO. We can say it wasn't needed
> and should have been avoided, but I still maintain we can't really say
> we leaked anything backend specific to userspace via it.
>
> Regards,
>
> Tvrtko
Tvrtko Ursulin March 23, 2021, 9:14 a.m. UTC | #8
On 22/03/2021 16:43, Daniel Vetter wrote:
> On Mon, Mar 22, 2021 at 4:31 PM Tvrtko Ursulin
> <tvrtko.ursulin@linux.intel.com> wrote:
>>
>>
>> On 22/03/2021 14:57, Daniel Vetter wrote:
>>> On Mon, Mar 22, 2021 at 3:33 PM Tvrtko Ursulin
>>> <tvrtko.ursulin@linux.intel.com> wrote:
>>>>
>>>>
>>>> On 22/03/2021 14:09, Daniel Vetter wrote:
>>>>> On Mon, Mar 22, 2021 at 11:22:01AM +0000, Tvrtko Ursulin wrote:
>>>>>>
>>>>>> On 19/03/2021 22:38, Jason Ekstrand wrote:
>>>>>>> This API allows one context to grab bits out of another context upon
>>>>>>> creation.  It can be used as a short-cut for setparam(getparam()) for
>>>>>>> things like I915_CONTEXT_PARAM_VM.  However, it's never been used by any
>>>>>>> real userspace.  It's used by a few IGT tests and that's it.  Since it
>>>>>>> doesn't add any real value (most of the stuff you can CLONE you can copy
>>>>>>> in other ways), drop it.
>>>>>>
>>>>>> No complaints to remove if it ended up unused outside IGT. Latter is a _big_
>>>>>> problem though, since it is much more that a few IGT tests. So I really
>>>>>> think there really needs to be an evaluation and a plan for that (we don't
>>>>>> want to lose 50% of the coverage over night).
>>>>>>
>>>>>>> There is one thing that this API allows you to clone which you cannot
>>>>>>> clone via getparam/setparam: timelines.  However, timelines are an
>>>>>>> implementation detail of i915 and not really something that needs to be
>>>>>>
>>>>>> Not really true timelines are i915 implementation detail. They are in fact a
>>>>>> dma-fence context:seqno concept, nothing more that than. I think you are
>>>>>> probably confusing struct intel_timeline with the timeline wording in the
>>>>>> uapi. Former is i915 implementation detail, but context:seqno are truly
>>>>>> userspace timelines.
>>>>>
>>>>> I think you're both saying the same thing and talking a bit past each
>>>>> another.
>>>>>
>>>>> Yes the timeline is just a string of dma_fence, that's correct. Now
>>>>> usually if you submit batches with execbuf, we have 3 ways to synchronize
>>>>> concurrent submission: implicit sync, sync_file and drm_syncob. They all
>>>>> map to different needs in different protocols/render apis.
>>>>>
>>>>> Now in one additional case the kernel makes sure that batchbuffers are
>>>>> ordered, and that's when you submit them to the same hw ctx. Because
>>>>> there's only 1 hw context and you really can't have batchbuffers run on
>>>>> that single hw context out of order. That's what the timeline object we
>>>>> talk about here is. But that largely is an internal implementation detail,
>>>>> which happens to also use most/all the same infrastructure as the
>>>>> dma_fence uapi pieces above.
>>>>>
>>>>> Now the internal implementation detail leaking here is that we exposed
>>>>> this to userspace, without there being any need for this. What Jason
>>>>> implements with syncobj in the next patch is essentially what userspace
>>>>> should have been using for cross-engine sync. media userspace doesn't care
>>>>> about interop with winsys/client apis, so they equally could have used
>>>>> implicit sync or sync_file here (which I think is the solution now for the
>>>>> new uapi prepped internally), since they all are about equally powerful
>>>>> for stringing batchbuffers together.
>>>>
>>>> Are you saying we exposed a single timeline of execution per hw context
>>>> via the single timeline flag?!
>>>
>>> Nope.
>>>
>>>> Timelines of execution were always exposed. Any "engine" (ring
>>>> previously) in I915_EXEC_RING_MASK was a single timeline of execution.
>>>> It is completely the same with engine map engines, which are also
>>>> different indices into I915_EXEC_RING_MASK space.
>>>>
>>>> Userspace was aware of these timelines forever as well. Media was
>>>> creating multiple contexts to have multiple timelines (so parallelism).
>>>> Everyone knew that engine-hopping submissions needs to be either
>>>> implicitly or explicitly synchronised, etc.
>>>
>>> Yup, I think we're saying the same thing here.
>>>
>>>> So I really don't see that we have leaked timelines as a concept *now*.
>>>> What the patch has exposed to userspace is a new way to sync between
>>>> timelines and nothing more.
>>>
>>> We've leaked it as something you can now share across hw context.
>>
>> Okay so we agree on most things but apparently have different
>> definitions of what it means to leak internal implementation details.
>>
>> While at the same time proof that we haven't leaked the internal
>> implementation details is that Jason was able to implement the single
>> timeline flag with a drm syncobj at the execbuf top level. (Well mostly,
>> ignoring the probably inconsequential difference of one vs multiple
>> fence contexts.)
> 
> It's not a matching implementation. It's only good enough for what
> media needs, and essentially what media should have done to begin
> with.
> 
> There's substantially different behaviour between SINGLE_TIMELINE and
> what Jason has done here when you race concurrent execbuf calls:
> Former guarantees total ordering, the latter doesn't even try. They
> are not the same thing, but luckily userspace doesn't care about that
> difference.

Sounds like a very important difference to stress in the commit message.

Secondly, it is unclear to me whether we have agreement on whether the
single timeline flag is leaking implementation details of the execlists
scheduler to userspace or not.

Regards,

Tvrtko

> 
> Aside, just to make sure this wont get lost: I do agree that we should
> only allow this up to maybe ADL, and reject it on anything new (maybe
> including dg1 while we're at it, since the pci ids for that aren't
> even close to upstream yet).
> -Daniel
> 
>>> Which is possible because of how it's internally implemented (I think
>>> load balancer relies on that), but not really a synchronization
>>
>> Virtual engine is a single timeline by definition and it is still that
>> regardless of the implementation details (execlists or GuC, in both
>> cases it is a single hardware context and a single timeline).
>>
>>> primitive we want to export as such to userspace. We have other
>>> interfaces and concepts for that.
>>
>> Yes, that is the only point to argue IMO. We can say it wasn't needed
>> and should have been avoided, but I still maintain we can't really say
>> we leaked anything backend specific to userspace via it.
>>
>> Regards,
>>
>> Tvrtko
> 
> 
>
Tvrtko Ursulin March 23, 2021, 9:46 a.m. UTC | #9
On 22/03/2021 16:24, Jason Ekstrand wrote:
> Ugh... timezones.
> 
> On Mon, Mar 22, 2021 at 10:31 AM Tvrtko Ursulin
> <tvrtko.ursulin@linux.intel.com> wrote:
>>
>>
>> On 22/03/2021 14:57, Daniel Vetter wrote:
>>> On Mon, Mar 22, 2021 at 3:33 PM Tvrtko Ursulin
>>> <tvrtko.ursulin@linux.intel.com> wrote:
>>>>
>>>>
>>>> On 22/03/2021 14:09, Daniel Vetter wrote:
>>>>> On Mon, Mar 22, 2021 at 11:22:01AM +0000, Tvrtko Ursulin wrote:
>>>>>>
>>>>>> On 19/03/2021 22:38, Jason Ekstrand wrote:
>>>>>>> This API allows one context to grab bits out of another context upon
>>>>>>> creation.  It can be used as a short-cut for setparam(getparam()) for
>>>>>>> things like I915_CONTEXT_PARAM_VM.  However, it's never been used by any
>>>>>>> real userspace.  It's used by a few IGT tests and that's it.  Since it
>>>>>>> doesn't add any real value (most of the stuff you can CLONE you can copy
>>>>>>> in other ways), drop it.
>>>>>>
>>>>>> No complaints to remove if it ended up unused outside IGT. Latter is a _big_
>>>>>> problem though, since it is much more that a few IGT tests. So I really
>>>>>> think there really needs to be an evaluation and a plan for that (we don't
>>>>>> want to lose 50% of the coverage over night).
> 
> You should look at my IGT patch set.  I'm not deleting any tests
> except those that explicitly test the clone API.  All the other tests
> which use cloning to save a few lines when constructing new contexts
> are updated to not require the cloning API.

I dare not mention the other IGT tree. A plan will be needed since I
fear much more usage will be found there.

[snip]

>>>> Timelines of execution were always exposed. Any "engine" (ring
>>>> previously) in I915_EXEC_RING_MASK was a single timeline of execution.
>>>> It is completely the same with engine map engines, which are also
>>>> different indices into I915_EXEC_RING_MASK space.
>>>>
>>>> Userspace was aware of these timelines forever as well. Media was
>>>> creating multiple contexts to have multiple timelines (so parallelism).
>>>> Everyone knew that engine-hopping submissions needs to be either
>>>> implicitly or explicitly synchronised, etc.
>>>
>>> Yup, I think we're saying the same thing here.
>>>
>>>> So I really don't see that we have leaked timelines as a concept *now*.
>>>> What the patch has exposed to userspace is a new way to sync between
>>>> timelines and nothing more.
>>>
>>> We've leaked it as something you can now share across hw context.
>>
>> Okay so we agree on most things but apparently have different
>> definitions of what it means to leak internal implementation details.
> 
> I said it was a "leak" because, from my git archeology, the best I
> could find for justification of doing it this way was that we already
> have a timeline object so why not expose it.  Same for the
> SINGLE_TIMELINE flag.  Is a "timeline" really an internal concept?
> No, not really.  It's pretty standard.  But intel_timeline is an
> internal thing and, while this doesn't give userspace an actual handle
> to it, it gives it more visibility than needed, IMO.

Cloning of timelines can absolutely go - I don't see a point for that.
But I think there was no intent there; rather, it was just a consequence
of striving for symmetry in the uapi.

But for the single timeline flag itself (so the next patch in this
series and its commit message), when looked at within a single GEM
context, I still really can't see the argument that it is leaking
anything to userspace. Certainly not intel_timeline, which is also not
even backend specific.

We all seem to agree a timeline is just context:seqno, which has been
exposed to userspace forever. For instance, if the flag wasn't called
"single timeline" but "implicit sync", "serial context", "ordered
engines", whatever, would you still argue it is leaking struct
intel_timeline out to userspace?
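
(As an aside, a minimal sketch of that definition at the dma-fence
level - purely illustrative, the fence ops, lock and helper names below
are made up and not real i915 code:)

	#include <linux/dma-fence.h>
	#include <linux/spinlock.h>

	/* A "timeline" is nothing more than a dma-fence context number
	 * plus a seqno which increases with every fence emitted on it. */
	static u64 timeline_ctx;		/* the context half */
	static u64 timeline_seqno;		/* the seqno half */
	static DEFINE_SPINLOCK(timeline_lock);

	static void timeline_init(void)
	{
		timeline_ctx = dma_fence_context_alloc(1);
	}

	static void timeline_add_fence(struct dma_fence *fence)
	{
		/* fences sharing a context id are ordered purely by seqno */
		dma_fence_init(fence, &my_fence_ops, &timeline_lock,
			       timeline_ctx, ++timeline_seqno);
	}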

Regards,

Tvrtko
Daniel Vetter March 23, 2021, 1:23 p.m. UTC | #10
On Tue, Mar 23, 2021 at 09:14:36AM +0000, Tvrtko Ursulin wrote:
> 
> On 22/03/2021 16:43, Daniel Vetter wrote:
> > On Mon, Mar 22, 2021 at 4:31 PM Tvrtko Ursulin
> > <tvrtko.ursulin@linux.intel.com> wrote:
> > > 
> > > 
> > > On 22/03/2021 14:57, Daniel Vetter wrote:
> > > > On Mon, Mar 22, 2021 at 3:33 PM Tvrtko Ursulin
> > > > <tvrtko.ursulin@linux.intel.com> wrote:
> > > > > 
> > > > > 
> > > > > On 22/03/2021 14:09, Daniel Vetter wrote:
> > > > > > On Mon, Mar 22, 2021 at 11:22:01AM +0000, Tvrtko Ursulin wrote:
> > > > > > > 
> > > > > > > On 19/03/2021 22:38, Jason Ekstrand wrote:
> > > > > > > > This API allows one context to grab bits out of another context upon
> > > > > > > > creation.  It can be used as a short-cut for setparam(getparam()) for
> > > > > > > > things like I915_CONTEXT_PARAM_VM.  However, it's never been used by any
> > > > > > > > real userspace.  It's used by a few IGT tests and that's it.  Since it
> > > > > > > > doesn't add any real value (most of the stuff you can CLONE you can copy
> > > > > > > > in other ways), drop it.
> > > > > > > 
> > > > > > > No complaints to remove if it ended up unused outside IGT. Latter is a _big_
> > > > > > > problem though, since it is much more that a few IGT tests. So I really
> > > > > > > think there really needs to be an evaluation and a plan for that (we don't
> > > > > > > want to lose 50% of the coverage over night).
> > > > > > > 
> > > > > > > > There is one thing that this API allows you to clone which you cannot
> > > > > > > > clone via getparam/setparam: timelines.  However, timelines are an
> > > > > > > > implementation detail of i915 and not really something that needs to be
> > > > > > > 
> > > > > > > Not really true timelines are i915 implementation detail. They are in fact a
> > > > > > > dma-fence context:seqno concept, nothing more that than. I think you are
> > > > > > > probably confusing struct intel_timeline with the timeline wording in the
> > > > > > > uapi. Former is i915 implementation detail, but context:seqno are truly
> > > > > > > userspace timelines.
> > > > > > 
> > > > > > I think you're both saying the same thing and talking a bit past each
> > > > > > another.
> > > > > > 
> > > > > > Yes the timeline is just a string of dma_fence, that's correct. Now
> > > > > > usually if you submit batches with execbuf, we have 3 ways to synchronize
> > > > > > concurrent submission: implicit sync, sync_file and drm_syncob. They all
> > > > > > map to different needs in different protocols/render apis.
> > > > > > 
> > > > > > Now in one additional case the kernel makes sure that batchbuffers are
> > > > > > ordered, and that's when you submit them to the same hw ctx. Because
> > > > > > there's only 1 hw context and you really can't have batchbuffers run on
> > > > > > that single hw context out of order. That's what the timeline object we
> > > > > > talk about here is. But that largely is an internal implementation detail,
> > > > > > which happens to also use most/all the same infrastructure as the
> > > > > > dma_fence uapi pieces above.
> > > > > > 
> > > > > > Now the internal implementation detail leaking here is that we exposed
> > > > > > this to userspace, without there being any need for this. What Jason
> > > > > > implements with syncobj in the next patch is essentially what userspace
> > > > > > should have been using for cross-engine sync. media userspace doesn't care
> > > > > > about interop with winsys/client apis, so they equally could have used
> > > > > > implicit sync or sync_file here (which I think is the solution now for the
> > > > > > new uapi prepped internally), since they all are about equally powerful
> > > > > > for stringing batchbuffers together.
> > > > > 
> > > > > Are you saying we exposed a single timeline of execution per hw context
> > > > > via the single timeline flag?!
> > > > 
> > > > Nope.
> > > > 
> > > > > Timelines of execution were always exposed. Any "engine" (ring
> > > > > previously) in I915_EXEC_RING_MASK was a single timeline of execution.
> > > > > It is completely the same with engine map engines, which are also
> > > > > different indices into I915_EXEC_RING_MASK space.
> > > > > 
> > > > > Userspace was aware of these timelines forever as well. Media was
> > > > > creating multiple contexts to have multiple timelines (so parallelism).
> > > > > Everyone knew that engine-hopping submissions needs to be either
> > > > > implicitly or explicitly synchronised, etc.
> > > > 
> > > > Yup, I think we're saying the same thing here.
> > > > 
> > > > > So I really don't see that we have leaked timelines as a concept *now*.
> > > > > What the patch has exposed to userspace is a new way to sync between
> > > > > timelines and nothing more.
> > > > 
> > > > We've leaked it as something you can now share across hw context.
> > > 
> > > Okay so we agree on most things but apparently have different
> > > definitions of what it means to leak internal implementation details.
> > > 
> > > While at the same time proof that we haven't leaked the internal
> > > implementation details is that Jason was able to implement the single
> > > timeline flag with a drm syncobj at the execbuf top level. (Well mostly,
> > > ignoring the probably inconsequential difference of one vs multiple
> > > fence contexts.)
> > 
> > It's not a matching implementation. It's only good enough for what
> > media needs, and essentially what media should have done to begin
> > with.
> > 
> > There's substantially different behaviour between SINGLE_TIMELINE and
> > what Jason has done here when you race concurrent execbuf calls:
> > Former guarantees total ordering, the latter doesn't even try. They
> > are not the same thing, but luckily userspace doesn't care about that
> > difference.
> 
> Sounds like a very important difference to stress in the commit message.
> 
> Secondly, I am unclear whether we have agreement on whether the single
> timeline flag is leaking implementation details of the execlists scheduler
> to userspace or not?

I do think Jason and I agree that it does leak an internal concept to
userspace that we shouldn't leak.

I'm honestly not entirely understanding your argument for why
single_timeline isn't an internal concept somehow, and how exposing it to
userspace doesn't leak that concept to userspace. Whether internally that
concept is now perfectly represented by just struct intel_timeline, or
maybe more by the seqno/HWSP, or more diffused through the code, doesn't
really change that we have an internal concept that we're now exposing
for sharing in ways that weren't possible before.
-Daniel

> Regards,
> 
> Tvrtko
> 
> > 
> > Aside, just to make sure this wont get lost: I do agree that we should
> > only allow this up to maybe ADL, and reject it on anything new (maybe
> > including dg1 while we're at it, since the pci ids for that aren't
> > even close to upstream yet).
> > -Daniel
> > 
> > > > Which is possible because of how it's internally implemented (I think
> > > > load balancer relies on that), but not really a synchronization
> > > 
> > > Virtual engine is a single timeline by definition and it is still that
> > > regardless of the implementation details (execlists or GuC, in both
> > > cases it is a single hardware context and a single timeline).
> > > 
> > > > primitive we want to export as such to userspace. We have other
> > > > interfaces and concepts for that.
> > > 
> > > Yes, that is the only point to argue IMO. We can say it wasn't needed
> > > and should have been avoided, but I still maintain we can't really say
> > > we leaked anything backend specific to userspace via it.
> > > 
> > > Regards,
> > > 
> > > Tvrtko
> > 
> > 
> >
Tvrtko Ursulin March 23, 2021, 4:23 p.m. UTC | #11
On 23/03/2021 13:23, Daniel Vetter wrote:
> On Tue, Mar 23, 2021 at 09:14:36AM +0000, Tvrtko Ursulin wrote:
>>
>> On 22/03/2021 16:43, Daniel Vetter wrote:
>>> On Mon, Mar 22, 2021 at 4:31 PM Tvrtko Ursulin
>>> <tvrtko.ursulin@linux.intel.com> wrote:
>>>>
>>>>
>>>> On 22/03/2021 14:57, Daniel Vetter wrote:
>>>>> On Mon, Mar 22, 2021 at 3:33 PM Tvrtko Ursulin
>>>>> <tvrtko.ursulin@linux.intel.com> wrote:
>>>>>>
>>>>>>
>>>>>> On 22/03/2021 14:09, Daniel Vetter wrote:
>>>>>>> On Mon, Mar 22, 2021 at 11:22:01AM +0000, Tvrtko Ursulin wrote:
>>>>>>>>
>>>>>>>> On 19/03/2021 22:38, Jason Ekstrand wrote:
>>>>>>>>> This API allows one context to grab bits out of another context upon
>>>>>>>>> creation.  It can be used as a short-cut for setparam(getparam()) for
>>>>>>>>> things like I915_CONTEXT_PARAM_VM.  However, it's never been used by any
>>>>>>>>> real userspace.  It's used by a few IGT tests and that's it.  Since it
>>>>>>>>> doesn't add any real value (most of the stuff you can CLONE you can copy
>>>>>>>>> in other ways), drop it.
>>>>>>>>
>>>>>>>> No complaints to remove if it ended up unused outside IGT. Latter is a _big_
>>>>>>>> problem though, since it is much more that a few IGT tests. So I really
>>>>>>>> think there really needs to be an evaluation and a plan for that (we don't
>>>>>>>> want to lose 50% of the coverage over night).
>>>>>>>>
>>>>>>>>> There is one thing that this API allows you to clone which you cannot
>>>>>>>>> clone via getparam/setparam: timelines.  However, timelines are an
>>>>>>>>> implementation detail of i915 and not really something that needs to be
>>>>>>>>
>>>>>>>> Not really true timelines are i915 implementation detail. They are in fact a
>>>>>>>> dma-fence context:seqno concept, nothing more that than. I think you are
>>>>>>>> probably confusing struct intel_timeline with the timeline wording in the
>>>>>>>> uapi. Former is i915 implementation detail, but context:seqno are truly
>>>>>>>> userspace timelines.
>>>>>>>
>>>>>>> I think you're both saying the same thing and talking a bit past each
>>>>>>> another.
>>>>>>>
>>>>>>> Yes the timeline is just a string of dma_fence, that's correct. Now
>>>>>>> usually if you submit batches with execbuf, we have 3 ways to synchronize
>>>>>>> concurrent submission: implicit sync, sync_file and drm_syncob. They all
>>>>>>> map to different needs in different protocols/render apis.
>>>>>>>
>>>>>>> Now in one additional case the kernel makes sure that batchbuffers are
>>>>>>> ordered, and that's when you submit them to the same hw ctx. Because
>>>>>>> there's only 1 hw context and you really can't have batchbuffers run on
>>>>>>> that single hw context out of order. That's what the timeline object we
>>>>>>> talk about here is. But that largely is an internal implementation detail,
>>>>>>> which happens to also use most/all the same infrastructure as the
>>>>>>> dma_fence uapi pieces above.
>>>>>>>
>>>>>>> Now the internal implementation detail leaking here is that we exposed
>>>>>>> this to userspace, without there being any need for this. What Jason
>>>>>>> implements with syncobj in the next patch is essentially what userspace
>>>>>>> should have been using for cross-engine sync. media userspace doesn't care
>>>>>>> about interop with winsys/client apis, so they equally could have used
>>>>>>> implicit sync or sync_file here (which I think is the solution now for the
>>>>>>> new uapi prepped internally), since they all are about equally powerful
>>>>>>> for stringing batchbuffers together.
>>>>>>
>>>>>> Are you saying we exposed a single timeline of execution per hw context
>>>>>> via the single timeline flag?!
>>>>>
>>>>> Nope.
>>>>>
>>>>>> Timelines of execution were always exposed. Any "engine" (ring
>>>>>> previously) in I915_EXEC_RING_MASK was a single timeline of execution.
>>>>>> It is completely the same with engine map engines, which are also
>>>>>> different indices into I915_EXEC_RING_MASK space.
>>>>>>
>>>>>> Userspace was aware of these timelines forever as well. Media was
>>>>>> creating multiple contexts to have multiple timelines (so parallelism).
>>>>>> Everyone knew that engine-hopping submissions needs to be either
>>>>>> implicitly or explicitly synchronised, etc.
>>>>>
>>>>> Yup, I think we're saying the same thing here.
>>>>>
>>>>>> So I really don't see that we have leaked timelines as a concept *now*.
>>>>>> What the patch has exposed to userspace is a new way to sync between
>>>>>> timelines and nothing more.
>>>>>
>>>>> We've leaked it as something you can now share across hw context.
>>>>
>>>> Okay so we agree on most things but apparently have different
>>>> definitions of what it means to leak internal implementation details.
>>>>
>>>> While at the same time proof that we haven't leaked the internal
>>>> implementation details is that Jason was able to implement the single
>>>> timeline flag with a drm syncobj at the execbuf top level. (Well mostly,
>>>> ignoring the probably inconsequential difference of one vs multiple
>>>> fence contexts.)
>>>
>>> It's not a matching implementation. It's only good enough for what
>>> media needs, and essentially what media should have done to begin
>>> with.
>>>
>>> There's substantially different behaviour between SINGLE_TIMELINE and
>>> what Jason has done here when you race concurrent execbuf calls:
>>> Former guarantees total ordering, the latter doesn't even try. They
>>> are not the same thing, but luckily userspace doesn't care about that
>>> difference.
>>
>> Sounds like a very important difference to stress in the commit message.
>>
>> Secondly, I am unclear whether we have agreement on whether the single
>> timeline flag is leaking implementation details of the execlists scheduler
>> to userspace or not?
> 
> I do think Jason&me agree on that it does leak an internal concept to
> userspace that we shouldn't leak.
> 
> I'm honestly not entirely understanding your argument for why
> single_timeline isn't an internal concept somehow, and how exposing it to
> userspace doesn't leak that concept to userspace. Whether internally that
> concept is now perfectly represented by just struct intel_timeline, or
> maybe more the seqno/hswp, or more diffused through the code doesn't
> really change that we have an internal concept that we're now exposing for
> sharing in ways that wasn't possible before.

I don't know; obviously we think with very different paradigms.

A GEM context always had as many timelines as there are engines in its
map, so multiple timelines are the default mode everyone is aware of.

The single timeline flag added a new mode where, instead of multiple
timelines, a single GEM context becomes a single timeline.

The fact that userspace can achieve single timeline execution on its own
should be argument enough that it is not a new concept that got leaked
out. Definitely not any backend specific implementation details. It
simply added a new feature which may or may not have been needed.
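
(Roughly what I mean - a hypothetical userspace sketch, not taken from
any real driver; the function name and arguments are invented, and as
Daniel notes above it does not give exactly the same ordering guarantees
as the kernel flag when execbuf calls race:)

	#include <stdint.h>
	#include <xf86drm.h>
	#include <drm/i915_drm.h>

	/* Submit a batch on any context while staying serialised against
	 * everything else using the same syncobj: wait on it going in,
	 * signal it going out, so submissions are chained back to back. */
	static int submit_on_shared_timeline(int fd, uint32_t ctx_id,
					     uint32_t syncobj,
					     struct drm_i915_gem_exec_object2 *objs,
					     unsigned int nobjs,
					     unsigned int engine)
	{
		struct drm_i915_gem_exec_fence fence = {
			.handle = syncobj,
			.flags = I915_EXEC_FENCE_WAIT | I915_EXEC_FENCE_SIGNAL,
		};
		struct drm_i915_gem_execbuffer2 execbuf = {
			.buffers_ptr = (uintptr_t)objs,
			.buffer_count = nobjs,
			.flags = engine | I915_EXEC_FENCE_ARRAY,
			.cliprects_ptr = (uintptr_t)&fence,	/* fence array */
			.num_cliprects = 1,			/* one in/out fence */
			.rsvd1 = ctx_id,			/* any GEM context */
		};

		return drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
	}

(The syncobj would be created once up front with something like
drmSyncobjCreate(fd, DRM_SYNCOBJ_CREATE_SIGNALED, &syncobj), so the very
first wait has a signalled fence to consume.)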

Regards,

Tvrtko

P.S.
Or rename the flag in your mind to "I915_GEM_CONTEXT_SERIAL_EXECUTION" 
or something and see if that still leaks the timeline or some 
implementation details.

P.P.S. Keep in mind I am only arguing about the wording of the single
timeline flag removal. Removal of timeline cloning is not controversial.
Jason Ekstrand March 23, 2021, 5:50 p.m. UTC | #12
On Tue, Mar 23, 2021 at 11:23 AM Tvrtko Ursulin
<tvrtko.ursulin@linux.intel.com> wrote:
>
>
> On 23/03/2021 13:23, Daniel Vetter wrote:
> > On Tue, Mar 23, 2021 at 09:14:36AM +0000, Tvrtko Ursulin wrote:
> >>
> >> On 22/03/2021 16:43, Daniel Vetter wrote:
> >>> On Mon, Mar 22, 2021 at 4:31 PM Tvrtko Ursulin
> >>> <tvrtko.ursulin@linux.intel.com> wrote:
> >>>>
> >>>>
> >>>> On 22/03/2021 14:57, Daniel Vetter wrote:
> >>>>> On Mon, Mar 22, 2021 at 3:33 PM Tvrtko Ursulin
> >>>>> <tvrtko.ursulin@linux.intel.com> wrote:
> >>>>>>
> >>>>>>
> >>>>>> On 22/03/2021 14:09, Daniel Vetter wrote:
> >>>>>>> On Mon, Mar 22, 2021 at 11:22:01AM +0000, Tvrtko Ursulin wrote:
> >>>>>>>>
> >>>>>>>> On 19/03/2021 22:38, Jason Ekstrand wrote:
> >>>>>>>>> This API allows one context to grab bits out of another context upon
> >>>>>>>>> creation.  It can be used as a short-cut for setparam(getparam()) for
> >>>>>>>>> things like I915_CONTEXT_PARAM_VM.  However, it's never been used by any
> >>>>>>>>> real userspace.  It's used by a few IGT tests and that's it.  Since it
> >>>>>>>>> doesn't add any real value (most of the stuff you can CLONE you can copy
> >>>>>>>>> in other ways), drop it.
> >>>>>>>>
> >>>>>>>> No complaints to remove if it ended up unused outside IGT. Latter is a _big_
> >>>>>>>> problem though, since it is much more that a few IGT tests. So I really
> >>>>>>>> think there really needs to be an evaluation and a plan for that (we don't
> >>>>>>>> want to lose 50% of the coverage over night).
> >>>>>>>>
> >>>>>>>>> There is one thing that this API allows you to clone which you cannot
> >>>>>>>>> clone via getparam/setparam: timelines.  However, timelines are an
> >>>>>>>>> implementation detail of i915 and not really something that needs to be
> >>>>>>>>
> >>>>>>>> Not really true timelines are i915 implementation detail. They are in fact a
> >>>>>>>> dma-fence context:seqno concept, nothing more that than. I think you are
> >>>>>>>> probably confusing struct intel_timeline with the timeline wording in the
> >>>>>>>> uapi. Former is i915 implementation detail, but context:seqno are truly
> >>>>>>>> userspace timelines.
> >>>>>>>
> >>>>>>> I think you're both saying the same thing and talking a bit past each
> >>>>>>> another.
> >>>>>>>
> >>>>>>> Yes the timeline is just a string of dma_fence, that's correct. Now
> >>>>>>> usually if you submit batches with execbuf, we have 3 ways to synchronize
> >>>>>>> concurrent submission: implicit sync, sync_file and drm_syncob. They all
> >>>>>>> map to different needs in different protocols/render apis.
> >>>>>>>
> >>>>>>> Now in one additional case the kernel makes sure that batchbuffers are
> >>>>>>> ordered, and that's when you submit them to the same hw ctx. Because
> >>>>>>> there's only 1 hw context and you really can't have batchbuffers run on
> >>>>>>> that single hw context out of order. That's what the timeline object we
> >>>>>>> talk about here is. But that largely is an internal implementation detail,
> >>>>>>> which happens to also use most/all the same infrastructure as the
> >>>>>>> dma_fence uapi pieces above.
> >>>>>>>
> >>>>>>> Now the internal implementation detail leaking here is that we exposed
> >>>>>>> this to userspace, without there being any need for this. What Jason
> >>>>>>> implements with syncobj in the next patch is essentially what userspace
> >>>>>>> should have been using for cross-engine sync. media userspace doesn't care
> >>>>>>> about interop with winsys/client apis, so they equally could have used
> >>>>>>> implicit sync or sync_file here (which I think is the solution now for the
> >>>>>>> new uapi prepped internally), since they all are about equally powerful
> >>>>>>> for stringing batchbuffers together.
> >>>>>>
> >>>>>> Are you saying we exposed a single timeline of execution per hw context
> >>>>>> via the single timeline flag?!
> >>>>>
> >>>>> Nope.
> >>>>>
> >>>>>> Timelines of execution were always exposed. Any "engine" (ring
> >>>>>> previously) in I915_EXEC_RING_MASK was a single timeline of execution.
> >>>>>> It is completely the same with engine map engines, which are also
> >>>>>> different indices into I915_EXEC_RING_MASK space.
> >>>>>>
> >>>>>> Userspace was aware of these timelines forever as well. Media was
> >>>>>> creating multiple contexts to have multiple timelines (so parallelism).
> >>>>>> Everyone knew that engine-hopping submissions needs to be either
> >>>>>> implicitly or explicitly synchronised, etc.
> >>>>>
> >>>>> Yup, I think we're saying the same thing here.
> >>>>>
> >>>>>> So I really don't see that we have leaked timelines as a concept *now*.
> >>>>>> What the patch has exposed to userspace is a new way to sync between
> >>>>>> timelines and nothing more.
> >>>>>
> >>>>> We've leaked it as something you can now share across hw context.
> >>>>
> >>>> Okay so we agree on most things but apparently have different
> >>>> definitions of what it means to leak internal implementation details.
> >>>>
> >>>> While at the same time proof that we haven't leaked the internal
> >>>> implementation details is that Jason was able to implement the single
> >>>> timeline flag with a drm syncobj at the execbuf top level. (Well mostly,
> >>>> ignoring the probably inconsequential difference of one vs multiple
> >>>> fence contexts.)
> >>>
> >>> It's not a matching implementation. It's only good enough for what
> >>> media needs, and essentially what media should have done to begin
> >>> with.
> >>>
> >>> There's substantially different behaviour between SINGLE_TIMELINE and
> >>> what Jason has done here when you race concurrent execbuf calls:
> >>> Former guarantees total ordering, the latter doesn't even try. They
> >>> are not the same thing, but luckily userspace doesn't care about that
> >>> difference.
> >>
> >> Sounds like a very important difference to stress in the commit message.
> >>
> >> Secondly, I am unclear whether we have agreement on whether the single
> >> timeline flag is leaking implementation details of the execlists scheduler
> >> to userspace or not?
> >
> > I do think Jason&me agree on that it does leak an internal concept to
> > userspace that we shouldn't leak.
> >
> > I'm honestly not entirely understanding your argument for why
> > single_timeline isn't an internal concept somehow, and how exposing it to
> > userspace doesn't leak that concept to userspace. Whether internally that
> > concept is now perfectly represented by just struct intel_timeline, or
> > maybe more the seqno/hswp, or more diffused through the code doesn't
> > really change that we have an internal concept that we're now exposing for
> > sharing in ways that wasn't possible before.
>
> Don't know, obviously we think with very different paradigms.
>
> GEM context always had as many timelines as there are engines in it's
> map so multiple timelines is the default mode everyone is aware of.
>
> Single timeline flag added a new mode where instead of multiple
> timelines single GEM context becomes a single timeline.
>
> The fact that userspace can achieve the single timeline execution on its
> own should be an argument enough that it is not a new concept that got
> leaked out. Definitely not any backend specific implementation details.
> It simply added a new feature which may or may not have been needed.

I just commented on the SINGLE_TIMELINE patch and will send the v3
momentarily.  I think you'll find the commit message much more to your
liking. :-)

--Jason

> Regards,
>
> Tvrtko
>
> P.S.
> Or rename the flag in your mind to "I915_GEM_CONTEXT_SERIAL_EXECUTION"
> or something and see if that still leaks the timeline or some
> implementation details.
>
> P.P.S Keep in mind I am arguing on wording in single timeline flag
> removal. Removal of timeline cloning is not controversial.
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index d28ac79de7573..f88bac19333ec 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1983,207 +1983,14 @@  static int create_setparam(struct i915_user_extension __user *ext, void *data)
 	return ctx_setparam(arg->fpriv, arg->ctx, &local.param);
 }
 
-static int clone_engines(struct i915_gem_context *dst,
-			 struct i915_gem_context *src)
+static int invalid_ext(struct i915_user_extension __user *ext, void *data)
 {
-	struct i915_gem_engines *clone, *e;
-	bool user_engines;
-	unsigned long n;
-
-	e = __context_engines_await(src, &user_engines);
-	if (!e)
-		return -ENOENT;
-
-	clone = alloc_engines(e->num_engines);
-	if (!clone)
-		goto err_unlock;
-
-	for (n = 0; n < e->num_engines; n++) {
-		struct intel_engine_cs *engine;
-
-		if (!e->engines[n]) {
-			clone->engines[n] = NULL;
-			continue;
-		}
-		engine = e->engines[n]->engine;
-
-		/*
-		 * Virtual engines are singletons; they can only exist
-		 * inside a single context, because they embed their
-		 * HW context... As each virtual context implies a single
-		 * timeline (each engine can only dequeue a single request
-		 * at any time), it would be surprising for two contexts
-		 * to use the same engine. So let's create a copy of
-		 * the virtual engine instead.
-		 */
-		if (intel_engine_is_virtual(engine))
-			clone->engines[n] =
-				intel_execlists_clone_virtual(engine);
-		else
-			clone->engines[n] = intel_context_create(engine);
-		if (IS_ERR_OR_NULL(clone->engines[n])) {
-			__free_engines(clone, n);
-			goto err_unlock;
-		}
-
-		intel_context_set_gem(clone->engines[n], dst);
-	}
-	clone->num_engines = n;
-	i915_sw_fence_complete(&e->fence);
-
-	/* Serialised by constructor */
-	engines_idle_release(dst, rcu_replace_pointer(dst->engines, clone, 1));
-	if (user_engines)
-		i915_gem_context_set_user_engines(dst);
-	else
-		i915_gem_context_clear_user_engines(dst);
-	return 0;
-
-err_unlock:
-	i915_sw_fence_complete(&e->fence);
-	return -ENOMEM;
-}
-
-static int clone_flags(struct i915_gem_context *dst,
-		       struct i915_gem_context *src)
-{
-	dst->user_flags = src->user_flags;
-	return 0;
-}
-
-static int clone_schedattr(struct i915_gem_context *dst,
-			   struct i915_gem_context *src)
-{
-	dst->sched = src->sched;
-	return 0;
-}
-
-static int clone_sseu(struct i915_gem_context *dst,
-		      struct i915_gem_context *src)
-{
-	struct i915_gem_engines *e = i915_gem_context_lock_engines(src);
-	struct i915_gem_engines *clone;
-	unsigned long n;
-	int err;
-
-	/* no locking required; sole access under constructor*/
-	clone = __context_engines_static(dst);
-	if (e->num_engines != clone->num_engines) {
-		err = -EINVAL;
-		goto unlock;
-	}
-
-	for (n = 0; n < e->num_engines; n++) {
-		struct intel_context *ce = e->engines[n];
-
-		if (clone->engines[n]->engine->class != ce->engine->class) {
-			/* Must have compatible engine maps! */
-			err = -EINVAL;
-			goto unlock;
-		}
-
-		/* serialises with set_sseu */
-		err = intel_context_lock_pinned(ce);
-		if (err)
-			goto unlock;
-
-		clone->engines[n]->sseu = ce->sseu;
-		intel_context_unlock_pinned(ce);
-	}
-
-	err = 0;
-unlock:
-	i915_gem_context_unlock_engines(src);
-	return err;
-}
-
-static int clone_timeline(struct i915_gem_context *dst,
-			  struct i915_gem_context *src)
-{
-	if (src->timeline)
-		__assign_timeline(dst, src->timeline);
-
-	return 0;
-}
-
-static int clone_vm(struct i915_gem_context *dst,
-		    struct i915_gem_context *src)
-{
-	struct i915_address_space *vm;
-	int err = 0;
-
-	if (!rcu_access_pointer(src->vm))
-		return 0;
-
-	rcu_read_lock();
-	vm = context_get_vm_rcu(src);
-	rcu_read_unlock();
-
-	if (!mutex_lock_interruptible(&dst->mutex)) {
-		__assign_ppgtt(dst, vm);
-		mutex_unlock(&dst->mutex);
-	} else {
-		err = -EINTR;
-	}
-
-	i915_vm_put(vm);
-	return err;
-}
-
-static int create_clone(struct i915_user_extension __user *ext, void *data)
-{
-	static int (* const fn[])(struct i915_gem_context *dst,
-				  struct i915_gem_context *src) = {
-#define MAP(x, y) [ilog2(I915_CONTEXT_CLONE_##x)] = y
-		MAP(ENGINES, clone_engines),
-		MAP(FLAGS, clone_flags),
-		MAP(SCHEDATTR, clone_schedattr),
-		MAP(SSEU, clone_sseu),
-		MAP(TIMELINE, clone_timeline),
-		MAP(VM, clone_vm),
-#undef MAP
-	};
-	struct drm_i915_gem_context_create_ext_clone local;
-	const struct create_ext *arg = data;
-	struct i915_gem_context *dst = arg->ctx;
-	struct i915_gem_context *src;
-	int err, bit;
-
-	if (copy_from_user(&local, ext, sizeof(local)))
-		return -EFAULT;
-
-	BUILD_BUG_ON(GENMASK(BITS_PER_TYPE(local.flags) - 1, ARRAY_SIZE(fn)) !=
-		     I915_CONTEXT_CLONE_UNKNOWN);
-
-	if (local.flags & I915_CONTEXT_CLONE_UNKNOWN)
-		return -EINVAL;
-
-	if (local.rsvd)
-		return -EINVAL;
-
-	rcu_read_lock();
-	src = __i915_gem_context_lookup_rcu(arg->fpriv, local.clone_id);
-	rcu_read_unlock();
-	if (!src)
-		return -ENOENT;
-
-	GEM_BUG_ON(src == dst);
-
-	for (bit = 0; bit < ARRAY_SIZE(fn); bit++) {
-		if (!(local.flags & BIT(bit)))
-			continue;
-
-		err = fn[bit](dst, src);
-		if (err)
-			return err;
-	}
-
-	return 0;
+	return -EINVAL;
 }
 
 static const i915_user_extension_fn create_extensions[] = {
 	[I915_CONTEXT_CREATE_EXT_SETPARAM] = create_setparam,
-	[I915_CONTEXT_CREATE_EXT_CLONE] = create_clone,
+	[I915_CONTEXT_CREATE_EXT_CLONE] = invalid_ext,
 };
 
 static bool client_is_banned(struct drm_i915_file_private *file_priv)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 4c4b9254def1b..33ef78cb1deb7 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1841,20 +1841,10 @@  struct drm_i915_gem_context_create_ext_setparam {
 	struct drm_i915_gem_context_param param;
 };
 
-struct drm_i915_gem_context_create_ext_clone {
+/* This API has been removed.  On the off chance someone somewhere has
+ * attempted to use it, never re-use this extension number.
+ */
 #define I915_CONTEXT_CREATE_EXT_CLONE 1
-	struct i915_user_extension base;
-	__u32 clone_id;
-	__u32 flags;
-#define I915_CONTEXT_CLONE_ENGINES	(1u << 0)
-#define I915_CONTEXT_CLONE_FLAGS	(1u << 1)
-#define I915_CONTEXT_CLONE_SCHEDATTR	(1u << 2)
-#define I915_CONTEXT_CLONE_SSEU		(1u << 3)
-#define I915_CONTEXT_CLONE_TIMELINE	(1u << 4)
-#define I915_CONTEXT_CLONE_VM		(1u << 5)
-#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_VM << 1)
-	__u64 rsvd;
-};
 
 struct drm_i915_gem_context_destroy {
 	__u32 ctx_id;