diff mbox series

[2/3] drm/i915: Allow userspace to specify ringsize on construction

Message ID 20191115160546.896305-2-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [1/3] drm/i915: Flush idle barriers when waiting | expand

Commit Message

Chris Wilson Nov. 15, 2019, 4:05 p.m. UTC
No good reason why we must always use a static ringsize, so let
userspace select one during construction.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c | 139 +++++++++++++++++++-
 drivers/gpu/drm/i915/gt/intel_lrc.c         |   1 +
 include/uapi/drm/i915_drm.h                 |  19 +++
 3 files changed, 153 insertions(+), 6 deletions(-)

Comments

Janusz Krzysztofik Nov. 18, 2019, 11:14 a.m. UTC | #1
Hi Chris,

Only some minor comments from me, mostly out of my curiosity.

On Friday, November 15, 2019 5:05:45 PM CET Chris Wilson wrote:
> No good reason why we must always use a static ringsize, so let
> userspace select one during construction.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_context.c | 139 +++++++++++++++++++-
>  drivers/gpu/drm/i915/gt/intel_lrc.c         |   1 +
>  include/uapi/drm/i915_drm.h                 |  19 +++
>  3 files changed, 153 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> index 1284f47303fa..ee9a4634fed8 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
> @@ -597,23 +597,30 @@ __create_context(struct drm_i915_private *i915)
>  	return ERR_PTR(err);
>  }
>  
> -static void
> +static int
>  context_apply_all(struct i915_gem_context *ctx,
> -		  void (*fn)(struct intel_context *ce, void *data),
> +		  int (*fn)(struct intel_context *ce, void *data),
>  		  void *data)
>  {
>  	struct i915_gem_engines_iter it;
>  	struct intel_context *ce;
> +	int err = 0;
>  
> -	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it)
> -		fn(ce, data);
> +	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
> +		err = fn(ce, data);
> +		if (err)
> +			break;
> +	}
>  	i915_gem_context_unlock_engines(ctx);
> +
> +	return err;
>  }
>  
> -static void __apply_ppgtt(struct intel_context *ce, void *vm)
> +static int __apply_ppgtt(struct intel_context *ce, void *vm)
>  {
>  	i915_vm_put(ce->vm);
>  	ce->vm = i915_vm_get(vm);
> +	return 0;
>  }
>  
>  static struct i915_address_space *
> @@ -651,9 +658,10 @@ static void __set_timeline(struct intel_timeline **dst,
>  		intel_timeline_put(old);
>  }
>  
> -static void __apply_timeline(struct intel_context *ce, void *timeline)
> +static int __apply_timeline(struct intel_context *ce, void *timeline)
>  {
>  	__set_timeline(&ce->timeline, timeline);
> +	return 0;
>  }
>  
>  static void __assign_timeline(struct i915_gem_context *ctx,
> @@ -1223,6 +1231,104 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv,
>  	return err;
>  }
>  
> +static int __apply_ringsize(struct intel_context *ce, void *sz)
> +{
> +	int err;
> +
> +	err = i915_active_wait(&ce->active);
> +	if (err < 0)
> +		return err;
> +
> +	if (intel_context_lock_pinned(ce))
> +		return -EINTR;
> +
> +	if (intel_context_is_pinned(ce)) {
> +		err = -EBUSY; /* In active use, come back later! */
> +		goto unlock;
> +	}
> +
> +	if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
> +		struct intel_ring *ring;
> +
> +		/* Replace the existing ringbuffer */
> +		ring = intel_engine_create_ring(ce->engine,
> +						(unsigned long)sz);
> +		if (IS_ERR(ring)) {
> +			err = PTR_ERR(ring);
> +			goto unlock;
> +		}
> +
> +		intel_ring_put(ce->ring);
> +		ce->ring = ring;
> +
> +		/* Context image will be updated on next pin */
> +	} else {
> +		ce->ring = sz;
> +	}
> +
> +unlock:
> +	intel_context_unlock_pinned(ce);
> +	return err;
> +}

I'm wondering if this function (and __get_ringsize() below as well), with its 
dependency on intel_context internals, especially on that dual meaning of 
ce->ring which depends on (ce->flags & CONTEXT_ALLOC_BIT), would better fit 
into drivers/gpu/drm/i915/gt/intel_context.c.

> +
> +static int set_ringsize(struct i915_gem_context *ctx,
> +			struct drm_i915_gem_context_param *args)
> +{
> +	if (!HAS_LOGICAL_RING_CONTEXTS(ctx->i915))
> +		return -ENODEV;
> +
> +	if (args->size)
> +		return -EINVAL;
> +
> +	if (!IS_ALIGNED(args->value, I915_GTT_PAGE_SIZE))
> +		return -EINVAL;
> +
> +	if (args->value < I915_GTT_PAGE_SIZE)
> +		return -EINVAL;
> +
> +	if (args->value > 128 * I915_GTT_PAGE_SIZE)
> +		return -EINVAL;
> +
> +	return context_apply_all(ctx,
> +				 __apply_ringsize,
> +				 __intel_context_ring_size(args->value));
> +}
> +
> +static int __get_ringsize(struct intel_context *ce, void *arg)
> +{
> +	int num_pages;
> +
> +	if (intel_context_lock_pinned(ce))
> +		return -EINTR;
> +
> +	if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags))
> +		num_pages = ce->ring->size / I915_GTT_PAGE_SIZE;
> +	else
> +		num_pages = (uintptr_t)ce->ring / I915_GTT_PAGE_SIZE;
> +
> +	intel_context_unlock_pinned(ce);
> +	return num_pages; /* stop on first engine */

Location of this comment seems not perfect to me as it is not quite obvious 
how that works without examining how this function is used, but having spent a 
while looking around, I'm not able to suggest a better place.

> +}
> +
> +static int get_ringsize(struct i915_gem_context *ctx,
> +			struct drm_i915_gem_context_param *args)
> +{
> +	int num_pages;
> +
> +	if (!HAS_LOGICAL_RING_CONTEXTS(ctx->i915))
> +		return -ENODEV;
> +
> +	if (args->size)
> +		return -EINVAL;
> +
> +	num_pages = context_apply_all(ctx, __get_ringsize, NULL);
> +	if (num_pages < 0)
> +		return num_pages;
> +
> +	args->value = (u64)num_pages * I915_GTT_PAGE_SIZE;

Do you convert to num_pages inside __get_ringsize() then back to size in bytes 
to avoid an overflow?  Or any other reason?  Something that may be useful in 
the future?

> +	return 0;
> +}
> +
>  static int gen8_emit_rpcs_config(struct i915_request *rq,
>  				 struct intel_context *ce,
>  				 struct intel_sseu sseu)
> @@ -1935,6 +2041,10 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv,
>  		ret = set_persistence(ctx, args);
>  		break;
>  
> +	case I915_CONTEXT_PARAM_RINGSIZE:
> +		ret = set_ringsize(ctx, args);
> +		break;
> +
>  	case I915_CONTEXT_PARAM_BAN_PERIOD:
>  	default:
>  		ret = -EINVAL;
> @@ -2003,6 +2113,19 @@ static int clone_engines(struct i915_gem_context *dst,
>  			__free_engines(clone, n);
>  			goto err_unlock;
>  		}
> +
> +		/* Copy across the preferred ringsize */
> +		clone->engines[n]->ring = e->engines[n]->ring;
> +		if (test_bit(CONTEXT_ALLOC_BIT, &e->engines[n]->flags)) {
> +			if (intel_context_lock_pinned(e->engines[n])) {
> +				__free_engines(clone, n + 1);
> +				goto err_unlock;
> +			}
> +
> +			clone->engines[n]->ring =
> +				__intel_context_ring_size(e->engines[n]->ring->size);
> +			intel_context_unlock_pinned(e->engines[n]);
> +		}

Another candidate for a helper located in 
drivers/gpu/drm/i915/gt/intel_context.c?

>  	}
>  	clone->num_engines = n;
>  
> @@ -2366,6 +2489,10 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
>  		args->value = i915_gem_context_is_persistent(ctx);
>  		break;
>  
> +	case I915_CONTEXT_PARAM_RINGSIZE:
> +		ret = get_ringsize(ctx, args);
> +		break;
> +
>  	case I915_CONTEXT_PARAM_BAN_PERIOD:
>  	default:
>  		ret = -EINVAL;
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
> index 33ce258d484f..2649690a951e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
> @@ -2417,6 +2417,7 @@ __execlists_update_reg_state(const struct intel_context *ce,
>  	regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
>  	regs[CTX_RING_HEAD] = ring->head;
>  	regs[CTX_RING_TAIL] = ring->tail;
> +	regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
>  
>  	/* RPCS */
>  	if (engine->class == RENDER_CLASS) {
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 5400d7e057f1..ae7cd681b075 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -1587,6 +1587,25 @@ struct drm_i915_gem_context_param {
>   * By default, new contexts allow persistence.
>   */
>  #define I915_CONTEXT_PARAM_PERSISTENCE	0xb
> +
> +/*
> + * I915_CONTEXT_PARAM_RINGSIZE:
> + *
> + * Sets the size of the CS ringbuffer to use for logical ring contexts. This
> + * applies a limit of how many batches can be queued to HW before the caller
> + * is blocked due to lack of space for more commands.
> + *
> + * Only reliably possible to be set prior to first use, i.e. during
> + * construction. At any later point, the current execution must be flushed as
> + * the ring can only be changed while the context is idle.
> + *
> + * Only applies to the current set of engine and lost when those engines
> + * are replaced by a new mapping (see I915_CONTEXT_PARAM_ENGINES).
> + *
> + * Must be between 4 - 512 KiB, in intervals of page size [4 KiB].
> + * Default is 16 KiB.
> + */
> +#define I915_CONTEXT_PARAM_RINGSIZE	0xc

I know it looked like that already before, but having other documented flags 
separated by blank lines from each other, Is there any reason for not putting 
another blank line after the last one?

>  /* Must be kept compact -- no holes and well documented */
>  
>  	__u64 value;
> 

None of the above comments are essential so with or without them addressed:
Reviewed-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>

Thanks,
Janusz
Chris Wilson Nov. 18, 2019, 11:28 a.m. UTC | #2
Quoting Janusz Krzysztofik (2019-11-18 11:14:12)
> Hi Chris,
> 
> Only some minor comments from me, mostly out of my curiosity.
> 
> On Friday, November 15, 2019 5:05:45 PM CET Chris Wilson wrote:
> > No good reason why we must always use a static ringsize, so let
> > userspace select one during construction.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
> > ---
> > +static int __apply_ringsize(struct intel_context *ce, void *sz)
> > +{
> > +     int err;
> > +
> > +     err = i915_active_wait(&ce->active);
> > +     if (err < 0)
> > +             return err;
> > +
> > +     if (intel_context_lock_pinned(ce))
> > +             return -EINTR;
> > +
> > +     if (intel_context_is_pinned(ce)) {
> > +             err = -EBUSY; /* In active use, come back later! */
> > +             goto unlock;
> > +     }
> > +
> > +     if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
> > +             struct intel_ring *ring;
> > +
> > +             /* Replace the existing ringbuffer */
> > +             ring = intel_engine_create_ring(ce->engine,
> > +                                             (unsigned long)sz);
> > +             if (IS_ERR(ring)) {
> > +                     err = PTR_ERR(ring);
> > +                     goto unlock;
> > +             }
> > +
> > +             intel_ring_put(ce->ring);
> > +             ce->ring = ring;
> > +
> > +             /* Context image will be updated on next pin */
> > +     } else {
> > +             ce->ring = sz;
> > +     }
> > +
> > +unlock:
> > +     intel_context_unlock_pinned(ce);
> > +     return err;
> > +}
> 
> I'm wondering if this function (and __get_ringsize() below as well), with its 
> dependency on intel_context internals, especially on that dual meaning of 
> ce->ring which depends on (ce->flags & CONTEXT_ALLOC_BIT), would better fit 
> into drivers/gpu/drm/i915/gt/intel_context.c.

Possibly, but at the same time it's currently only implementing a
feature of the GEM context.

I hear you, I'm just resisting, mainly because I don't want to have to
think of a good name :)

intel_context_param.c
intel_context_ring.c

I might be able to find friends for either.

> > +static int set_ringsize(struct i915_gem_context *ctx,
> > +                     struct drm_i915_gem_context_param *args)
> > +{
> > +     if (!HAS_LOGICAL_RING_CONTEXTS(ctx->i915))
> > +             return -ENODEV;
> > +
> > +     if (args->size)
> > +             return -EINVAL;
> > +
> > +     if (!IS_ALIGNED(args->value, I915_GTT_PAGE_SIZE))
> > +             return -EINVAL;
> > +
> > +     if (args->value < I915_GTT_PAGE_SIZE)
> > +             return -EINVAL;
> > +
> > +     if (args->value > 128 * I915_GTT_PAGE_SIZE)
> > +             return -EINVAL;
> > +
> > +     return context_apply_all(ctx,
> > +                              __apply_ringsize,
> > +                              __intel_context_ring_size(args->value));
> > +}
> > +
> > +static int __get_ringsize(struct intel_context *ce, void *arg)
> > +{
> > +     int num_pages;
> > +
> > +     if (intel_context_lock_pinned(ce))
> > +             return -EINTR;
> > +
> > +     if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags))
> > +             num_pages = ce->ring->size / I915_GTT_PAGE_SIZE;
> > +     else
> > +             num_pages = (uintptr_t)ce->ring / I915_GTT_PAGE_SIZE;
> > +
> > +     intel_context_unlock_pinned(ce);
> > +     return num_pages; /* stop on first engine */
> 
> Location of this comment seems not perfect to me as it is not quite obvious 
> how that works without examining how this function is used, but having spent a 
> while looking around, I'm not able to suggest a better place.

Yeah, the comment is for the intent of returning the positive, so
there's definitely a case for explaining the unusual pattern here.

The calling loop is just a standard if (err) return err; propagation so
that hardly merits a long winded explanation.

> > +static int get_ringsize(struct i915_gem_context *ctx,
> > +                     struct drm_i915_gem_context_param *args)
> > +{
> > +     int num_pages;
> > +
> > +     if (!HAS_LOGICAL_RING_CONTEXTS(ctx->i915))
> > +             return -ENODEV;
> > +
> > +     if (args->size)
> > +             return -EINVAL;
> > +
> > +     num_pages = context_apply_all(ctx, __get_ringsize, NULL);
> > +     if (num_pages < 0)
> > +             return num_pages;
> > +
> > +     args->value = (u64)num_pages * I915_GTT_PAGE_SIZE;
> 
> Do you convert to num_pages inside __get_ringsize() then back to size in bytes 
> to avoid an overflow?  Or any other reason?  Something that may be useful in 
> the future?

Just being prudent in making sure we have sufficient bits across all the
type-narrowing.

> > @@ -2003,6 +2113,19 @@ static int clone_engines(struct i915_gem_context *dst,
> >                       __free_engines(clone, n);
> >                       goto err_unlock;
> >               }
> > +
> > +             /* Copy across the preferred ringsize */
> > +             clone->engines[n]->ring = e->engines[n]->ring;
> > +             if (test_bit(CONTEXT_ALLOC_BIT, &e->engines[n]->flags)) {
> > +                     if (intel_context_lock_pinned(e->engines[n])) {
> > +                             __free_engines(clone, n + 1);
> > +                             goto err_unlock;
> > +                     }
> > +
> > +                     clone->engines[n]->ring =
> > +                             __intel_context_ring_size(e->engines[n]->ring->size);
> > +                     intel_context_unlock_pinned(e->engines[n]);
> > +             }
> 
> Another candidate for a helper located in 
> drivers/gpu/drm/i915/gt/intel_context.c?

This is much less of a candidate for potential reuse as I feel it is very
peculiar to the GEM->engines[], and not a fit for ugpu. At least as
currently written; an intel_context_get_ring_size() to go along with
intel_context_set_ring_size(), maybe.

> > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> > index 5400d7e057f1..ae7cd681b075 100644
> > --- a/include/uapi/drm/i915_drm.h
> > +++ b/include/uapi/drm/i915_drm.h
> > @@ -1587,6 +1587,25 @@ struct drm_i915_gem_context_param {
> >   * By default, new contexts allow persistence.
> >   */
> >  #define I915_CONTEXT_PARAM_PERSISTENCE       0xb
> > +
> > +/*
> > + * I915_CONTEXT_PARAM_RINGSIZE:
> > + *
> > + * Sets the size of the CS ringbuffer to use for logical ring contexts. This
> > + * applies a limit of how many batches can be queued to HW before the caller
> > + * is blocked due to lack of space for more commands.
> > + *
> > + * Only reliably possible to be set prior to first use, i.e. during
> > + * construction. At any later point, the current execution must be flushed as
> > + * the ring can only be changed while the context is idle.
> > + *
> > + * Only applies to the current set of engine and lost when those engines
> > + * are replaced by a new mapping (see I915_CONTEXT_PARAM_ENGINES).
> > + *
> > + * Must be between 4 - 512 KiB, in intervals of page size [4 KiB].
> > + * Default is 16 KiB.
> > + */
> > +#define I915_CONTEXT_PARAM_RINGSIZE  0xc
> 
> I know it looked like that already before, but having other documented flags 
> separated by blank lines from each other, Is there any reason for not putting 
> another blank line after the last one?
> 
> >  /* Must be kept compact -- no holes and well documented */

You mean before the /* Must be... */? My intent is to make it conflict
and force people to take notice of the instruction.
-Chris
Chris Wilson Nov. 25, 2019, 10:05 a.m. UTC | #3
Quoting Chris Wilson (2019-11-15 16:05:45)
> No good reason why we must always use a static ringsize, so let
> userspace select one during construction.

Do we have any news on whether userspace has materialised for this yet?

It's literally just

--- a/runtime/os_interface/linux/drm_neo.cpp
+++ b/runtime/os_interface/linux/drm_neo.cpp
@@ -182,6 +182,15 @@ void setNonPersistent(uint32_t drmContextId) {
     ioctl(DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &gcp);
 }

+void setMaxRingSize(uint32_t drmContextId) {
+    drm_i915_gem_context_param gcp = {};
+    gcp.ctx_id = drmContextId;
+    gcp.param = 0xc; /* I915_CONTEXT_PARAM_RINGSIZE; */
+    gcp.value = 128 << 12; /* maximum ring size is 512KiB, or 128 pages */
+
+    ioctl(DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &gcp);
+}
+
 uint32_t Drm::createDrmContext() {
     drm_i915_gem_context_create gcc = {};
     auto retVal = ioctl(DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &gcc);
@@ -190,6 +199,9 @@ uint32_t Drm::createDrmContext() {
     /* enable cleanup of resources on process termination */
     setNonPersistent(gcc.ctx_id);

+    /* Big rings for silly amounts of non-blocking work! */
+    setMaxRingSize(gcc.ctx_id);

with some justification.
-Chris
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 1284f47303fa..ee9a4634fed8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -597,23 +597,30 @@  __create_context(struct drm_i915_private *i915)
 	return ERR_PTR(err);
 }
 
-static void
+static int
 context_apply_all(struct i915_gem_context *ctx,
-		  void (*fn)(struct intel_context *ce, void *data),
+		  int (*fn)(struct intel_context *ce, void *data),
 		  void *data)
 {
 	struct i915_gem_engines_iter it;
 	struct intel_context *ce;
+	int err = 0;
 
-	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it)
-		fn(ce, data);
+	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+		err = fn(ce, data);
+		if (err)
+			break;
+	}
 	i915_gem_context_unlock_engines(ctx);
+
+	return err;
 }
 
-static void __apply_ppgtt(struct intel_context *ce, void *vm)
+static int __apply_ppgtt(struct intel_context *ce, void *vm)
 {
 	i915_vm_put(ce->vm);
 	ce->vm = i915_vm_get(vm);
+	return 0;
 }
 
 static struct i915_address_space *
@@ -651,9 +658,10 @@  static void __set_timeline(struct intel_timeline **dst,
 		intel_timeline_put(old);
 }
 
-static void __apply_timeline(struct intel_context *ce, void *timeline)
+static int __apply_timeline(struct intel_context *ce, void *timeline)
 {
 	__set_timeline(&ce->timeline, timeline);
+	return 0;
 }
 
 static void __assign_timeline(struct i915_gem_context *ctx,
@@ -1223,6 +1231,104 @@  static int set_ppgtt(struct drm_i915_file_private *file_priv,
 	return err;
 }
 
+static int __apply_ringsize(struct intel_context *ce, void *sz)
+{
+	int err;
+
+	err = i915_active_wait(&ce->active);
+	if (err < 0)
+		return err;
+
+	if (intel_context_lock_pinned(ce))
+		return -EINTR;
+
+	if (intel_context_is_pinned(ce)) {
+		err = -EBUSY; /* In active use, come back later! */
+		goto unlock;
+	}
+
+	if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
+		struct intel_ring *ring;
+
+		/* Replace the existing ringbuffer */
+		ring = intel_engine_create_ring(ce->engine,
+						(unsigned long)sz);
+		if (IS_ERR(ring)) {
+			err = PTR_ERR(ring);
+			goto unlock;
+		}
+
+		intel_ring_put(ce->ring);
+		ce->ring = ring;
+
+		/* Context image will be updated on next pin */
+	} else {
+		ce->ring = sz;
+	}
+
+unlock:
+	intel_context_unlock_pinned(ce);
+	return err;
+}
+
+static int set_ringsize(struct i915_gem_context *ctx,
+			struct drm_i915_gem_context_param *args)
+{
+	if (!HAS_LOGICAL_RING_CONTEXTS(ctx->i915))
+		return -ENODEV;
+
+	if (args->size)
+		return -EINVAL;
+
+	if (!IS_ALIGNED(args->value, I915_GTT_PAGE_SIZE))
+		return -EINVAL;
+
+	if (args->value < I915_GTT_PAGE_SIZE)
+		return -EINVAL;
+
+	if (args->value > 128 * I915_GTT_PAGE_SIZE)
+		return -EINVAL;
+
+	return context_apply_all(ctx,
+				 __apply_ringsize,
+				 __intel_context_ring_size(args->value));
+}
+
+static int __get_ringsize(struct intel_context *ce, void *arg)
+{
+	int num_pages;
+
+	if (intel_context_lock_pinned(ce))
+		return -EINTR;
+
+	if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags))
+		num_pages = ce->ring->size / I915_GTT_PAGE_SIZE;
+	else
+		num_pages = (uintptr_t)ce->ring / I915_GTT_PAGE_SIZE;
+
+	intel_context_unlock_pinned(ce);
+	return num_pages; /* stop on first engine */
+}
+
+static int get_ringsize(struct i915_gem_context *ctx,
+			struct drm_i915_gem_context_param *args)
+{
+	int num_pages;
+
+	if (!HAS_LOGICAL_RING_CONTEXTS(ctx->i915))
+		return -ENODEV;
+
+	if (args->size)
+		return -EINVAL;
+
+	num_pages = context_apply_all(ctx, __get_ringsize, NULL);
+	if (num_pages < 0)
+		return num_pages;
+
+	args->value = (u64)num_pages * I915_GTT_PAGE_SIZE;
+	return 0;
+}
+
 static int gen8_emit_rpcs_config(struct i915_request *rq,
 				 struct intel_context *ce,
 				 struct intel_sseu sseu)
@@ -1935,6 +2041,10 @@  static int ctx_setparam(struct drm_i915_file_private *fpriv,
 		ret = set_persistence(ctx, args);
 		break;
 
+	case I915_CONTEXT_PARAM_RINGSIZE:
+		ret = set_ringsize(ctx, args);
+		break;
+
 	case I915_CONTEXT_PARAM_BAN_PERIOD:
 	default:
 		ret = -EINVAL;
@@ -2003,6 +2113,19 @@  static int clone_engines(struct i915_gem_context *dst,
 			__free_engines(clone, n);
 			goto err_unlock;
 		}
+
+		/* Copy across the preferred ringsize */
+		clone->engines[n]->ring = e->engines[n]->ring;
+		if (test_bit(CONTEXT_ALLOC_BIT, &e->engines[n]->flags)) {
+			if (intel_context_lock_pinned(e->engines[n])) {
+				__free_engines(clone, n + 1);
+				goto err_unlock;
+			}
+
+			clone->engines[n]->ring =
+				__intel_context_ring_size(e->engines[n]->ring->size);
+			intel_context_unlock_pinned(e->engines[n]);
+		}
 	}
 	clone->num_engines = n;
 
@@ -2366,6 +2489,10 @@  int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 		args->value = i915_gem_context_is_persistent(ctx);
 		break;
 
+	case I915_CONTEXT_PARAM_RINGSIZE:
+		ret = get_ringsize(ctx, args);
+		break;
+
 	case I915_CONTEXT_PARAM_BAN_PERIOD:
 	default:
 		ret = -EINVAL;
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 33ce258d484f..2649690a951e 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2417,6 +2417,7 @@  __execlists_update_reg_state(const struct intel_context *ce,
 	regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
 	regs[CTX_RING_HEAD] = ring->head;
 	regs[CTX_RING_TAIL] = ring->tail;
+	regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
 
 	/* RPCS */
 	if (engine->class == RENDER_CLASS) {
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 5400d7e057f1..ae7cd681b075 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1587,6 +1587,25 @@  struct drm_i915_gem_context_param {
  * By default, new contexts allow persistence.
  */
 #define I915_CONTEXT_PARAM_PERSISTENCE	0xb
+
+/*
+ * I915_CONTEXT_PARAM_RINGSIZE:
+ *
+ * Sets the size of the CS ringbuffer to use for logical ring contexts. This
+ * applies a limit of how many batches can be queued to HW before the caller
+ * is blocked due to lack of space for more commands.
+ *
+ * Only reliably possible to be set prior to first use, i.e. during
+ * construction. At any later point, the current execution must be flushed as
+ * the ring can only be changed while the context is idle.
+ *
+ * Only applies to the current set of engine and lost when those engines
+ * are replaced by a new mapping (see I915_CONTEXT_PARAM_ENGINES).
+ *
+ * Must be between 4 - 512 KiB, in intervals of page size [4 KiB].
+ * Default is 16 KiB.
+ */
+#define I915_CONTEXT_PARAM_RINGSIZE	0xc
 /* Must be kept compact -- no holes and well documented */
 
 	__u64 value;