diff mbox

[v1,2/4] drm/i915: Add provision to extend Golden context batch

Message ID 1437149334-33617-3-git-send-email-arun.siluvery@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

arun.siluvery@linux.intel.com July 17, 2015, 4:08 p.m. UTC
The Golden batch carries 3D state at the beginning so that HW starts with
a known state. It is carried as a binary blob which is auto-generated from
source. The idea was it would be easier to maintain and keep the complexity
out of the kernel which makes sense as we don't really touch it. However if
you really need to update it then you need to update generator source and
keep the binary blob in sync with it.

There is a need to patch this in bxt to send one additional command to enable
a feature. A solution was to patch the binary data with some additional
data structures (included as part of auto-generator source) but it was
unnecessarily complicated.

Chris suggested the idea of having a secondary batch and execute two batch
buffers. It has clear advantages as we needn't touch the base golden batch,
can customize secondary/auxiliary batch depending on Gen and can be carried
in the driver with no dependencies.

This patch adds support for this auxiliary batch which is inserted at the
end of golden batch and is completely independent from it. Thanks to Mika
for the preliminary review.

Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Armin Reese <armin.c.reese@intel.com>
Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_gem_render_state.c | 27 +++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_render_state.h |  2 ++
 drivers/gpu/drm/i915/intel_lrc.c             |  6 ++++++
 3 files changed, 35 insertions(+)

Comments

Chris Wilson July 17, 2015, 4:23 p.m. UTC | #1
On Fri, Jul 17, 2015 at 05:08:52PM +0100, Arun Siluvery wrote:
> The Golden batch carries 3D state at the beginning so that HW starts with
> a known state. It is carried as a binary blob which is auto-generated from
> source. The idea was it would be easier to maintain and keep the complexity
> out of the kernel which makes sense as we don't really touch it. However if
> you really need to update it then you need to update generator source and
> keep the binary blob in sync with it.
> 
> There is a need to patch this in bxt to send one additional command to enable
> a feature. A solution was to patch the binary data with some additional
> data structures (included as part of auto-generator source) but it was
> unnecessarily complicated.
> 
> Chris suggested the idea of having a secondary batch and execute two batch
> buffers. It has clear advantages as we needn't touch the base golden batch,
> can customize secondary/auxiliary batch depending on Gen and can be carried
> in the driver with no dependencies.
> 
> This patch adds support for this auxiliary batch which is inserted at the
> end of golden batch and is completely independent from it. Thanks to Mika
> for the preliminary review.
> 
> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Armin Reese <armin.c.reese@intel.com>
> Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_gem_render_state.c | 27 +++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_gem_render_state.h |  2 ++
>  drivers/gpu/drm/i915/intel_lrc.c             |  6 ++++++
>  3 files changed, 35 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
> index b6492fe..b86e382 100644
> --- a/drivers/gpu/drm/i915/i915_gem_render_state.c
> +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
> @@ -73,6 +73,15 @@ free_gem:
>  	return ret;
>  }
>  
> +#define OUT_BATCH(batch, i, val)				\
> +	do {							\
> +		if (WARN_ON((i) >= PAGE_SIZE / sizeof(u32))) {	\

We have to be slightly more careful here, as we don't have the full page
available since we put render state into the high arena of the golden
bb. Something like WARN_ON(i > PAGE/sizeof(u32) || (batch)[i]) should
suffice.

> @@ -110,6 +119,15 @@ static int render_state_setup(struct render_state *so)
>  
>  		d[i++] = s;
>  	}
> +
> +	while (i % CACHELINE_DWORDS)
> +		OUT_BATCH(d, i, MI_NOOP);
> +
> +	so->aux_batch_offset = i * sizeof(u32);
> +
> +	OUT_BATCH(d, i, MI_BATCH_BUFFER_END);
> +	so->aux_batch_size = (i * sizeof(u32)) - so->aux_batch_offset;

Strictly, and if we are passing the batch length we are being strictly
conformant, then the aux_batch_size must be a multiple of 8.

> +
>  	kunmap(page);
>  
>  	ret = i915_gem_object_set_to_gtt_domain(so->obj, false);
> @@ -128,6 +146,8 @@ err_out:
>  	return ret;
>  }
>  
> +#undef OUT_BATCH
> +
>  void i915_gem_render_state_fini(struct render_state *so)
>  {
>  	i915_gem_object_ggtt_unpin(so->obj);
> @@ -176,6 +196,13 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req)
>  	if (ret)
>  		goto out;
>  
Then we need only execute this BB if so.aux_batch_size > 8

> +	ret = req->ring->dispatch_execbuffer(req,
> +					     (so.ggtt_offset + so.aux_batch_offset),
> +					     so.aux_batch_size,
> +					     I915_DISPATCH_SECURE);
> +	if (ret)
> +		goto out;
> +
>  	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req);
-Chris
Mika Kuoppala July 17, 2015, 4:37 p.m. UTC | #2
Chris Wilson <chris@chris-wilson.co.uk> writes:

> On Fri, Jul 17, 2015 at 05:08:52PM +0100, Arun Siluvery wrote:
>> The Golden batch carries 3D state at the beginning so that HW starts with
>> a known state. It is carried as a binary blob which is auto-generated from
>> source. The idea was it would be easier to maintain and keep the complexity
>> out of the kernel which makes sense as we don't really touch it. However if
>> you really need to update it then you need to update generator source and
>> keep the binary blob in sync with it.
>> 
>> There is a need to patch this in bxt to send one additional command to enable
>> a feature. A solution was to patch the binary data with some additional
>> data structures (included as part of auto-generator source) but it was
>> unnecessarily complicated.
>> 
>> Chris suggested the idea of having a secondary batch and execute two batch
>> buffers. It has clear advantages as we needn't touch the base golden batch,
>> can customize secondary/auxiliary batch depending on Gen and can be carried
>> in the driver with no dependencies.
>> 
>> This patch adds support for this auxiliary batch which is inserted at the
>> end of golden batch and is completely independent from it. Thanks to Mika
>> for the preliminary review.
>> 
>> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: Armin Reese <armin.c.reese@intel.com>
>> Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
>> ---
>>  drivers/gpu/drm/i915/i915_gem_render_state.c | 27 +++++++++++++++++++++++++++
>>  drivers/gpu/drm/i915/i915_gem_render_state.h |  2 ++
>>  drivers/gpu/drm/i915/intel_lrc.c             |  6 ++++++
>>  3 files changed, 35 insertions(+)
>> 
>> diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
>> index b6492fe..b86e382 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_render_state.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
>> @@ -73,6 +73,15 @@ free_gem:
>>  	return ret;
>>  }
>>  
>> +#define OUT_BATCH(batch, i, val)				\
>> +	do {							\
>> +		if (WARN_ON((i) >= PAGE_SIZE / sizeof(u32))) {	\
>
> We have to be slightly more careful here, as we don't have the full page
> available since we put render state into the high arena of the golden
> bb. Something like WARN_ON(i > PAGE/sizeof(u32) || (batch)[i]) should
> suffice.
>

Null state gen makes the final batch in two passes. First
it builds the commands and the state separately. Then, when the sizes
of both are known, it compacts by relocating the state right after
the commands (+some alignment).

So we should have the rest of the page usable for auxiliary
commands here, as we have already copied the state part
as well.

-Mika

>> @@ -110,6 +119,15 @@ static int render_state_setup(struct render_state *so)
>>  
>>  		d[i++] = s;
>>  	}
>> +
>> +	while (i % CACHELINE_DWORDS)
>> +		OUT_BATCH(d, i, MI_NOOP);
>> +
>> +	so->aux_batch_offset = i * sizeof(u32);
>> +
>> +	OUT_BATCH(d, i, MI_BATCH_BUFFER_END);
>> +	so->aux_batch_size = (i * sizeof(u32)) - so->aux_batch_offset;
>
> Strictly, and if we are passing the batch length we are being strictly
> conformant, then the aux_batch_size must be a multiple of 8.
>
>> +
>>  	kunmap(page);
>>  
>>  	ret = i915_gem_object_set_to_gtt_domain(so->obj, false);
>> @@ -128,6 +146,8 @@ err_out:
>>  	return ret;
>>  }
>>  
>> +#undef OUT_BATCH
>> +
>>  void i915_gem_render_state_fini(struct render_state *so)
>>  {
>>  	i915_gem_object_ggtt_unpin(so->obj);
>> @@ -176,6 +196,13 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req)
>>  	if (ret)
>>  		goto out;
>>  
> Then we need only execute this BB if so.aux_batch_size > 8
>
>> +	ret = req->ring->dispatch_execbuffer(req,
>> +					     (so.ggtt_offset + so.aux_batch_offset),
>> +					     so.aux_batch_size,
>> +					     I915_DISPATCH_SECURE);
>> +	if (ret)
>> +		goto out;
>> +
>>  	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req);
> -Chris
>
> -- 
> Chris Wilson, Intel Open Source Technology Centre
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Chris Wilson July 17, 2015, 4:48 p.m. UTC | #3
On Fri, Jul 17, 2015 at 07:37:45PM +0300, Mika Kuoppala wrote:
> Chris Wilson <chris@chris-wilson.co.uk> writes:
> 
> > On Fri, Jul 17, 2015 at 05:08:52PM +0100, Arun Siluvery wrote:
> >> The Golden batch carries 3D state at the beginning so that HW starts with
> >> a known state. It is carried as a binary blob which is auto-generated from
> >> source. The idea was it would be easier to maintain and keep the complexity
> >> out of the kernel which makes sense as we don't really touch it. However if
> >> you really need to update it then you need to update generator source and
> >> keep the binary blob in sync with it.
> >> 
> >> There is a need to patch this in bxt to send one additional command to enable
> >> a feature. A solution was to patch the binary data with some additional
> >> data structures (included as part of auto-generator source) but it was
> >> unnecessarily complicated.
> >> 
> >> Chris suggested the idea of having a secondary batch and execute two batch
> >> buffers. It has clear advantages as we needn't touch the base golden batch,
> >> can customize secondary/auxiliary batch depending on Gen and can be carried
> >> in the driver with no dependencies.
> >> 
> >> This patch adds support for this auxiliary batch which is inserted at the
> >> end of golden batch and is completely independent from it. Thanks to Mika
> >> for the preliminary review.
> >> 
> >> Cc: Mika Kuoppala <mika.kuoppala@intel.com>
> >> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> >> Cc: Armin Reese <armin.c.reese@intel.com>
> >> Signed-off-by: Arun Siluvery <arun.siluvery@linux.intel.com>
> >> ---
> >>  drivers/gpu/drm/i915/i915_gem_render_state.c | 27 +++++++++++++++++++++++++++
> >>  drivers/gpu/drm/i915/i915_gem_render_state.h |  2 ++
> >>  drivers/gpu/drm/i915/intel_lrc.c             |  6 ++++++
> >>  3 files changed, 35 insertions(+)
> >> 
> >> diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
> >> index b6492fe..b86e382 100644
> >> --- a/drivers/gpu/drm/i915/i915_gem_render_state.c
> >> +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
> >> @@ -73,6 +73,15 @@ free_gem:
> >>  	return ret;
> >>  }
> >>  
> >> +#define OUT_BATCH(batch, i, val)				\
> >> +	do {							\
> >> +		if (WARN_ON((i) >= PAGE_SIZE / sizeof(u32))) {	\
> >
> > We have to be slightly more careful here, as we don't have the full page
> > available since we put render state into the high arena of the golden
> > bb. Something like WARN_ON(i > PAGE/sizeof(u32) || (batch)[i]) should
> > suffice.
> >
> 
> Null state gen makes the final batch in two passes. First
> it builds the commands and the state separately. Then, when the sizes
> of both are known, it compacts by relocating the state right after
> the commands (+some alignment).
> 
> So we should have the rest of the page usable for auxiliary
> commands here, as we have already copied the state part
> as well.

Ta. Maybe add some words of enlightenment here for future me as well?
Also, we will need to document in the null state generator that the
kernel relies on this packing to add extra commands after the batch.
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index b6492fe..b86e382 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -73,6 +73,15 @@  free_gem:
 	return ret;
 }
 
+#define OUT_BATCH(batch, i, val)				\
+	do {							\
+		if (WARN_ON((i) >= PAGE_SIZE / sizeof(u32))) {	\
+			ret = -ENOSPC;				\
+			goto err_out;				\
+		}						\
+		(batch)[(i)++] = (val);				\
+	} while(0)
+
 static int render_state_setup(struct render_state *so)
 {
 	const struct intel_renderstate_rodata *rodata = so->rodata;
@@ -110,6 +119,15 @@  static int render_state_setup(struct render_state *so)
 
 		d[i++] = s;
 	}
+
+	while (i % CACHELINE_DWORDS)
+		OUT_BATCH(d, i, MI_NOOP);
+
+	so->aux_batch_offset = i * sizeof(u32);
+
+	OUT_BATCH(d, i, MI_BATCH_BUFFER_END);
+	so->aux_batch_size = (i * sizeof(u32)) - so->aux_batch_offset;
+
 	kunmap(page);
 
 	ret = i915_gem_object_set_to_gtt_domain(so->obj, false);
@@ -128,6 +146,8 @@  err_out:
 	return ret;
 }
 
+#undef OUT_BATCH
+
 void i915_gem_render_state_fini(struct render_state *so)
 {
 	i915_gem_object_ggtt_unpin(so->obj);
@@ -176,6 +196,13 @@  int i915_gem_render_state_init(struct drm_i915_gem_request *req)
 	if (ret)
 		goto out;
 
+	ret = req->ring->dispatch_execbuffer(req,
+					     (so.ggtt_offset + so.aux_batch_offset),
+					     so.aux_batch_size,
+					     I915_DISPATCH_SECURE);
+	if (ret)
+		goto out;
+
 	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req);
 
 out:
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.h b/drivers/gpu/drm/i915/i915_gem_render_state.h
index 7aa7372..79de101 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.h
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.h
@@ -37,6 +37,8 @@  struct render_state {
 	struct drm_i915_gem_object *obj;
 	u64 ggtt_offset;
 	int gen;
+	u32 aux_batch_size;
+	u64 aux_batch_offset;
 };
 
 int i915_gem_render_state_init(struct drm_i915_gem_request *req);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index adb386d..5e4771e 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1661,6 +1661,12 @@  static int intel_lr_context_render_state_init(struct drm_i915_gem_request *req)
 	if (ret)
 		goto out;
 
+	ret = req->ring->emit_bb_start(req,
+				       (so.ggtt_offset + so.aux_batch_offset),
+				       I915_DISPATCH_SECURE);
+	if (ret)
+		goto out;
+
 	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req);
 
 out: