diff mbox

[2/3] drm/i915: add render state initialization

Message ID 1398187184-14919-3-git-send-email-mika.kuoppala@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Mika Kuoppala April 22, 2014, 5:19 p.m. UTC
HW guys say that it is not a cool idea to let device
go into rc6 without proper 3d pipeline state.

For each new uninitialized context, generate a
valid null render state to be run on context
creation.

This patch introduces a skeleton with emty states.

Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
---
 drivers/gpu/drm/i915/Makefile                 |    1 +
 drivers/gpu/drm/i915/i915_drv.h               |    2 +
 drivers/gpu/drm/i915/i915_gem_context.c       |    7 +
 drivers/gpu/drm/i915/i915_gem_render_state.c  |  222 +++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_renderstate_gen6.h |   11 ++
 drivers/gpu/drm/i915/intel_renderstate_gen7.h |   11 ++
 drivers/gpu/drm/i915/intel_renderstate_gen8.h |   11 ++
 7 files changed, 265 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/i915_gem_render_state.c
 create mode 100644 drivers/gpu/drm/i915/intel_renderstate_gen6.h
 create mode 100644 drivers/gpu/drm/i915/intel_renderstate_gen7.h
 create mode 100644 drivers/gpu/drm/i915/intel_renderstate_gen8.h

Comments

Daniel Vetter April 22, 2014, 8:26 p.m. UTC | #1
On Tue, Apr 22, 2014 at 08:19:43PM +0300, Mika Kuoppala wrote:
> HW guys say that it is not a cool idea to let device
> go into rc6 without proper 3d pipeline state.
> 
> For each new uninitialized context, generate a
> valid null render state to be run on context
> creation.
> 
> This patch introduces a skeleton with emty states.
> 
> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
> ---
>  drivers/gpu/drm/i915/Makefile                 |    1 +
>  drivers/gpu/drm/i915/i915_drv.h               |    2 +
>  drivers/gpu/drm/i915/i915_gem_context.c       |    7 +
>  drivers/gpu/drm/i915/i915_gem_render_state.c  |  222 +++++++++++++++++++++++++
>  drivers/gpu/drm/i915/intel_renderstate_gen6.h |   11 ++
>  drivers/gpu/drm/i915/intel_renderstate_gen7.h |   11 ++
>  drivers/gpu/drm/i915/intel_renderstate_gen8.h |   11 ++

Imo static const arrays in headers look ugly ... I prefer we just add
normal C files with forward declarations in i915_gem_render_state.c.

In the i915/Makefile you could add a new section for all these golden
renderstate files. I know this is a bikeshed, but ;-)
-Daniel

>  7 files changed, 265 insertions(+)
>  create mode 100644 drivers/gpu/drm/i915/i915_gem_render_state.c
>  create mode 100644 drivers/gpu/drm/i915/intel_renderstate_gen6.h
>  create mode 100644 drivers/gpu/drm/i915/intel_renderstate_gen7.h
>  create mode 100644 drivers/gpu/drm/i915/intel_renderstate_gen8.h
> 
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index b1445b7..b5d4029 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -18,6 +18,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o
>  # GEM code
>  i915-y += i915_cmd_parser.o \
>  	  i915_gem_context.o \
> +	  i915_gem_render_state.o \
>  	  i915_gem_debug.o \
>  	  i915_gem_dmabuf.o \
>  	  i915_gem_evict.o \
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 43b022c..f6ae2ee 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2330,6 +2330,8 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
>  int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
>  				   struct drm_file *file);
>  
> +/* i915_gem_render_state.c */
> +int i915_gem_init_render_state(struct intel_ring_buffer *ring);
>  /* i915_gem_evict.c */
>  int __must_check i915_gem_evict_something(struct drm_device *dev,
>  					  struct i915_address_space *vm,
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 30b355a..d648d4d 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -746,6 +746,13 @@ static int do_switch(struct intel_ring_buffer *ring,
>  		/* obj is kept alive until the next request by its active ref */
>  		i915_gem_object_ggtt_unpin(from->obj);
>  		i915_gem_context_unreference(from);
> +	} else {
> +		if (ring->id == RCS && to->is_initialized == false) {
> +
> +			ret = i915_gem_init_render_state(ring);
> +			if (ret)
> +				DRM_ERROR("init render state: %d\n", ret);
> +		}
>  	}
>  
>  	to->is_initialized = true;
> diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
> new file mode 100644
> index 0000000..409f99b
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
> @@ -0,0 +1,222 @@
> +/*
> + * Copyright © 2014 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + *    Mika Kuoppala <mika.kuoppala@intel.com>
> + *
> + */
> +
> +#include "i915_drv.h"
> +
> +#include "intel_renderstate_gen6.h"
> +#include "intel_renderstate_gen7.h"
> +#include "intel_renderstate_gen8.h"
> +
> +#define BATCH_MAX_SIZE 4096
> +
> +struct i915_render_state {
> +	struct drm_i915_gem_object *obj;
> +	unsigned long ggtt_offset;
> +	void *batch;
> +	u32 size;
> +	u32 len;
> +};
> +
> +static struct i915_render_state *
> +render_state_alloc(struct drm_device *dev, unsigned int size)
> +{
> +	struct i915_render_state *so;
> +	int ret;
> +
> +	so = kzalloc(sizeof(*so), GFP_KERNEL);
> +	if (!so)
> +		return ERR_PTR(-ENOMEM);
> +
> +	so->size = ALIGN(size, 4096);
> +
> +	so->obj = i915_gem_alloc_object(dev, so->size);
> +	if (so->obj == NULL) {
> +		ret = -ENOMEM;
> +		goto free;
> +	}
> +
> +	ret = i915_gem_obj_ggtt_pin(so->obj, 4096, 0);
> +	if (ret)
> +		goto free_gem;
> +
> +	so->batch = i915_gem_vmap_obj(so->obj);
> +	if (!so->batch) {
> +		ret = -ENOMEM;
> +		goto unpin;
> +	}
> +
> +	so->ggtt_offset = i915_gem_obj_ggtt_offset(so->obj);
> +
> +	return so;
> +unpin:
> +	i915_gem_object_ggtt_unpin(so->obj);
> +free_gem:
> +	drm_gem_object_unreference(&so->obj->base);
> +free:
> +	kfree(so);
> +	return ERR_PTR(ret);
> +}
> +
> +static void render_state_free(struct i915_render_state *so)
> +{
> +	vunmap(so->batch);
> +	i915_gem_object_ggtt_unpin(so->obj);
> +	drm_gem_object_unreference(&so->obj->base);
> +	kfree(so);
> +}
> +
> +struct render_state_ro_data {
> +	const u32 *batch;
> +	unsigned int batch_items;
> +	const u32 *reloc;
> +	unsigned int reloc_items;
> +	bool reloc_64bit;
> +};
> +
> +static int render_state_copy(struct i915_render_state *so,
> +			     const struct render_state_ro_data *source)
> +{
> +	const u64 goffset = i915_gem_obj_ggtt_offset(so->obj);
> +	const unsigned int num_relocs = source->reloc_items;
> +	int reloc_index = 0;
> +	u32 *d = (uint32_t *)so->batch;
> +	unsigned int i = 0;
> +
> +	if (source->batch_items * sizeof(*source->batch) > so->size)
> +		return -EINVAL;
> +
> +	while (i < source->batch_items) {
> +		u32 s = source->batch[i];
> +
> +		if (reloc_index < num_relocs &&
> +		    i * 4  == source->reloc[reloc_index]) {
> +
> +			s += goffset & 0xffffffff;
> +
> +			/* We keep batch offsets max 32bit */
> +			if (source->reloc_64bit) {
> +				if (i + 1 >= source->batch_items ||
> +				    source->batch[i + 1] != 0)
> +					return -EINVAL;
> +
> +				d[i] = s;
> +				i++;
> +				s = (goffset & 0xffffffff00000000ull) >> 32;
> +			}
> +
> +			reloc_index++;
> +		}
> +
> +		d[i] = s;
> +		i++;
> +	}
> +
> +	if (num_relocs != reloc_index) {
> +		DRM_ERROR("not all relocs resolved, %d out of %d\n",
> +			  reloc_index, num_relocs);
> +		return -EINVAL;
> +	}
> +
> +	so->len = source->batch_items * sizeof(*source->batch);
> +
> +	return 0;
> +}
> +
> +static int render_state_get(struct i915_render_state *so, int gen)
> +{
> +	struct render_state_ro_data ro_data;
> +
> +#define RS_SETUP(_d, _g, _r) {						\
> +		if (!sizeof(gen ## _g ## _null_state_batch))		\
> +			return -EINVAL;					\
> +		if (sizeof(gen ## _g ## _null_state_batch) & 0x3)	\
> +			return -EINVAL;					\
> +		if (sizeof(gen ## _g ## _null_state_relocs) & 0x3)	\
> +			return -EINVAL;					\
> +		_d.batch = gen ## _g ## _null_state_batch;		\
> +		_d.batch_items = sizeof(gen ##_g ## _null_state_batch) / 4; \
> +		_d.reloc = gen ## _g ## _null_state_relocs;		\
> +		_d.reloc_items = sizeof(gen ## _g ## _null_state_relocs) / 4; \
> +		_d.reloc_64bit = _r;					\
> +	}
> +
> +	switch (gen) {
> +	case 6:
> +		RS_SETUP(ro_data, 6, false);
> +		break;
> +	case 7:
> +		RS_SETUP(ro_data, 7, false);
> +		break;
> +	case 8:
> +		RS_SETUP(ro_data, 8, true);
> +		break;
> +	default:
> +		return -ENOENT;
> +	}
> +#undef RS_SETUP
> +
> +	return render_state_copy(so, &ro_data);
> +}
> +
> +int i915_gem_init_render_state(struct intel_ring_buffer *ring)
> +{
> +	const int gen = INTEL_INFO(ring->dev)->gen;
> +	struct i915_render_state *so;
> +	u32 seqno;
> +	int ret;
> +
> +	if (gen < 6)
> +		return 0;
> +
> +	so = render_state_alloc(ring->dev, BATCH_MAX_SIZE);
> +	if (IS_ERR(so))
> +		return PTR_ERR(so);
> +
> +	ret = render_state_get(so, gen);
> +	if (ret)
> +		goto out;
> +
> +	ret = ring->dispatch_execbuffer(ring,
> +					i915_gem_obj_ggtt_offset(so->obj),
> +					so->len,
> +					I915_DISPATCH_SECURE);
> +	if (ret)
> +		goto out;
> +
> +	ret = intel_ring_flush_all_caches(ring);
> +	if (ret)
> +		goto out;
> +
> +	ret = i915_add_request(ring, &seqno);
> +	if (ret)
> +		goto out;
> +
> +	ret = i915_wait_seqno(ring, seqno);
> +out:
> +	render_state_free(so);
> +	return ret;
> +}
> diff --git a/drivers/gpu/drm/i915/intel_renderstate_gen6.h b/drivers/gpu/drm/i915/intel_renderstate_gen6.h
> new file mode 100644
> index 0000000..4809f2f
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_renderstate_gen6.h
> @@ -0,0 +1,11 @@
> +#ifndef __INTEL_RENDERSTATE_GEN6
> +#define __INTEL_RENDERSTATE_GEN6
> +
> +static const uint32_t gen6_null_state_relocs[] = {
> +};
> +
> +static const uint32_t gen6_null_state_batch[] = {
> +	MI_BATCH_BUFFER_END,
> +};
> +
> +#endif /* __INTEL_RENDERSTATE_GEN6 */
> diff --git a/drivers/gpu/drm/i915/intel_renderstate_gen7.h b/drivers/gpu/drm/i915/intel_renderstate_gen7.h
> new file mode 100644
> index 0000000..9b1420b
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_renderstate_gen7.h
> @@ -0,0 +1,11 @@
> +#ifndef __INTEL_RENDERSTATE_GEN7
> +#define __INTEL_RENDERSTATE_GEN7
> +
> +static const uint32_t gen7_null_state_relocs[] = {
> +};
> +
> +static const uint32_t gen7_null_state_batch[] = {
> +	MI_BATCH_BUFFER_END,
> +};
> +
> +#endif /* __INTEL_RENDERSTATE_GEN7 */
> diff --git a/drivers/gpu/drm/i915/intel_renderstate_gen8.h b/drivers/gpu/drm/i915/intel_renderstate_gen8.h
> new file mode 100644
> index 0000000..d349dda
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_renderstate_gen8.h
> @@ -0,0 +1,11 @@
> +#ifndef __INTEL_RENDERSTATE_GEN8
> +#define __INTEL_RENDERSTATE_GEN8
> +
> +static const uint32_t gen8_null_state_relocs[] = {
> +};
> +
> +static const uint32_t gen8_null_state_batch[] = {
> +	MI_BATCH_BUFFER_END,
> +};
> +
> +#endif /* __INTEL_RENDERSTATE_GEN8 */
> -- 
> 1.7.9.5
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Ben Widawsky May 6, 2014, 1:20 a.m. UTC | #2
On Tue, Apr 22, 2014 at 08:19:43PM +0300, Mika Kuoppala wrote:
> HW guys say that it is not a cool idea to let device
> go into rc6 without proper 3d pipeline state.
> 
> For each new uninitialized context, generate a
> valid null render state to be run on context
> creation.
> 
> This patch introduces a skeleton with emty states.
> 
> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
> ---
>  drivers/gpu/drm/i915/Makefile                 |    1 +
>  drivers/gpu/drm/i915/i915_drv.h               |    2 +
>  drivers/gpu/drm/i915/i915_gem_context.c       |    7 +
>  drivers/gpu/drm/i915/i915_gem_render_state.c  |  222 +++++++++++++++++++++++++
>  drivers/gpu/drm/i915/intel_renderstate_gen6.h |   11 ++
>  drivers/gpu/drm/i915/intel_renderstate_gen7.h |   11 ++
>  drivers/gpu/drm/i915/intel_renderstate_gen8.h |   11 ++
>  7 files changed, 265 insertions(+)
>  create mode 100644 drivers/gpu/drm/i915/i915_gem_render_state.c
>  create mode 100644 drivers/gpu/drm/i915/intel_renderstate_gen6.h
>  create mode 100644 drivers/gpu/drm/i915/intel_renderstate_gen7.h
>  create mode 100644 drivers/gpu/drm/i915/intel_renderstate_gen8.h
> 
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index b1445b7..b5d4029 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -18,6 +18,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o
>  # GEM code
>  i915-y += i915_cmd_parser.o \
>  	  i915_gem_context.o \
> +	  i915_gem_render_state.o \
>  	  i915_gem_debug.o \
>  	  i915_gem_dmabuf.o \
>  	  i915_gem_evict.o \
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 43b022c..f6ae2ee 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2330,6 +2330,8 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
>  int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
>  				   struct drm_file *file);
>  
> +/* i915_gem_render_state.c */
> +int i915_gem_init_render_state(struct intel_ring_buffer *ring);
>  /* i915_gem_evict.c */
>  int __must_check i915_gem_evict_something(struct drm_device *dev,
>  					  struct i915_address_space *vm,
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 30b355a..d648d4d 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -746,6 +746,13 @@ static int do_switch(struct intel_ring_buffer *ring,
>  		/* obj is kept alive until the next request by its active ref */
>  		i915_gem_object_ggtt_unpin(from->obj);
>  		i915_gem_context_unreference(from);
> +	} else {
> +		if (ring->id == RCS && to->is_initialized == false) {
> +
> +			ret = i915_gem_init_render_state(ring);
> +			if (ret)
> +				DRM_ERROR("init render state: %d\n", ret);
> +		}

The way I thought we'd do this was to emit the state once for the
default context, and copy those pages to the BO of the new context.

>  	}
>  
>  	to->is_initialized = true;
> diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
> new file mode 100644
> index 0000000..409f99b
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
> @@ -0,0 +1,222 @@
> +/*
> + * Copyright © 2014 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + *    Mika Kuoppala <mika.kuoppala@intel.com>
> + *
> + */
> +
> +#include "i915_drv.h"
> +
> +#include "intel_renderstate_gen6.h"
> +#include "intel_renderstate_gen7.h"
> +#include "intel_renderstate_gen8.h"
> +
> +#define BATCH_MAX_SIZE 4096
> +
> +struct i915_render_state {
> +	struct drm_i915_gem_object *obj;
> +	unsigned long ggtt_offset;
> +	void *batch;
> +	u32 size;
> +	u32 len;
> +};
> +
> +static struct i915_render_state *
> +render_state_alloc(struct drm_device *dev, unsigned int size)
> +{
> +	struct i915_render_state *so;
> +	int ret;
> +
> +	so = kzalloc(sizeof(*so), GFP_KERNEL);
> +	if (!so)
> +		return ERR_PTR(-ENOMEM);
> +
> +	so->size = ALIGN(size, 4096);
> +
> +	so->obj = i915_gem_alloc_object(dev, so->size);
> +	if (so->obj == NULL) {
> +		ret = -ENOMEM;
> +		goto free;
> +	}
> +
> +	ret = i915_gem_obj_ggtt_pin(so->obj, 4096, 0);
> +	if (ret)
> +		goto free_gem;

Using dispatch the way you do below, we'll actually be executing out of
the PPGTT, and not the global. While this makes no difference on the
current code, it does make a difference for some of my plans. I don't
see a clean way to fix this with the current code base, I am just
complaining.

> +
> +	so->batch = i915_gem_vmap_obj(so->obj);
> +	if (!so->batch) {
> +		ret = -ENOMEM;
> +		goto unpin;
> +	}
> +
> +	so->ggtt_offset = i915_gem_obj_ggtt_offset(so->obj);
> +
> +	return so;
> +unpin:
> +	i915_gem_object_ggtt_unpin(so->obj);
> +free_gem:
> +	drm_gem_object_unreference(&so->obj->base);
> +free:
> +	kfree(so);
> +	return ERR_PTR(ret);
> +}
> +
> +static void render_state_free(struct i915_render_state *so)
> +{
> +	vunmap(so->batch);
> +	i915_gem_object_ggtt_unpin(so->obj);
> +	drm_gem_object_unreference(&so->obj->base);
> +	kfree(so);
> +}
> +
> +struct render_state_ro_data {
> +	const u32 *batch;
> +	unsigned int batch_items;
> +	const u32 *reloc;
> +	unsigned int reloc_items;
> +	bool reloc_64bit;
> +};
> +
> +static int render_state_copy(struct i915_render_state *so,
> +			     const struct render_state_ro_data *source)
> +{
> +	const u64 goffset = i915_gem_obj_ggtt_offset(so->obj);
> +	const unsigned int num_relocs = source->reloc_items;
> +	int reloc_index = 0;
> +	u32 *d = (uint32_t *)so->batch;
> +	unsigned int i = 0;
> +
> +	if (source->batch_items * sizeof(*source->batch) > so->size)
> +		return -EINVAL;
> +
> +	while (i < source->batch_items) {
> +		u32 s = source->batch[i];
> +
> +		if (reloc_index < num_relocs &&
> +		    i * 4  == source->reloc[reloc_index]) {
> +
> +			s += goffset & 0xffffffff;
> +
> +			/* We keep batch offsets max 32bit */
> +			if (source->reloc_64bit) {
> +				if (i + 1 >= source->batch_items ||
> +				    source->batch[i + 1] != 0)
> +					return -EINVAL;
> +
> +				d[i] = s;
> +				i++;
> +				s = (goffset & 0xffffffff00000000ull) >> 32;
> +			}
> +
> +			reloc_index++;
> +		}
> +
> +		d[i] = s;
> +		i++;
> +	}
> +
> +	if (num_relocs != reloc_index) {
> +		DRM_ERROR("not all relocs resolved, %d out of %d\n",
> +			  reloc_index, num_relocs);
> +		return -EINVAL;
> +	}
> +
> +	so->len = source->batch_items * sizeof(*source->batch);
> +
> +	return 0;
> +}
> +
> +static int render_state_get(struct i915_render_state *so, int gen)
> +{
> +	struct render_state_ro_data ro_data;
> +
> +#define RS_SETUP(_d, _g, _r) {						\
> +		if (!sizeof(gen ## _g ## _null_state_batch))		\
> +			return -EINVAL;					\
> +		if (sizeof(gen ## _g ## _null_state_batch) & 0x3)	\
> +			return -EINVAL;					\
> +		if (sizeof(gen ## _g ## _null_state_relocs) & 0x3)	\
> +			return -EINVAL;					\
> +		_d.batch = gen ## _g ## _null_state_batch;		\
> +		_d.batch_items = sizeof(gen ##_g ## _null_state_batch) / 4; \
> +		_d.reloc = gen ## _g ## _null_state_relocs;		\
> +		_d.reloc_items = sizeof(gen ## _g ## _null_state_relocs) / 4; \
> +		_d.reloc_64bit = _r;					\
> +	}
> +
> +	switch (gen) {
> +	case 6:
> +		RS_SETUP(ro_data, 6, false);
> +		break;
> +	case 7:
> +		RS_SETUP(ro_data, 7, false);
> +		break;

I'm a bit surprised we have no differences for HSW/BYT/IVB.

> +	case 8:
> +		RS_SETUP(ro_data, 8, true);
> +		break;
> +	default:
> +		return -ENOENT;
> +	}
> +#undef RS_SETUP
> +
> +	return render_state_copy(so, &ro_data);
> +}
> +
> +int i915_gem_init_render_state(struct intel_ring_buffer *ring)
> +{
> +	const int gen = INTEL_INFO(ring->dev)->gen;
> +	struct i915_render_state *so;
> +	u32 seqno;
> +	int ret;
> +
> +	if (gen < 6)
> +		return 0;

HAS_HW_CONTEXTS
ickle: "Never forget ILK contexts"

> +
> +	so = render_state_alloc(ring->dev, BATCH_MAX_SIZE);
> +	if (IS_ERR(so))
> +		return PTR_ERR(so);
> +
> +	ret = render_state_get(so, gen);
> +	if (ret)
> +		goto out;
> +
> +	ret = ring->dispatch_execbuffer(ring,
> +					i915_gem_obj_ggtt_offset(so->obj),
> +					so->len,
> +					I915_DISPATCH_SECURE);
> +	if (ret)
> +		goto out;
> +
> +	ret = intel_ring_flush_all_caches(ring);
> +	if (ret)
> +		goto out;
> +
> +	ret = i915_add_request(ring, &seqno);
> +	if (ret)
> +		goto out;
> +
> +	ret = i915_wait_seqno(ring, seqno);

This wait is going to hurt. Not seeing why it's needed (plus I still
recommend my idea on the top).

> +out:
> +	render_state_free(so);
> +	return ret;
> +}
> diff --git a/drivers/gpu/drm/i915/intel_renderstate_gen6.h b/drivers/gpu/drm/i915/intel_renderstate_gen6.h
> new file mode 100644
> index 0000000..4809f2f
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_renderstate_gen6.h
> @@ -0,0 +1,11 @@
> +#ifndef __INTEL_RENDERSTATE_GEN6
> +#define __INTEL_RENDERSTATE_GEN6
> +
> +static const uint32_t gen6_null_state_relocs[] = {
> +};
> +
> +static const uint32_t gen6_null_state_batch[] = {
> +	MI_BATCH_BUFFER_END,
> +};
> +
> +#endif /* __INTEL_RENDERSTATE_GEN6 */
> diff --git a/drivers/gpu/drm/i915/intel_renderstate_gen7.h b/drivers/gpu/drm/i915/intel_renderstate_gen7.h
> new file mode 100644
> index 0000000..9b1420b
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_renderstate_gen7.h
> @@ -0,0 +1,11 @@
> +#ifndef __INTEL_RENDERSTATE_GEN7
> +#define __INTEL_RENDERSTATE_GEN7
> +
> +static const uint32_t gen7_null_state_relocs[] = {
> +};
> +
> +static const uint32_t gen7_null_state_batch[] = {
> +	MI_BATCH_BUFFER_END,
> +};
> +
> +#endif /* __INTEL_RENDERSTATE_GEN7 */
> diff --git a/drivers/gpu/drm/i915/intel_renderstate_gen8.h b/drivers/gpu/drm/i915/intel_renderstate_gen8.h
> new file mode 100644
> index 0000000..d349dda
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/intel_renderstate_gen8.h
> @@ -0,0 +1,11 @@
> +#ifndef __INTEL_RENDERSTATE_GEN8
> +#define __INTEL_RENDERSTATE_GEN8
> +
> +static const uint32_t gen8_null_state_relocs[] = {
> +};
> +
> +static const uint32_t gen8_null_state_batch[] = {
> +	MI_BATCH_BUFFER_END,
> +};
> +
> +#endif /* __INTEL_RENDERSTATE_GEN8 */
> -- 
> 1.7.9.5
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Mika Kuoppala May 6, 2014, 1:33 p.m. UTC | #3
Ben Widawsky <ben@bwidawsk.net> writes:

> On Tue, Apr 22, 2014 at 08:19:43PM +0300, Mika Kuoppala wrote:
>> HW guys say that it is not a cool idea to let device
>> go into rc6 without proper 3d pipeline state.
>> 
>> For each new uninitialized context, generate a
>> valid null render state to be run on context
>> creation.
>> 
>> This patch introduces a skeleton with emty states.
>> 
>> Signed-off-by: Mika Kuoppala <mika.kuoppala@intel.com>
>> ---
>>  drivers/gpu/drm/i915/Makefile                 |    1 +
>>  drivers/gpu/drm/i915/i915_drv.h               |    2 +
>>  drivers/gpu/drm/i915/i915_gem_context.c       |    7 +
>>  drivers/gpu/drm/i915/i915_gem_render_state.c  |  222 +++++++++++++++++++++++++
>>  drivers/gpu/drm/i915/intel_renderstate_gen6.h |   11 ++
>>  drivers/gpu/drm/i915/intel_renderstate_gen7.h |   11 ++
>>  drivers/gpu/drm/i915/intel_renderstate_gen8.h |   11 ++
>>  7 files changed, 265 insertions(+)
>>  create mode 100644 drivers/gpu/drm/i915/i915_gem_render_state.c
>>  create mode 100644 drivers/gpu/drm/i915/intel_renderstate_gen6.h
>>  create mode 100644 drivers/gpu/drm/i915/intel_renderstate_gen7.h
>>  create mode 100644 drivers/gpu/drm/i915/intel_renderstate_gen8.h
>> 
>> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
>> index b1445b7..b5d4029 100644
>> --- a/drivers/gpu/drm/i915/Makefile
>> +++ b/drivers/gpu/drm/i915/Makefile
>> @@ -18,6 +18,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o
>>  # GEM code
>>  i915-y += i915_cmd_parser.o \
>>  	  i915_gem_context.o \
>> +	  i915_gem_render_state.o \
>>  	  i915_gem_debug.o \
>>  	  i915_gem_dmabuf.o \
>>  	  i915_gem_evict.o \
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 43b022c..f6ae2ee 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -2330,6 +2330,8 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
>>  int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
>>  				   struct drm_file *file);
>>  
>> +/* i915_gem_render_state.c */
>> +int i915_gem_init_render_state(struct intel_ring_buffer *ring);
>>  /* i915_gem_evict.c */
>>  int __must_check i915_gem_evict_something(struct drm_device *dev,
>>  					  struct i915_address_space *vm,
>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
>> index 30b355a..d648d4d 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_context.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
>> @@ -746,6 +746,13 @@ static int do_switch(struct intel_ring_buffer *ring,
>>  		/* obj is kept alive until the next request by its active ref */
>>  		i915_gem_object_ggtt_unpin(from->obj);
>>  		i915_gem_context_unreference(from);
>> +	} else {
>> +		if (ring->id == RCS && to->is_initialized == false) {
>> +
>> +			ret = i915_gem_init_render_state(ring);
>> +			if (ret)
>> +				DRM_ERROR("init render state: %d\n", ret);
>> +		}
>
> The way I thought we'd do this was to emit the state once for the
> default context, and copy those pages to the BO of the new context.

The state is big (on bdw), 18 pages. More on cover letter of v2 series:
1399382766-25116-1-git-send-email-mika.kuoppala@intel.com

>>  	}
>>  
>>  	to->is_initialized = true;
>> diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
>> new file mode 100644
>> index 0000000..409f99b
>> --- /dev/null
>> +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
>> @@ -0,0 +1,222 @@
>> +/*
>> + * Copyright © 2014 Intel Corporation
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including the next
>> + * paragraph) shall be included in all copies or substantial portions of the
>> + * Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
>> + * IN THE SOFTWARE.
>> + *
>> + * Authors:
>> + *    Mika Kuoppala <mika.kuoppala@intel.com>
>> + *
>> + */
>> +
>> +#include "i915_drv.h"
>> +
>> +#include "intel_renderstate_gen6.h"
>> +#include "intel_renderstate_gen7.h"
>> +#include "intel_renderstate_gen8.h"
>> +
>> +#define BATCH_MAX_SIZE 4096
>> +
>> +struct i915_render_state {
>> +	struct drm_i915_gem_object *obj;
>> +	unsigned long ggtt_offset;
>> +	void *batch;
>> +	u32 size;
>> +	u32 len;
>> +};
>> +
>> +static struct i915_render_state *
>> +render_state_alloc(struct drm_device *dev, unsigned int size)
>> +{
>> +	struct i915_render_state *so;
>> +	int ret;
>> +
>> +	so = kzalloc(sizeof(*so), GFP_KERNEL);
>> +	if (!so)
>> +		return ERR_PTR(-ENOMEM);
>> +
>> +	so->size = ALIGN(size, 4096);
>> +
>> +	so->obj = i915_gem_alloc_object(dev, so->size);
>> +	if (so->obj == NULL) {
>> +		ret = -ENOMEM;
>> +		goto free;
>> +	}
>> +
>> +	ret = i915_gem_obj_ggtt_pin(so->obj, 4096, 0);
>> +	if (ret)
>> +		goto free_gem;
>
> Using dispatch the way you do below, we'll actually be executing out of
> the PPGTT, and not the global. While this makes no difference on the
> current code, it does make a difference for some of my plans. I don't
> see a clean way to fix this with the current code base, I am just
> complaining.
>
>> +
>> +	so->batch = i915_gem_vmap_obj(so->obj);
>> +	if (!so->batch) {
>> +		ret = -ENOMEM;
>> +		goto unpin;
>> +	}
>> +
>> +	so->ggtt_offset = i915_gem_obj_ggtt_offset(so->obj);
>> +
>> +	return so;
>> +unpin:
>> +	i915_gem_object_ggtt_unpin(so->obj);
>> +free_gem:
>> +	drm_gem_object_unreference(&so->obj->base);
>> +free:
>> +	kfree(so);
>> +	return ERR_PTR(ret);
>> +}
>> +
>> +static void render_state_free(struct i915_render_state *so)
>> +{
>> +	vunmap(so->batch);
>> +	i915_gem_object_ggtt_unpin(so->obj);
>> +	drm_gem_object_unreference(&so->obj->base);
>> +	kfree(so);
>> +}
>> +
>> +struct render_state_ro_data {
>> +	const u32 *batch;
>> +	unsigned int batch_items;
>> +	const u32 *reloc;
>> +	unsigned int reloc_items;
>> +	bool reloc_64bit;
>> +};
>> +
>> +static int render_state_copy(struct i915_render_state *so,
>> +			     const struct render_state_ro_data *source)
>> +{
>> +	const u64 goffset = i915_gem_obj_ggtt_offset(so->obj);
>> +	const unsigned int num_relocs = source->reloc_items;
>> +	int reloc_index = 0;
>> +	u32 *d = (uint32_t *)so->batch;
>> +	unsigned int i = 0;
>> +
>> +	if (source->batch_items * sizeof(*source->batch) > so->size)
>> +		return -EINVAL;
>> +
>> +	while (i < source->batch_items) {
>> +		u32 s = source->batch[i];
>> +
>> +		if (reloc_index < num_relocs &&
>> +		    i * 4  == source->reloc[reloc_index]) {
>> +
>> +			s += goffset & 0xffffffff;
>> +
>> +			/* We keep batch offsets max 32bit */
>> +			if (source->reloc_64bit) {
>> +				if (i + 1 >= source->batch_items ||
>> +				    source->batch[i + 1] != 0)
>> +					return -EINVAL;
>> +
>> +				d[i] = s;
>> +				i++;
>> +				s = (goffset & 0xffffffff00000000ull) >> 32;
>> +			}
>> +
>> +			reloc_index++;
>> +		}
>> +
>> +		d[i] = s;
>> +		i++;
>> +	}
>> +
>> +	if (num_relocs != reloc_index) {
>> +		DRM_ERROR("not all relocs resolved, %d out of %d\n",
>> +			  reloc_index, num_relocs);
>> +		return -EINVAL;
>> +	}
>> +
>> +	so->len = source->batch_items * sizeof(*source->batch);
>> +
>> +	return 0;
>> +}
>> +
>> +static int render_state_get(struct i915_render_state *so, int gen)
>> +{
>> +	struct render_state_ro_data ro_data;
>> +
>> +#define RS_SETUP(_d, _g, _r) {						\
>> +		if (!sizeof(gen ## _g ## _null_state_batch))		\
>> +			return -EINVAL;					\
>> +		if (sizeof(gen ## _g ## _null_state_batch) & 0x3)	\
>> +			return -EINVAL;					\
>> +		if (sizeof(gen ## _g ## _null_state_relocs) & 0x3)	\
>> +			return -EINVAL;					\
>> +		_d.batch = gen ## _g ## _null_state_batch;		\
>> +		_d.batch_items = sizeof(gen ##_g ## _null_state_batch) / 4; \
>> +		_d.reloc = gen ## _g ## _null_state_relocs;		\
>> +		_d.reloc_items = sizeof(gen ## _g ## _null_state_relocs) / 4; \
>> +		_d.reloc_64bit = _r;					\
>> +	}
>> +
>> +	switch (gen) {
>> +	case 6:
>> +		RS_SETUP(ro_data, 6, false);
>> +		break;
>> +	case 7:
>> +		RS_SETUP(ro_data, 7, false);
>> +		break;
>
> I'm a bit surprised we have no differences for HSW/BYT/IVB.

The state generators need eyes on them for sure. There was some difference
between ivb/hsw in rendercopy. But for the null state, I gathered it
doesnt matter.

>> +	case 8:
>> +		RS_SETUP(ro_data, 8, true);
>> +		break;
>> +	default:
>> +		return -ENOENT;
>> +	}
>> +#undef RS_SETUP
>> +
>> +	return render_state_copy(so, &ro_data);
>> +}
>> +
>> +int i915_gem_init_render_state(struct intel_ring_buffer *ring)
>> +{
>> +	const int gen = INTEL_INFO(ring->dev)->gen;
>> +	struct i915_render_state *so;
>> +	u32 seqno;
>> +	int ret;
>> +
>> +	if (gen < 6)
>> +		return 0;
>
> HAS_HW_CONTEXTS
> ickle: "Never forget ILK contexts"

We have states only for >= snb. So thus the bailout.
And do_switch is not called with fake contexts.

>> +
>> +	so = render_state_alloc(ring->dev, BATCH_MAX_SIZE);
>> +	if (IS_ERR(so))
>> +		return PTR_ERR(so);
>> +
>> +	ret = render_state_get(so, gen);
>> +	if (ret)
>> +		goto out;
>> +
>> +	ret = ring->dispatch_execbuffer(ring,
>> +					i915_gem_obj_ggtt_offset(so->obj),
>> +					so->len,
>> +					I915_DISPATCH_SECURE);
>> +	if (ret)
>> +		goto out;
>> +
>> +	ret = intel_ring_flush_all_caches(ring);
>> +	if (ret)
>> +		goto out;
>> +
>> +	ret = i915_add_request(ring, &seqno);
>> +	if (ret)
>> +		goto out;
>> +
>> +	ret = i915_wait_seqno(ring, seqno);
>
> This wait is going to hurt. Not seeing why it's needed (plus I still
> recommend my idea on the top).

I couldn't come up with any explanations either. Removed in v2

Thanks,
-Mika

>> +out:
>> +	render_state_free(so);
>> +	return ret;
>> +}
>> diff --git a/drivers/gpu/drm/i915/intel_renderstate_gen6.h b/drivers/gpu/drm/i915/intel_renderstate_gen6.h
>> new file mode 100644
>> index 0000000..4809f2f
>> --- /dev/null
>> +++ b/drivers/gpu/drm/i915/intel_renderstate_gen6.h
>> @@ -0,0 +1,11 @@
>> +#ifndef __INTEL_RENDERSTATE_GEN6
>> +#define __INTEL_RENDERSTATE_GEN6
>> +
>> +static const uint32_t gen6_null_state_relocs[] = {
>> +};
>> +
>> +static const uint32_t gen6_null_state_batch[] = {
>> +	MI_BATCH_BUFFER_END,
>> +};
>> +
>> +#endif /* __INTEL_RENDERSTATE_GEN6 */
>> diff --git a/drivers/gpu/drm/i915/intel_renderstate_gen7.h b/drivers/gpu/drm/i915/intel_renderstate_gen7.h
>> new file mode 100644
>> index 0000000..9b1420b
>> --- /dev/null
>> +++ b/drivers/gpu/drm/i915/intel_renderstate_gen7.h
>> @@ -0,0 +1,11 @@
>> +#ifndef __INTEL_RENDERSTATE_GEN7
>> +#define __INTEL_RENDERSTATE_GEN7
>> +
>> +static const uint32_t gen7_null_state_relocs[] = {
>> +};
>> +
>> +static const uint32_t gen7_null_state_batch[] = {
>> +	MI_BATCH_BUFFER_END,
>> +};
>> +
>> +#endif /* __INTEL_RENDERSTATE_GEN7 */
>> diff --git a/drivers/gpu/drm/i915/intel_renderstate_gen8.h b/drivers/gpu/drm/i915/intel_renderstate_gen8.h
>> new file mode 100644
>> index 0000000..d349dda
>> --- /dev/null
>> +++ b/drivers/gpu/drm/i915/intel_renderstate_gen8.h
>> @@ -0,0 +1,11 @@
>> +#ifndef __INTEL_RENDERSTATE_GEN8
>> +#define __INTEL_RENDERSTATE_GEN8
>> +
>> +static const uint32_t gen8_null_state_relocs[] = {
>> +};
>> +
>> +static const uint32_t gen8_null_state_batch[] = {
>> +	MI_BATCH_BUFFER_END,
>> +};
>> +
>> +#endif /* __INTEL_RENDERSTATE_GEN8 */
>> -- 
>> 1.7.9.5
>> 
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
> -- 
> Ben Widawsky, Intel Open Source Technology Center
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index b1445b7..b5d4029 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -18,6 +18,7 @@  i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o
 # GEM code
 i915-y += i915_cmd_parser.o \
 	  i915_gem_context.o \
+	  i915_gem_render_state.o \
 	  i915_gem_debug.o \
 	  i915_gem_dmabuf.o \
 	  i915_gem_evict.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 43b022c..f6ae2ee 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2330,6 +2330,8 @@  int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
 				   struct drm_file *file);
 
+/* i915_gem_render_state.c */
+int i915_gem_init_render_state(struct intel_ring_buffer *ring);
 /* i915_gem_evict.c */
 int __must_check i915_gem_evict_something(struct drm_device *dev,
 					  struct i915_address_space *vm,
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 30b355a..d648d4d 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -746,6 +746,13 @@  static int do_switch(struct intel_ring_buffer *ring,
 		/* obj is kept alive until the next request by its active ref */
 		i915_gem_object_ggtt_unpin(from->obj);
 		i915_gem_context_unreference(from);
+	} else {
+		if (ring->id == RCS && to->is_initialized == false) {
+
+			ret = i915_gem_init_render_state(ring);
+			if (ret)
+				DRM_ERROR("init render state: %d\n", ret);
+		}
 	}
 
 	to->is_initialized = true;
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
new file mode 100644
index 0000000..409f99b
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -0,0 +1,222 @@ 
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Mika Kuoppala <mika.kuoppala@intel.com>
+ *
+ */
+
+#include "i915_drv.h"
+
+#include "intel_renderstate_gen6.h"
+#include "intel_renderstate_gen7.h"
+#include "intel_renderstate_gen8.h"
+
+#define BATCH_MAX_SIZE 4096
+
+struct i915_render_state {
+	struct drm_i915_gem_object *obj;
+	unsigned long ggtt_offset;
+	void *batch;
+	u32 size;
+	u32 len;
+};
+
+static struct i915_render_state *
+render_state_alloc(struct drm_device *dev, unsigned int size)
+{
+	struct i915_render_state *so;
+	int ret;
+
+	so = kzalloc(sizeof(*so), GFP_KERNEL);
+	if (!so)
+		return ERR_PTR(-ENOMEM);
+
+	so->size = ALIGN(size, 4096);
+
+	so->obj = i915_gem_alloc_object(dev, so->size);
+	if (so->obj == NULL) {
+		ret = -ENOMEM;
+		goto free;
+	}
+
+	ret = i915_gem_obj_ggtt_pin(so->obj, 4096, 0);
+	if (ret)
+		goto free_gem;
+
+	so->batch = i915_gem_vmap_obj(so->obj);
+	if (!so->batch) {
+		ret = -ENOMEM;
+		goto unpin;
+	}
+
+	so->ggtt_offset = i915_gem_obj_ggtt_offset(so->obj);
+
+	return so;
+unpin:
+	i915_gem_object_ggtt_unpin(so->obj);
+free_gem:
+	drm_gem_object_unreference(&so->obj->base);
+free:
+	kfree(so);
+	return ERR_PTR(ret);
+}
+
+static void render_state_free(struct i915_render_state *so)
+{
+	vunmap(so->batch);
+	i915_gem_object_ggtt_unpin(so->obj);
+	drm_gem_object_unreference(&so->obj->base);
+	kfree(so);
+}
+
+struct render_state_ro_data {
+	const u32 *batch;
+	unsigned int batch_items;
+	const u32 *reloc;
+	unsigned int reloc_items;
+	bool reloc_64bit;
+};
+
+static int render_state_copy(struct i915_render_state *so,
+			     const struct render_state_ro_data *source)
+{
+	const u64 goffset = i915_gem_obj_ggtt_offset(so->obj);
+	const unsigned int num_relocs = source->reloc_items;
+	int reloc_index = 0;
+	u32 *d = (uint32_t *)so->batch;
+	unsigned int i = 0;
+
+	if (source->batch_items * sizeof(*source->batch) > so->size)
+		return -EINVAL;
+
+	while (i < source->batch_items) {
+		u32 s = source->batch[i];
+
+		if (reloc_index < num_relocs &&
+		    i * 4  == source->reloc[reloc_index]) {
+
+			s += goffset & 0xffffffff;
+
+			/* We keep batch offsets max 32bit */
+			if (source->reloc_64bit) {
+				if (i + 1 >= source->batch_items ||
+				    source->batch[i + 1] != 0)
+					return -EINVAL;
+
+				d[i] = s;
+				i++;
+				s = (goffset & 0xffffffff00000000ull) >> 32;
+			}
+
+			reloc_index++;
+		}
+
+		d[i] = s;
+		i++;
+	}
+
+	if (num_relocs != reloc_index) {
+		DRM_ERROR("not all relocs resolved, %d out of %d\n",
+			  reloc_index, num_relocs);
+		return -EINVAL;
+	}
+
+	so->len = source->batch_items * sizeof(*source->batch);
+
+	return 0;
+}
+
+static int render_state_get(struct i915_render_state *so, int gen)
+{
+	struct render_state_ro_data ro_data;
+
+#define RS_SETUP(_d, _g, _r) {						\
+		if (!sizeof(gen ## _g ## _null_state_batch))		\
+			return -EINVAL;					\
+		if (sizeof(gen ## _g ## _null_state_batch) & 0x3)	\
+			return -EINVAL;					\
+		if (sizeof(gen ## _g ## _null_state_relocs) & 0x3)	\
+			return -EINVAL;					\
+		_d.batch = gen ## _g ## _null_state_batch;		\
+		_d.batch_items = sizeof(gen ##_g ## _null_state_batch) / 4; \
+		_d.reloc = gen ## _g ## _null_state_relocs;		\
+		_d.reloc_items = sizeof(gen ## _g ## _null_state_relocs) / 4; \
+		_d.reloc_64bit = _r;					\
+	}
+
+	switch (gen) {
+	case 6:
+		RS_SETUP(ro_data, 6, false);
+		break;
+	case 7:
+		RS_SETUP(ro_data, 7, false);
+		break;
+	case 8:
+		RS_SETUP(ro_data, 8, true);
+		break;
+	default:
+		return -ENOENT;
+	}
+#undef RS_SETUP
+
+	return render_state_copy(so, &ro_data);
+}
+
+int i915_gem_init_render_state(struct intel_ring_buffer *ring)
+{
+	const int gen = INTEL_INFO(ring->dev)->gen;
+	struct i915_render_state *so;
+	u32 seqno;
+	int ret;
+
+	if (gen < 6)
+		return 0;
+
+	so = render_state_alloc(ring->dev, BATCH_MAX_SIZE);
+	if (IS_ERR(so))
+		return PTR_ERR(so);
+
+	ret = render_state_get(so, gen);
+	if (ret)
+		goto out;
+
+	ret = ring->dispatch_execbuffer(ring,
+					i915_gem_obj_ggtt_offset(so->obj),
+					so->len,
+					I915_DISPATCH_SECURE);
+	if (ret)
+		goto out;
+
+	ret = intel_ring_flush_all_caches(ring);
+	if (ret)
+		goto out;
+
+	ret = i915_add_request(ring, &seqno);
+	if (ret)
+		goto out;
+
+	ret = i915_wait_seqno(ring, seqno);
+out:
+	render_state_free(so);
+	return ret;
+}
diff --git a/drivers/gpu/drm/i915/intel_renderstate_gen6.h b/drivers/gpu/drm/i915/intel_renderstate_gen6.h
new file mode 100644
index 0000000..4809f2f
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_renderstate_gen6.h
@@ -0,0 +1,11 @@ 
+#ifndef __INTEL_RENDERSTATE_GEN6
+#define __INTEL_RENDERSTATE_GEN6
+
+static const uint32_t gen6_null_state_relocs[] = {
+};
+
+static const uint32_t gen6_null_state_batch[] = {
+	MI_BATCH_BUFFER_END,
+};
+
+#endif /* __INTEL_RENDERSTATE_GEN6 */
diff --git a/drivers/gpu/drm/i915/intel_renderstate_gen7.h b/drivers/gpu/drm/i915/intel_renderstate_gen7.h
new file mode 100644
index 0000000..9b1420b
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_renderstate_gen7.h
@@ -0,0 +1,11 @@ 
+#ifndef __INTEL_RENDERSTATE_GEN7
+#define __INTEL_RENDERSTATE_GEN7
+
+static const uint32_t gen7_null_state_relocs[] = {
+};
+
+static const uint32_t gen7_null_state_batch[] = {
+	MI_BATCH_BUFFER_END,
+};
+
+#endif /* __INTEL_RENDERSTATE_GEN7 */
diff --git a/drivers/gpu/drm/i915/intel_renderstate_gen8.h b/drivers/gpu/drm/i915/intel_renderstate_gen8.h
new file mode 100644
index 0000000..d349dda
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_renderstate_gen8.h
@@ -0,0 +1,11 @@ 
+#ifndef __INTEL_RENDERSTATE_GEN8
+#define __INTEL_RENDERSTATE_GEN8
+
+static const uint32_t gen8_null_state_relocs[] = {
+};
+
+static const uint32_t gen8_null_state_batch[] = {
+	MI_BATCH_BUFFER_END,
+};
+
+#endif /* __INTEL_RENDERSTATE_GEN8 */