
[61/66] drm/i915: Use multiple VMs

Message ID 1372375867-1003-62-git-send-email-ben@bwidawsk.net (mailing list archive)
State New, archived

Commit Message

Ben Widawsky June 27, 2013, 11:31 p.m. UTC
This requires doing an actual switch of the page tables during the
context switch/execbuf.

Along the way, cut away as much "aliasing" ppgtt as possible

Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
---
 drivers/gpu/drm/i915/i915_gem.c            | 22 +++++++++++++---------
 drivers/gpu/drm/i915/i915_gem_context.c    | 29 +++++++++++++++++------------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 27 ++++++++++++++++++++-------
 3 files changed, 50 insertions(+), 28 deletions(-)

Comments

Ben Widawsky June 27, 2013, 11:43 p.m. UTC | #1
On Thu, Jun 27, 2013 at 04:31:02PM -0700, Ben Widawsky wrote:
> This requires doing an actual switch of the page tables during the
> context switch/execbuf.
> 
> Along the way, cut away as much "aliasing" ppgtt as possible
> 
> Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> ---
>  drivers/gpu/drm/i915/i915_gem.c            | 22 +++++++++++++---------
>  drivers/gpu/drm/i915/i915_gem_context.c    | 29 +++++++++++++++++------------
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 27 ++++++++++++++++++++-------
>  3 files changed, 50 insertions(+), 28 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index af0150e..f05d585 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2170,7 +2170,10 @@ request_to_vm(struct drm_i915_gem_request *request)
>  	struct drm_i915_private *dev_priv = request->ring->dev->dev_private;
>  	struct i915_address_space *vm;
>  
> -	vm = &dev_priv->gtt.base;
> +	if (request->ctx)
> +		vm = &request->ctx->ppgtt.base;
> +	else
> +		vm = &dev_priv->gtt.base;
>  
>  	return vm;
>  }
> @@ -2676,10 +2679,10 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj,
>  
>  	if (obj->has_global_gtt_mapping && is_i915_ggtt(vm))
>  		i915_gem_gtt_unbind_object(obj);
> -	if (obj->has_aliasing_ppgtt_mapping) {
> -		i915_ppgtt_unbind_object(dev_priv->gtt.aliasing_ppgtt, obj);
> -		obj->has_aliasing_ppgtt_mapping = 0;
> -	}
> +
> +	vm->clear_range(vm, i915_gem_obj_offset(obj, vm) >> PAGE_SHIFT,
> +			obj->base.size >> PAGE_SHIFT);
> +
>  	i915_gem_gtt_finish_object(obj);
>  	i915_gem_object_unpin_pages(obj);
>  
> @@ -3444,11 +3447,12 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
>  				return ret;
>  		}
>  
> -		if (obj->has_global_gtt_mapping)
> +		if (!is_i915_ggtt(vm) && obj->has_global_gtt_mapping)
>  			i915_gem_gtt_bind_object(obj, cache_level);
> -		if (obj->has_aliasing_ppgtt_mapping)
> -			i915_ppgtt_bind_object(dev_priv->gtt.aliasing_ppgtt,
> -					       obj, cache_level);
> +
> +		vm->insert_entries(vm, obj->pages,
> +				   i915_gem_obj_offset(obj, vm) >> PAGE_SHIFT,
> +				   cache_level);
>  
>  		i915_gem_obj_set_color(obj, vm, cache_level);
>  	}
> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
> index 37ebfa2..cea036e 100644
> --- a/drivers/gpu/drm/i915/i915_gem_context.c
> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> @@ -526,10 +526,14 @@ static int do_switch(struct intel_ring_buffer *ring,
>  	if (from == to && from->last_ring == ring)
>  		return 0;
>  
> +	ret = to->ppgtt.switch_mm(&to->ppgtt, ring);
> +	if (ret)
> +		return ret;
> +
>  	if (ring != &dev_priv->ring[RCS] && from) {
>  		ret = i915_add_request(ring, NULL);
>  		if (ret)
> -			return ret;
> +			goto err_out;
>  		i915_gem_context_unreference(from);
>  	}
>  
> @@ -538,7 +542,7 @@ static int do_switch(struct intel_ring_buffer *ring,
>  
>  	ret = i915_gem_ggtt_pin(to->obj, CONTEXT_ALIGN, false, false);
>  	if (ret)
> -		return ret;
> +		goto err_out;
>  
>  	/* Clear this page out of any CPU caches for coherent swap-in/out. Note
>  	 * that thanks to write = false in this call and us not setting any gpu
> @@ -546,10 +550,8 @@ static int do_switch(struct intel_ring_buffer *ring,
>  	 * (when switching away from it), this won't block.
>  	 * XXX: We need a real interface to do this instead of trickery. */
>  	ret = i915_gem_object_set_to_gtt_domain(to->obj, false);
> -	if (ret) {
> -		i915_gem_object_unpin(to->obj);
> -		return ret;
> -	}
> +	if (ret)
> +		goto unpin_out;
>  
>  	if (!to->obj->has_global_gtt_mapping)
>  		i915_gem_gtt_bind_object(to->obj, to->obj->cache_level);
> @@ -560,10 +562,8 @@ static int do_switch(struct intel_ring_buffer *ring,
>  		hw_flags |= MI_FORCE_RESTORE;
>  
>  	ret = mi_set_context(ring, to, hw_flags);
> -	if (ret) {
> -		i915_gem_object_unpin(to->obj);
> -		return ret;
> -	}
> +	if (ret)
> +		goto unpin_out;
>  
>  	/* The backing object for the context is done after switching to the
>  	 * *next* context. Therefore we cannot retire the previous context until
> @@ -593,7 +593,7 @@ static int do_switch(struct intel_ring_buffer *ring,
>  			 * scream.
>  			 */
>  			WARN_ON(mi_set_context(ring, from, MI_RESTORE_INHIBIT));
> -			return ret;
> +			goto err_out;
>  		}
>  
>  		i915_gem_object_unpin(from->obj);
> @@ -605,8 +605,13 @@ done:
>  	ring->last_context = to;
>  	to->is_initialized = true;
>  	to->last_ring = ring;
> -
>  	return 0;
> +
> +unpin_out:
> +	i915_gem_object_unpin(to->obj);
> +err_out:
> +	WARN_ON(from->ppgtt.switch_mm(&from->ppgtt, ring));
> +	return ret;
>  }
>  
>  /**
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index aeec8c0..0f6bf3c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -437,11 +437,21 @@ i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
>  	}
>  
>  	/* Ensure ppgtt mapping exists if needed */
> -	if (dev_priv->gtt.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
> -		i915_ppgtt_bind_object(dev_priv->gtt.aliasing_ppgtt,
> -				       obj, obj->cache_level);
> -
> +	if (is_i915_ggtt(vm) &&
> +	    dev_priv->gtt.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
> +		/* FIXME: remove this later */
> +		struct i915_address_space *appgtt =
> +			&dev_priv->gtt.aliasing_ppgtt->base;
> +		unsigned long obj_offset = i915_gem_obj_offset(obj, appgtt);
> +
> +		appgtt->insert_entries(appgtt, obj->pages,
> +				       obj_offset >> PAGE_SHIFT,
> +				       obj->cache_level);
>
I meant to remove this, but I missed it. In theory I don't ever want to
insert Aliasing PPGTT PTEs. Will remove it locally and test it now; a
sketch with that call dropped follows at the end of this mail.
>
>  		obj->has_aliasing_ppgtt_mapping = 1;
> +	} else {
> +		vm->insert_entries(vm, obj->pages,
> +				   i915_gem_obj_offset(obj, vm) >> PAGE_SHIFT,
> +				   obj->cache_level);
>  	}
>  
>  	if (entry->offset != i915_gem_obj_offset(obj, vm)) {
> @@ -864,7 +874,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  	struct i915_hw_context *ctx;
>  	struct i915_address_space *vm;
>  	u32 ctx_id = i915_execbuffer2_get_context_id(*args);
> -	u32 exec_start, exec_len;
> +	u32 exec_start = args->batch_start_offset, exec_len;
>  	u32 mask, flags;
>  	int ret, mode, i;
>  	bool need_relocs;
> @@ -1085,8 +1095,11 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  			goto err;
>  	}
>  
> -	exec_start = i915_gem_obj_offset(batch_obj, vm) +
> -		args->batch_start_offset;
> +	if (batch_obj->has_global_gtt_mapping)
> +		exec_start += i915_gem_ggtt_offset(batch_obj);
> +	else
> +		exec_start += i915_gem_obj_offset(batch_obj, vm);
> +
>  	exec_len = args->batch_len;
>  	if (cliprects) {
>  		for (i = 0; i < args->num_cliprects; i++) {
> -- 
> 1.8.3.1
>
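
For reference, a minimal sketch of how that reservation hunk might look
once the stray aliasing PPGTT insert above is dropped (an assumption
about the respin, not the code as posted):

	/* Only write PTEs into the address space the object was actually
	 * reserved into; the aliasing PPGTT is no longer touched here.
	 */
	vm->insert_entries(vm, obj->pages,
			   i915_gem_obj_offset(obj, vm) >> PAGE_SHIFT,
			   obj->cache_level);
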
Ville Syrjälä July 2, 2013, 10:58 a.m. UTC | #2
On Thu, Jun 27, 2013 at 04:43:40PM -0700, Ben Widawsky wrote:
> On Thu, Jun 27, 2013 at 04:31:02PM -0700, Ben Widawsky wrote:
> > This requires doing an actual switch of the page tables during the
> > context switch/execbuf.
> > 
> > Along the way, cut away as much "aliasing" ppgtt as possible
> > 
> > Signed-off-by: Ben Widawsky <ben@bwidawsk.net>
> > ---
> >  drivers/gpu/drm/i915/i915_gem.c            | 22 +++++++++++++---------
> >  drivers/gpu/drm/i915/i915_gem_context.c    | 29 +++++++++++++++++------------
> >  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 27 ++++++++++++++++++++-------
> >  3 files changed, 50 insertions(+), 28 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index af0150e..f05d585 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -2170,7 +2170,10 @@ request_to_vm(struct drm_i915_gem_request *request)
> >  	struct drm_i915_private *dev_priv = request->ring->dev->dev_private;
> >  	struct i915_address_space *vm;
> >  
> > -	vm = &dev_priv->gtt.base;
> > +	if (request->ctx)
> > +		vm = &request->ctx->ppgtt.base;
> > +	else
> > +		vm = &dev_priv->gtt.base;
> >  
> >  	return vm;
> >  }
> > @@ -2676,10 +2679,10 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj,
> >  
> >  	if (obj->has_global_gtt_mapping && is_i915_ggtt(vm))
> >  		i915_gem_gtt_unbind_object(obj);
> > -	if (obj->has_aliasing_ppgtt_mapping) {
> > -		i915_ppgtt_unbind_object(dev_priv->gtt.aliasing_ppgtt, obj);
> > -		obj->has_aliasing_ppgtt_mapping = 0;
> > -	}
> > +
> > +	vm->clear_range(vm, i915_gem_obj_offset(obj, vm) >> PAGE_SHIFT,
> > +			obj->base.size >> PAGE_SHIFT);
> > +
> >  	i915_gem_gtt_finish_object(obj);
> >  	i915_gem_object_unpin_pages(obj);
> >  
> > @@ -3444,11 +3447,12 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
> >  				return ret;
> >  		}
> >  
> > -		if (obj->has_global_gtt_mapping)
> > +		if (!is_i915_ggtt(vm) && obj->has_global_gtt_mapping)
> >  			i915_gem_gtt_bind_object(obj, cache_level);

Are you planning to kill i915_gem_gtt_(un)bind_object? In many cases you
seem to end up writing the global GTT PTEs twice because of it. I guess
the only catch is that obj->has_global_gtt_mapping must be kept in sync w/
reality if you kill it.
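
To illustrate the idea, a minimal sketch (made-up helper name, and it
assumes the vm hooks from this series stay roughly as posted) of GGTT
binding going through the same insert_entries path while keeping the
flag in sync:

	static void ggtt_bind_object(struct drm_i915_gem_object *obj,
				     enum i915_cache_level cache_level)
	{
		struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
		struct i915_address_space *ggtt = &dev_priv->gtt.base;

		/* Write the global GTT PTEs once, through the generic hook */
		ggtt->insert_entries(ggtt, obj->pages,
				     i915_gem_obj_offset(obj, ggtt) >> PAGE_SHIFT,
				     cache_level);
		obj->has_global_gtt_mapping = 1;
	}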

> > -		if (obj->has_aliasing_ppgtt_mapping)
> > -			i915_ppgtt_bind_object(dev_priv->gtt.aliasing_ppgtt,
> > -					       obj, cache_level);
> > +
> > +		vm->insert_entries(vm, obj->pages,
> > +				   i915_gem_obj_offset(obj, vm) >> PAGE_SHIFT,
> > +				   cache_level);
> >  
> >  		i915_gem_obj_set_color(obj, vm, cache_level);

This cache level stuff ends up looking a bit wonky, but I guess you
didn't spend much time on it yet.

I'll just note that I don't think we can allow per-address-space cache
levels through this interface since that would break the case where the
client renders to the buffer, then the server/compositor sets it to
uncached and does a page flip. After this the client has to also use
uncached mappings or the server/compositor won't realize that it would
need to clflush again before page flipping.

So I think you can just eliminate the vm argument from this function and
then the function could look something like this (pardon my pseudo
code):

  ...
  if (bound_any(obj)) {
    finish_gpu
    finish_gtt
    put_fence
    ...
  }

  for_each_safe(obj->vma_list) {
    vm = vma->vm;
    node = vma->node;
    if (verify_gtt(node, cache_level)) {
      unbind(obj, vm);
      continue;
    }
    vm->insert_range();
    vma->color = cache_level;
  }
  ...
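
Spelled out a bit more concretely (still only a sketch; vma_link and
i915_gem_valid_gtt_space() are assumptions about how this series and the
existing code fit together, and dev/ret are the function's existing
locals):

	struct i915_vma *vma, *next;

	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
		struct i915_address_space *vm = vma->vm;

		/* If the new cache level can't live at this offset in this
		 * address space, drop the mapping from it entirely.
		 */
		if (!i915_gem_valid_gtt_space(dev, &vma->node, cache_level)) {
			ret = i915_gem_object_unbind(obj, vm);
			if (ret)
				return ret;
			continue;
		}

		/* Otherwise just rewrite the PTEs with the new cache level */
		vm->insert_entries(vm, obj->pages,
				   i915_gem_obj_offset(obj, vm) >> PAGE_SHIFT,
				   cache_level);
		i915_gem_obj_set_color(obj, vm, cache_level);
	}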


This also got me thinking about MOCS. That seems a bit dangerous
in this case since the client can easily override the cache level w/o
the server/compositor noticing. I guess we just have to make a rule that
clients aren't allowed to override cache level w/ MOCS for window
system buffers.

Also IIRC someone told me that w/ uncached mappings the caches aren't
snooped even on LLC platforms. If that's true, MOCS seems even more
dangerous since the client could easily mix cached and uncached
accesses. I don't really understand why uncached mappings wouldn't
snoop on LLC platforms since the snooping should be more or less free.
Chris Wilson July 2, 2013, 11:07 a.m. UTC | #3
On Tue, Jul 02, 2013 at 01:58:13PM +0300, Ville Syrjälä wrote:
> Also IIRC someone told me that w/ uncached mappings the caches aren't
> snooped even on LLC platforms. If that's true, MOCS seems even more
> dangerous since the client could easily mix cached and uncached
> accesses. I don't really understand why uncached mappings wouldn't
> snoop on LLC platforms since the snooping should be more or less free.

Who said that? Doesn't appear to be the case on SNB/IVB/HSW as far as I
can tell.
-Chris
Ville Syrjälä July 2, 2013, 11:34 a.m. UTC | #4
On Tue, Jul 02, 2013 at 12:07:10PM +0100, Chris Wilson wrote:
> On Tue, Jul 02, 2013 at 01:58:13PM +0300, Ville Syrjälä wrote:
> > Also IIRC someone told me that w/ uncached mappings the caches aren't
> > snooped even on LLC platforms. If that's true, MOCS seems even more
> > dangerous since the client could easily mix cached and uncached
> > accesses. I don't really understand why uncached mappings wouldn't
> > snoop on LLC platforms since the snooping should be more or less free.
> 
> Who said that? Doesn't appear to be the case on SNB/IVB/HSW as far as I
> can tell.

Can't remember now who said it, or could be I just misunderstood. But if
it's not true, then MOCS seems actually useful. Otherwise it's going to
be a clflush fest when you want to change from cached to uncached.
Chris Wilson July 2, 2013, 11:38 a.m. UTC | #5
On Tue, Jul 02, 2013 at 02:34:59PM +0300, Ville Syrjälä wrote:
> On Tue, Jul 02, 2013 at 12:07:10PM +0100, Chris Wilson wrote:
> > On Tue, Jul 02, 2013 at 01:58:13PM +0300, Ville Syrjälä wrote:
> > > Also IIRC someone told me that w/ uncached mappings the caches aren't
> > > snooped even on LLC platforms. If that's true, MOCS seems even more
> > > dangerous since the client could easily mix cached and uncached
> > > accesses. I don't really understand why uncached mappings wouldn't
> > > snoop on LLC platforms since the snooping should be more or less free.
> > 
> > Who said that? Doesn't appear to be the case on SNB/IVB/HSW as far as I
> > can tell.
> 
> Can't remember now who said it, or could be I just misunderstood. But if
> it's not true, then MOCS seems actually useful. Otherwise it's going to
> be a clflush fest when you want to change from cached to uncached.

Well MOCS doesn't apply for GTT access by the CPU, so there you only
need to be concerned about whether you are rendering to a scanout. That
is tracked in the ddx, but as you point out mesa would have to assume
that any winsys buffer is a potential scanout unless we add an interface
for it to ask the display server.
-Chris
Daniel Vetter July 2, 2013, 12:34 p.m. UTC | #6
On Tue, Jul 02, 2013 at 12:38:33PM +0100, Chris Wilson wrote:
> On Tue, Jul 02, 2013 at 02:34:59PM +0300, Ville Syrjälä wrote:
> > On Tue, Jul 02, 2013 at 12:07:10PM +0100, Chris Wilson wrote:
> > > On Tue, Jul 02, 2013 at 01:58:13PM +0300, Ville Syrjälä wrote:
> > > > Also IIRC someone told me that w/ uncached mappings the caches aren't
> > > > snooped even on LLC platforms. If that's true, MOCS seems even more
> > > > dangerous since the client could easily mix cached and uncached
> > > > accesses. I don't really understand why uncached mappings wouldn't
> > > > snoop on LLC platforms since the snooping should be more or less free.
> > > 
> > > Who said that? Doesn't appear to be the case on SNB/IVB/HSW as far as I
> > > can tell.
> > 
> > Can't remember now who said it, or could be I just misunderstood. But if
> > it's not true, then MOCS seems actually useful. Otherwise it's going to
> > be a clflush fest when you want to change from cached to uncached.
> 
> Well MOCS doesn't apply for GTT access by the CPU, so there you only
> need to be concerned about whether you are rendering to a scanout. That
> is tracked in the ddx, but as you point out mesa would have to assume
> that any winsys buffer is a potential scanout unless we add an interface
> for it to ask the display server.

I think userspace should make damn sure that it doesn't change the cache
level (i.e. snooped vs. non-snooped or that special write-back mode we
have on some hsw machines). Otherwise we'd indeed be screwed, since the
clflush tracking done by the kernel would be thrown off.

But thus far all the MOCS stuff seems to only be used to select in which
caches and at what age we should allocate cachelines ...
-Daniel

Patch

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index af0150e..f05d585 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2170,7 +2170,10 @@  request_to_vm(struct drm_i915_gem_request *request)
 	struct drm_i915_private *dev_priv = request->ring->dev->dev_private;
 	struct i915_address_space *vm;
 
-	vm = &dev_priv->gtt.base;
+	if (request->ctx)
+		vm = &request->ctx->ppgtt.base;
+	else
+		vm = &dev_priv->gtt.base;
 
 	return vm;
 }
@@ -2676,10 +2679,10 @@  i915_gem_object_unbind(struct drm_i915_gem_object *obj,
 
 	if (obj->has_global_gtt_mapping && is_i915_ggtt(vm))
 		i915_gem_gtt_unbind_object(obj);
-	if (obj->has_aliasing_ppgtt_mapping) {
-		i915_ppgtt_unbind_object(dev_priv->gtt.aliasing_ppgtt, obj);
-		obj->has_aliasing_ppgtt_mapping = 0;
-	}
+
+	vm->clear_range(vm, i915_gem_obj_offset(obj, vm) >> PAGE_SHIFT,
+			obj->base.size >> PAGE_SHIFT);
+
 	i915_gem_gtt_finish_object(obj);
 	i915_gem_object_unpin_pages(obj);
 
@@ -3444,11 +3447,12 @@  int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
 				return ret;
 		}
 
-		if (obj->has_global_gtt_mapping)
+		if (!is_i915_ggtt(vm) && obj->has_global_gtt_mapping)
 			i915_gem_gtt_bind_object(obj, cache_level);
-		if (obj->has_aliasing_ppgtt_mapping)
-			i915_ppgtt_bind_object(dev_priv->gtt.aliasing_ppgtt,
-					       obj, cache_level);
+
+		vm->insert_entries(vm, obj->pages,
+				   i915_gem_obj_offset(obj, vm) >> PAGE_SHIFT,
+				   cache_level);
 
 		i915_gem_obj_set_color(obj, vm, cache_level);
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 37ebfa2..cea036e 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -526,10 +526,14 @@  static int do_switch(struct intel_ring_buffer *ring,
 	if (from == to && from->last_ring == ring)
 		return 0;
 
+	ret = to->ppgtt.switch_mm(&to->ppgtt, ring);
+	if (ret)
+		return ret;
+
 	if (ring != &dev_priv->ring[RCS] && from) {
 		ret = i915_add_request(ring, NULL);
 		if (ret)
-			return ret;
+			goto err_out;
 		i915_gem_context_unreference(from);
 	}
 
@@ -538,7 +542,7 @@  static int do_switch(struct intel_ring_buffer *ring,
 
 	ret = i915_gem_ggtt_pin(to->obj, CONTEXT_ALIGN, false, false);
 	if (ret)
-		return ret;
+		goto err_out;
 
 	/* Clear this page out of any CPU caches for coherent swap-in/out. Note
 	 * that thanks to write = false in this call and us not setting any gpu
@@ -546,10 +550,8 @@  static int do_switch(struct intel_ring_buffer *ring,
 	 * (when switching away from it), this won't block.
 	 * XXX: We need a real interface to do this instead of trickery. */
 	ret = i915_gem_object_set_to_gtt_domain(to->obj, false);
-	if (ret) {
-		i915_gem_object_unpin(to->obj);
-		return ret;
-	}
+	if (ret)
+		goto unpin_out;
 
 	if (!to->obj->has_global_gtt_mapping)
 		i915_gem_gtt_bind_object(to->obj, to->obj->cache_level);
@@ -560,10 +562,8 @@  static int do_switch(struct intel_ring_buffer *ring,
 		hw_flags |= MI_FORCE_RESTORE;
 
 	ret = mi_set_context(ring, to, hw_flags);
-	if (ret) {
-		i915_gem_object_unpin(to->obj);
-		return ret;
-	}
+	if (ret)
+		goto unpin_out;
 
 	/* The backing object for the context is done after switching to the
 	 * *next* context. Therefore we cannot retire the previous context until
@@ -593,7 +593,7 @@  static int do_switch(struct intel_ring_buffer *ring,
 			 * scream.
 			 */
 			WARN_ON(mi_set_context(ring, from, MI_RESTORE_INHIBIT));
-			return ret;
+			goto err_out;
 		}
 
 		i915_gem_object_unpin(from->obj);
@@ -605,8 +605,13 @@  done:
 	ring->last_context = to;
 	to->is_initialized = true;
 	to->last_ring = ring;
-
 	return 0;
+
+unpin_out:
+	i915_gem_object_unpin(to->obj);
+err_out:
+	WARN_ON(from->ppgtt.switch_mm(&from->ppgtt, ring));
+	return ret;
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index aeec8c0..0f6bf3c 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -437,11 +437,21 @@  i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
 	}
 
 	/* Ensure ppgtt mapping exists if needed */
-	if (dev_priv->gtt.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
-		i915_ppgtt_bind_object(dev_priv->gtt.aliasing_ppgtt,
-				       obj, obj->cache_level);
-
+	if (is_i915_ggtt(vm) &&
+	    dev_priv->gtt.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
+		/* FIXME: remove this later */
+		struct i915_address_space *appgtt =
+			&dev_priv->gtt.aliasing_ppgtt->base;
+		unsigned long obj_offset = i915_gem_obj_offset(obj, appgtt);
+
+		appgtt->insert_entries(appgtt, obj->pages,
+				       obj_offset >> PAGE_SHIFT,
+				       obj->cache_level);
 		obj->has_aliasing_ppgtt_mapping = 1;
+	} else {
+		vm->insert_entries(vm, obj->pages,
+				   i915_gem_obj_offset(obj, vm) >> PAGE_SHIFT,
+				   obj->cache_level);
 	}
 
 	if (entry->offset != i915_gem_obj_offset(obj, vm)) {
@@ -864,7 +874,7 @@  i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	struct i915_hw_context *ctx;
 	struct i915_address_space *vm;
 	u32 ctx_id = i915_execbuffer2_get_context_id(*args);
-	u32 exec_start, exec_len;
+	u32 exec_start = args->batch_start_offset, exec_len;
 	u32 mask, flags;
 	int ret, mode, i;
 	bool need_relocs;
@@ -1085,8 +1095,11 @@  i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 			goto err;
 	}
 
-	exec_start = i915_gem_obj_offset(batch_obj, vm) +
-		args->batch_start_offset;
+	if (batch_obj->has_global_gtt_mapping)
+		exec_start += i915_gem_ggtt_offset(batch_obj);
+	else
+		exec_start += i915_gem_obj_offset(batch_obj, vm);
+
 	exec_len = args->batch_len;
 	if (cliprects) {
 		for (i = 0; i < args->num_cliprects; i++) {