diff mbox series

[35/39] drm/i915: Pin pages before waiting

Message ID 20190614071023.17929-36-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show
Series [01/39] drm/i915: Discard some redundant cache domain flushes | expand

Commit Message

Chris Wilson June 14, 2019, 7:10 a.m. UTC
In order to allow for asynchronous gathering of pages tracked by the
obj->resv, we take advantage of pinning the pages before doing waiting
on the reservation, and where possible do an early wait before acquiring
the object lock (with a follow-up locked wait to ensure we have
exclusive access where necessary).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c |   4 +-
 drivers/gpu/drm/i915/gem/i915_gem_domain.c | 104 +++++++++++----------
 drivers/gpu/drm/i915/i915_gem.c            |  22 +++--
 3 files changed, 68 insertions(+), 62 deletions(-)

Comments

Matthew Auld June 14, 2019, 7:53 p.m. UTC | #1
On Fri, 14 Jun 2019 at 08:11, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>
> In order to allow for asynchronous gathering of pages tracked by the
> obj->resv, we take advantage of pinning the pages before doing waiting
> on the reservation, and where possible do an early wait before acquiring
> the object lock (with a follow up locked waited to ensure we have
> exclusive access where necessary).
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c |   4 +-
>  drivers/gpu/drm/i915/gem/i915_gem_domain.c | 104 +++++++++++----------
>  drivers/gpu/drm/i915/i915_gem.c            |  22 +++--
>  3 files changed, 68 insertions(+), 62 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> index a93e233cfaa9..84992d590da5 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
> @@ -154,7 +154,7 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_dire
>         bool write = (direction == DMA_BIDIRECTIONAL || direction == DMA_TO_DEVICE);
>         int err;
>
> -       err = i915_gem_object_pin_pages(obj);
> +       err = i915_gem_object_pin_pages_async(obj);
>         if (err)
>                 return err;
>
> @@ -175,7 +175,7 @@ static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direct
>         struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
>         int err;
>
> -       err = i915_gem_object_pin_pages(obj);
> +       err = i915_gem_object_pin_pages_async(obj);
>         if (err)
>                 return err;
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> index f9044bbdd429..bda990113124 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
> @@ -49,17 +49,11 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
>
>         assert_object_held(obj);
>
> -       ret = i915_gem_object_wait(obj,
> -                                  I915_WAIT_INTERRUPTIBLE |
> -                                  (write ? I915_WAIT_ALL : 0),
> -                                  MAX_SCHEDULE_TIMEOUT);
> -       if (ret)
> -               return ret;
> -
>         if (obj->write_domain == I915_GEM_DOMAIN_WC)
>                 return 0;
>
> -       /* Flush and acquire obj->pages so that we are coherent through
> +       /*
> +        * Flush and acquire obj->pages so that we are coherent through
>          * direct access in memory with previous cached writes through
>          * shmemfs and that our cache domain tracking remains valid.
>          * For example, if the obj->filp was moved to swap without us
> @@ -67,10 +61,17 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
>          * continue to assume that the obj remained out of the CPU cached
>          * domain.
>          */
> -       ret = i915_gem_object_pin_pages(obj);
> +       ret = i915_gem_object_pin_pages_async(obj);
>         if (ret)
>                 return ret;
>
> +       ret = i915_gem_object_wait(obj,
> +                                  I915_WAIT_INTERRUPTIBLE |
> +                                  (write ? I915_WAIT_ALL : 0),
> +                                  MAX_SCHEDULE_TIMEOUT);
> +       if (ret)
> +               goto out_unpin;
> +

Do we somehow propagate a potential error from a worker to the
object_wait()? Or should we be looking at obj->mm.pages here?
Chris Wilson June 14, 2019, 7:58 p.m. UTC | #2
Quoting Matthew Auld (2019-06-14 20:53:26)
> On Fri, 14 Jun 2019 at 08:11, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > @@ -67,10 +61,17 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
> >          * continue to assume that the obj remained out of the CPU cached
> >          * domain.
> >          */
> > -       ret = i915_gem_object_pin_pages(obj);
> > +       ret = i915_gem_object_pin_pages_async(obj);
> >         if (ret)
> >                 return ret;
> >
> > +       ret = i915_gem_object_wait(obj,
> > +                                  I915_WAIT_INTERRUPTIBLE |
> > +                                  (write ? I915_WAIT_ALL : 0),
> > +                                  MAX_SCHEDULE_TIMEOUT);
> > +       if (ret)
> > +               goto out_unpin;
> > +
> 
> Do we somehow propagate a potential error from a worker to the
> object_wait()? Or should we be looking at obj->mm.pages here?

Yeah, I've propagated such errors elsewhere (principally along the
fences). What you are suggesting is tantamount to making
i915_gem_object_wait() report an error, and I have bad memories from all
the unhandled -EIO in the past. However, that feels the natural thing to
do, so let's give it a whirl.
-Chris
Chris Wilson June 14, 2019, 8:13 p.m. UTC | #3
Quoting Chris Wilson (2019-06-14 20:58:09)
> Quoting Matthew Auld (2019-06-14 20:53:26)
> > On Fri, 14 Jun 2019 at 08:11, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > > @@ -67,10 +61,17 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
> > >          * continue to assume that the obj remained out of the CPU cached
> > >          * domain.
> > >          */
> > > -       ret = i915_gem_object_pin_pages(obj);
> > > +       ret = i915_gem_object_pin_pages_async(obj);
> > >         if (ret)
> > >                 return ret;
> > >
> > > +       ret = i915_gem_object_wait(obj,
> > > +                                  I915_WAIT_INTERRUPTIBLE |
> > > +                                  (write ? I915_WAIT_ALL : 0),
> > > +                                  MAX_SCHEDULE_TIMEOUT);
> > > +       if (ret)
> > > +               goto out_unpin;
> > > +
> > 
> > Do we somehow propagate a potential error from a worker to the
> > object_wait()? Or should we be looking at obj->mm.pages here?
> 
> Yeah, I've propagated such errors elsewhere (principally along the
> fences). What you are suggesting is tantamount to making
> i915_gem_object_wait() report an error, and I have bad memories from all
> the unhandled -EIO in the past. However, that feels the natural thing to
> do, so lets give it a whirl.

So we need to check for error pages anyway, because we can't rule out a
race between the pin_pages_async and i915_gem_object_wait.

There's plenty of duplicated code for pin_pages_async, object_wait,
check pages so I should refactor that into a variant,
i915_gem_object_pin_pages_wait()?
-Chris
Matthew Auld June 14, 2019, 8:18 p.m. UTC | #4
On Fri, 14 Jun 2019 at 21:13, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>
> Quoting Chris Wilson (2019-06-14 20:58:09)
> > Quoting Matthew Auld (2019-06-14 20:53:26)
> > > On Fri, 14 Jun 2019 at 08:11, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > > > @@ -67,10 +61,17 @@ i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
> > > >          * continue to assume that the obj remained out of the CPU cached
> > > >          * domain.
> > > >          */
> > > > -       ret = i915_gem_object_pin_pages(obj);
> > > > +       ret = i915_gem_object_pin_pages_async(obj);
> > > >         if (ret)
> > > >                 return ret;
> > > >
> > > > +       ret = i915_gem_object_wait(obj,
> > > > +                                  I915_WAIT_INTERRUPTIBLE |
> > > > +                                  (write ? I915_WAIT_ALL : 0),
> > > > +                                  MAX_SCHEDULE_TIMEOUT);
> > > > +       if (ret)
> > > > +               goto out_unpin;
> > > > +
> > >
> > > Do we somehow propagate a potential error from a worker to the
> > > object_wait()? Or should we be looking at obj->mm.pages here?
> >
> > Yeah, I've propagated such errors elsewhere (principally along the
> > fences). What you are suggesting is tantamount to making
> > i915_gem_object_wait() report an error, and I have bad memories from all
> > the unhandled -EIO in the past. However, that feels the natural thing to
> > do, so lets give it a whirl.
>
> So we need to check for error pages anyway, because we can't rule out a
> race between the pin_pages_async and i915_gem_object_wait.
>
> There's plenty of duplicated code for pin_pages_async, object_wait,
> check pages so I should refactor that into a variant,
> i915_gem_object_pin_pages_wait() ?

Yeah, makes sense to me.
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index a93e233cfaa9..84992d590da5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -154,7 +154,7 @@  static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_dire
 	bool write = (direction == DMA_BIDIRECTIONAL || direction == DMA_TO_DEVICE);
 	int err;
 
-	err = i915_gem_object_pin_pages(obj);
+	err = i915_gem_object_pin_pages_async(obj);
 	if (err)
 		return err;
 
@@ -175,7 +175,7 @@  static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direct
 	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
 	int err;
 
-	err = i915_gem_object_pin_pages(obj);
+	err = i915_gem_object_pin_pages_async(obj);
 	if (err)
 		return err;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index f9044bbdd429..bda990113124 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -49,17 +49,11 @@  i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
 
 	assert_object_held(obj);
 
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   (write ? I915_WAIT_ALL : 0),
-				   MAX_SCHEDULE_TIMEOUT);
-	if (ret)
-		return ret;
-
 	if (obj->write_domain == I915_GEM_DOMAIN_WC)
 		return 0;
 
-	/* Flush and acquire obj->pages so that we are coherent through
+	/*
+	 * Flush and acquire obj->pages so that we are coherent through
 	 * direct access in memory with previous cached writes through
 	 * shmemfs and that our cache domain tracking remains valid.
 	 * For example, if the obj->filp was moved to swap without us
@@ -67,10 +61,17 @@  i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
 	 * continue to assume that the obj remained out of the CPU cached
 	 * domain.
 	 */
-	ret = i915_gem_object_pin_pages(obj);
+	ret = i915_gem_object_pin_pages_async(obj);
 	if (ret)
 		return ret;
 
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   (write ? I915_WAIT_ALL : 0),
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		goto out_unpin;
+
 	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
 
 	/* Serialise direct access to this object with the barriers for
@@ -91,8 +92,9 @@  i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
 		obj->mm.dirty = true;
 	}
 
+out_unpin:
 	i915_gem_object_unpin_pages(obj);
-	return 0;
+	return ret;
 }
 
 /**
@@ -110,17 +112,11 @@  i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 
 	assert_object_held(obj);
 
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   (write ? I915_WAIT_ALL : 0),
-				   MAX_SCHEDULE_TIMEOUT);
-	if (ret)
-		return ret;
-
 	if (obj->write_domain == I915_GEM_DOMAIN_GTT)
 		return 0;
 
-	/* Flush and acquire obj->pages so that we are coherent through
+	/*
+	 * Flush and acquire obj->pages so that we are coherent through
 	 * direct access in memory with previous cached writes through
 	 * shmemfs and that our cache domain tracking remains valid.
 	 * For example, if the obj->filp was moved to swap without us
@@ -128,10 +124,17 @@  i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	 * continue to assume that the obj remained out of the CPU cached
 	 * domain.
 	 */
-	ret = i915_gem_object_pin_pages(obj);
+	ret = i915_gem_object_pin_pages_async(obj);
 	if (ret)
 		return ret;
 
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   (write ? I915_WAIT_ALL : 0),
+				   MAX_SCHEDULE_TIMEOUT);
+	if (ret)
+		goto out_unpin;
+
 	i915_gem_object_flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
 
 	/* Serialise direct access to this object with the barriers for
@@ -152,8 +155,9 @@  i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 		obj->mm.dirty = true;
 	}
 
+out_unpin:
 	i915_gem_object_unpin_pages(obj);
-	return 0;
+	return ret;
 }
 
 /**
@@ -603,19 +607,6 @@  i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 		goto out;
 	}
 
-	/*
-	 * Try to flush the object off the GPU without holding the lock.
-	 * We will repeat the flush holding the lock in the normal manner
-	 * to catch cases where we are gazumped.
-	 */
-	err = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE |
-				   I915_WAIT_PRIORITY |
-				   (write_domain ? I915_WAIT_ALL : 0),
-				   MAX_SCHEDULE_TIMEOUT);
-	if (err)
-		goto out;
-
 	/*
 	 * Proxy objects do not control access to the backing storage, ergo
 	 * they cannot be used as a means to manipulate the cache domain
@@ -636,10 +627,23 @@  i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 	 * continue to assume that the obj remained out of the CPU cached
 	 * domain.
 	 */
-	err = i915_gem_object_pin_pages(obj);
+	err = i915_gem_object_pin_pages_async(obj);
 	if (err)
 		goto out;
 
+	/*
+	 * Try to flush the object off the GPU without holding the lock.
+	 * We will repeat the flush holding the lock in the normal manner
+	 * to catch cases where we are gazumped.
+	 */
+	err = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_PRIORITY |
+				   (write_domain ? I915_WAIT_ALL : 0),
+				   MAX_SCHEDULE_TIMEOUT);
+	if (err)
+		goto out_unpin;
+
 	err = i915_gem_object_lock_interruptible(obj);
 	if (err)
 		goto out_unpin;
@@ -680,25 +684,25 @@  int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
 	if (!i915_gem_object_has_struct_page(obj))
 		return -ENODEV;
 
-	ret = i915_gem_object_lock_interruptible(obj);
+	ret = i915_gem_object_pin_pages_async(obj);
 	if (ret)
 		return ret;
 
+	ret = i915_gem_object_lock_interruptible(obj);
+	if (ret)
+		goto err_unpin;
+
 	ret = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE,
 				   MAX_SCHEDULE_TIMEOUT);
 	if (ret)
 		goto err_unlock;
 
-	ret = i915_gem_object_pin_pages(obj);
-	if (ret)
-		goto err_unlock;
-
 	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
 	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
 		ret = i915_gem_object_set_to_cpu_domain(obj, false);
 		if (ret)
-			goto err_unpin;
+			goto err_unlock;
 		else
 			goto out;
 	}
@@ -718,10 +722,10 @@  int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
 	/* return with the pages pinned */
 	return 0;
 
-err_unpin:
-	i915_gem_object_unpin_pages(obj);
 err_unlock:
 	i915_gem_object_unlock(obj);
+err_unpin:
+	i915_gem_object_unpin_pages(obj);
 	return ret;
 }
 
@@ -734,10 +738,14 @@  int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
 	if (!i915_gem_object_has_struct_page(obj))
 		return -ENODEV;
 
-	ret = i915_gem_object_lock_interruptible(obj);
+	ret = i915_gem_object_pin_pages_async(obj);
 	if (ret)
 		return ret;
 
+	ret = i915_gem_object_lock_interruptible(obj);
+	if (ret)
+		goto err_unpin;
+
 	ret = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE |
 				   I915_WAIT_ALL,
@@ -745,15 +753,11 @@  int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
 	if (ret)
 		goto err_unlock;
 
-	ret = i915_gem_object_pin_pages(obj);
-	if (ret)
-		goto err_unlock;
-
 	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
 	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
 		ret = i915_gem_object_set_to_cpu_domain(obj, true);
 		if (ret)
-			goto err_unpin;
+			goto err_unlock;
 		else
 			goto out;
 	}
@@ -782,9 +786,9 @@  int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
 	/* return with the pages pinned */
 	return 0;
 
-err_unpin:
-	i915_gem_object_unpin_pages(obj);
 err_unlock:
 	i915_gem_object_unlock(obj);
+err_unpin:
+	i915_gem_object_unpin_pages(obj);
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9e835355eb93..21416b87905e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -512,20 +512,21 @@  i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 
 	trace_i915_gem_object_pread(obj, args->offset, args->size);
 
-	ret = i915_gem_object_wait(obj,
-				   I915_WAIT_INTERRUPTIBLE,
-				   MAX_SCHEDULE_TIMEOUT);
+	ret = i915_gem_object_pin_pages_async(obj);
 	if (ret)
 		goto out;
 
-	ret = i915_gem_object_pin_pages(obj);
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE,
+				   MAX_SCHEDULE_TIMEOUT);
 	if (ret)
-		goto out;
+		goto out_unpin;
 
 	ret = i915_gem_shmem_pread(obj, args);
 	if (ret == -EFAULT || ret == -ENODEV)
 		ret = i915_gem_gtt_pread(obj, args);
 
+out_unpin:
 	i915_gem_object_unpin_pages(obj);
 out:
 	i915_gem_object_put(obj);
@@ -812,16 +813,16 @@  i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 	if (ret != -ENODEV)
 		goto err;
 
+	ret = i915_gem_object_pin_pages_async(obj);
+	if (ret)
+		goto err;
+
 	ret = i915_gem_object_wait(obj,
 				   I915_WAIT_INTERRUPTIBLE |
 				   I915_WAIT_ALL,
 				   MAX_SCHEDULE_TIMEOUT);
 	if (ret)
-		goto err;
-
-	ret = i915_gem_object_pin_pages(obj);
-	if (ret)
-		goto err;
+		goto err_unpin;
 
 	ret = -EFAULT;
 	/* We can only do the GTT pwrite on untiled buffers, as otherwise
@@ -845,6 +846,7 @@  i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 			ret = i915_gem_shmem_pwrite(obj, args);
 	}
 
+err_unpin:
 	i915_gem_object_unpin_pages(obj);
 err:
 	i915_gem_object_put(obj);