
[2/3] drm/i915: prefer FORCE_WC for the blitter routines

Message ID 20210118141732.90173-2-matthew.auld@intel.com (mailing list archive)
State New, archived
Series [1/3] drm/i915: Fix the sgt.pfn sanity check

Commit Message

Matthew Auld Jan. 18, 2021, 2:17 p.m. UTC
From: CQ Tang <cq.tang@intel.com>

The pool is shared and so we might find a pool object with an existing
mapping, but one that is mapped with a different underlying type, which
will result in -EBUSY.

Signed-off-by: CQ Tang <cq.tang@intel.com>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_object_blt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
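
For illustration, a rough sketch of the failure mode being fixed; the
pin_map behaviour is paraphrased from the discussion below rather than
quoted from the driver:

void *cmd;

/*
 * Illustrative only, not the patch itself.  A recycled pool object may
 * still carry a cached kernel mapping of a different type left behind
 * by an earlier user, and the pages are pinned when the node is taken
 * from the pool, so that stale mapping cannot simply be replaced:
 */
cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
/* -> ERR_PTR(-EBUSY) when the cached mapping type differs */

/*
 * The FORCE variant instead asks pin_map to throw away the mismatched
 * mapping and rebuild it with the requested type:
 */
cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_FORCE_WC);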

Comments

Chris Wilson Jan. 18, 2021, 2:44 p.m. UTC | #1
Quoting Matthew Auld (2021-01-18 14:17:31)
> From: CQ Tang <cq.tang@intel.com>

First patch hasn't arrived, so excuse this misplaced reply.

-	if (GEM_WARN_ON(!r->sgt.pfn))
+	if (GEM_WARN_ON(!use_dma(r->iobase) && !r->sgt.pfn))
 		return -EINVAL;

The better check would be if (GEM_WARN_ON(!r->sgt.sgp)) return -EINVAL;
-Chris
Chris Wilson Jan. 18, 2021, 2:54 p.m. UTC | #2
Quoting Matthew Auld (2021-01-18 14:17:31)
> From: CQ Tang <cq.tang@intel.com>
> 
> The pool is shared and so we might find a pool object with an existing
> mapping, but one that is mapped with a different underlying type, which
> will result in -EBUSY.
> 
> Signed-off-by: CQ Tang <cq.tang@intel.com>
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> ---
>  drivers/gpu/drm/i915/gem/i915_gem_object_blt.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
> index 10cac9fac79b..c6db745900b3 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
> @@ -55,7 +55,7 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
>         if (unlikely(err))
>                 goto out_put;
>  
> -       cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
> +       cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_FORCE_WC);
>         if (IS_ERR(cmd)) {
>                 err = PTR_ERR(cmd);
>                 goto out_unpin;
> @@ -277,7 +277,7 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
>         if (unlikely(err))
>                 goto out_put;
>  
> -       cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
> +       cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_FORCE_WC);
>         if (IS_ERR(cmd)) {
>                 err = PTR_ERR(cmd);
>                 goto out_unpin;

FORCE is becoming meaningless.

In this case we pin the pages upon acquiring the node from the pool,
which then prevents us from changing the mapping type. The point of
that was to let us cache the mapping between users, and here we are
saying that the cache is made useless. The danger is that we are now
thrashing the cache, hurting ourselves with the vmap overhead.

Maybe we should move the mapping-type into the buffer-pool cache itself?
-Chris
Matthew Auld Jan. 18, 2021, 3:55 p.m. UTC | #3
On Mon, 18 Jan 2021 at 14:54, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>
> Quoting Matthew Auld (2021-01-18 14:17:31)
> > From: CQ Tang <cq.tang@intel.com>
> >
> > The pool is shared and so we might find a pool object with an existing
> > mapping, but one that is mapped with a different underlying type, which
> > will result in -EBUSY.
> >
> > Signed-off-by: CQ Tang <cq.tang@intel.com>
> > Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> > ---
> >  drivers/gpu/drm/i915/gem/i915_gem_object_blt.c | 4 ++--
> >  1 file changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
> > index 10cac9fac79b..c6db745900b3 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
> > @@ -55,7 +55,7 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
> >         if (unlikely(err))
> >                 goto out_put;
> >
> > -       cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
> > +       cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_FORCE_WC);
> >         if (IS_ERR(cmd)) {
> >                 err = PTR_ERR(cmd);
> >                 goto out_unpin;
> > @@ -277,7 +277,7 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
> >         if (unlikely(err))
> >                 goto out_put;
> >
> > -       cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
> > +       cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_FORCE_WC);
> >         if (IS_ERR(cmd)) {
> >                 err = PTR_ERR(cmd);
> >                 goto out_unpin;
>
> FORCE is becoming meaningless.
>
> In this case we pin the pages upon acquiring the node from the pool,
> which then prevents us from changing the mapping type. The point of
> that was to let us cache the mapping between users, and here we are
> saying that the cache is made useless. The danger is that we are now
> thrashing the cache, hurting ourselves with the vmap overhead.
>
> Maybe we should move the mapping-type into the buffer-pool cache itself?

Yeah, makes sense I think. Maybe something simple like:

--- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
@@ -145,7 +145,8 @@ static void pool_retire(struct i915_active *ref)
 }

 static struct intel_gt_buffer_pool_node *
-node_create(struct intel_gt_buffer_pool *pool, size_t sz)
+node_create(struct intel_gt_buffer_pool *pool, size_t sz,
+           enum i915_map_type type)
 {
        struct intel_gt *gt = to_gt(pool);
        struct intel_gt_buffer_pool_node *node;
@@ -169,12 +170,14 @@ node_create(struct intel_gt_buffer_pool *pool, size_t sz)

        i915_gem_object_set_readonly(obj);

+       node->type = type;
        node->obj = obj;
        return node;
 }

 struct intel_gt_buffer_pool_node *
-intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size)
+intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size,
+                        enum i915_map_type type)
 {
        struct intel_gt_buffer_pool *pool = &gt->buffer_pool;
        struct intel_gt_buffer_pool_node *node;
@@ -191,6 +194,9 @@ intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size)
                if (node->obj->base.size < size)
                        continue;

+               if (node->type != type)
+                       continue;
+
                age = READ_ONCE(node->age);
                if (!age)
                        continue;
@@ -205,7 +211,7 @@ intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size)
        rcu_read_unlock();

        if (&node->link == list) {
-               node = node_create(pool, size);
+               node = node_create(pool, size, type);
                if (IS_ERR(node))
                        return node;
        }
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h
index 42cbac003e8a..6068f8f1762e 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h
@@ -15,7 +15,8 @@ struct intel_gt;
 struct i915_request;

 struct intel_gt_buffer_pool_node *
-intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size);
+intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size,
+                        enum i915_map_type type);

 static inline int
 intel_gt_buffer_pool_mark_active(struct intel_gt_buffer_pool_node *node,
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h
index bcf1658c9633..e8f7dba36b76 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h
@@ -31,6 +31,7 @@ struct intel_gt_buffer_pool_node {
                struct rcu_head rcu;
        };
        unsigned long age;
+       enum i915_map_type type;
 };
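
The blitter callers would then request the node with the mapping type
they intend to pin, something along these lines (untested, caller
spelled from memory):

pool = intel_gt_get_buffer_pool(ce->engine->gt, size, I915_MAP_WC);

after which the plain I915_MAP_WC pin should be enough again, since a
node handed back from the pool can no longer hold a mapping of another
type.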

Or maybe it should be split over multiple lists or something, one for each type?
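
e.g. something roughly like this, keeping one run of size buckets per
(plain) mapping type; I915_MAP_TYPES and the names below are made up
for illustration and it is completely untested:

struct intel_gt_buffer_pool {
        spinlock_t lock;
        struct list_head cache_list[I915_MAP_TYPES][4];
        /* ... */
};

static struct list_head *
bucket_for_size(struct intel_gt_buffer_pool *pool, size_t sz,
                enum i915_map_type type)
{
        int n;

        /*
         * Same power-of-two size bucketing as before, just selected per
         * mapping type; assumes callers pass the plain WB/WC values and
         * not the FORCE variants.
         */
        n = fls(sz >> PAGE_SHIFT);
        if (n)
                n--;
        if (n >= ARRAY_SIZE(pool->cache_list[type]))
                n = ARRAY_SIZE(pool->cache_list[type]) - 1;

        return &pool->cache_list[type][n];
}

That would avoid skipping over nodes of the wrong type at all, at the
cost of a few more (mostly empty) lists.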

> -Chris
Chris Wilson Jan. 18, 2021, 4:02 p.m. UTC | #4
Quoting Matthew Auld (2021-01-18 15:55:31)
> On Mon, 18 Jan 2021 at 14:54, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> >
> > Quoting Matthew Auld (2021-01-18 14:17:31)
> > > From: CQ Tang <cq.tang@intel.com>
> > >
> > > The pool is shared and so we might find a pool object with an existing
> > > mapping, but one that is mapped with a different underlying type, which
> > > will result in -EBUSY.
> > >
> > > Signed-off-by: CQ Tang <cq.tang@intel.com>
> > > Signed-off-by: Matthew Auld <matthew.auld@intel.com>
> > > ---
> > >  drivers/gpu/drm/i915/gem/i915_gem_object_blt.c | 4 ++--
> > >  1 file changed, 2 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
> > > index 10cac9fac79b..c6db745900b3 100644
> > > --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
> > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
> > > @@ -55,7 +55,7 @@ struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
> > >         if (unlikely(err))
> > >                 goto out_put;
> > >
> > > -       cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
> > > +       cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_FORCE_WC);
> > >         if (IS_ERR(cmd)) {
> > >                 err = PTR_ERR(cmd);
> > >                 goto out_unpin;
> > > @@ -277,7 +277,7 @@ struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
> > >         if (unlikely(err))
> > >                 goto out_put;
> > >
> > > -       cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
> > > +       cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_FORCE_WC);
> > >         if (IS_ERR(cmd)) {
> > >                 err = PTR_ERR(cmd);
> > >                 goto out_unpin;
> >
> > FORCE is becoming meaningless.
> >
> > In this case we pin the pages upon acquiring the node from the pool,
> > which then prevents us from changing the mapping type. The point of
> > that was to let us cache the mapping between users, and here we are
> > saying that the cache is made useless. The danger is that we are now
> > thrashing the cache, hurting ourselves with the vmap overhead.
> >
> > Maybe we should move the mapping-type into the buffer-pool cache itself?
> 
> Yeah, makes sense I think. Maybe something simple like:
> 
> --- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.c
> @@ -145,7 +145,8 @@ static void pool_retire(struct i915_active *ref)
>  }
> 
>  static struct intel_gt_buffer_pool_node *
> -node_create(struct intel_gt_buffer_pool *pool, size_t sz)
> +node_create(struct intel_gt_buffer_pool *pool, size_t sz,
> +           enum i915_map_type type)
>  {
>         struct intel_gt *gt = to_gt(pool);
>         struct intel_gt_buffer_pool_node *node;
> @@ -169,12 +170,14 @@ node_create(struct intel_gt_buffer_pool *pool, size_t sz)
> 
>         i915_gem_object_set_readonly(obj);
> 
> +       node->type = type;
>         node->obj = obj;
>         return node;
>  }
> 
>  struct intel_gt_buffer_pool_node *
> -intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size)
> +intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size,
> +                        enum i915_map_type type)
>  {
>         struct intel_gt_buffer_pool *pool = &gt->buffer_pool;
>         struct intel_gt_buffer_pool_node *node;
> @@ -191,6 +194,9 @@ intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size)
>                 if (node->obj->base.size < size)
>                         continue;
> 
> +               if (node->type != type)
> +                       continue;
> +
>                 age = READ_ONCE(node->age);
>                 if (!age)
>                         continue;
> @@ -205,7 +211,7 @@ intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size)
>         rcu_read_unlock();
> 
>         if (&node->link == list) {
> -               node = node_create(pool, size);
> +               node = node_create(pool, size, type);
>                 if (IS_ERR(node))
>                         return node;
>         }
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h
> index 42cbac003e8a..6068f8f1762e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool.h
> @@ -15,7 +15,8 @@ struct intel_gt;
>  struct i915_request;
> 
>  struct intel_gt_buffer_pool_node *
> -intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size);
> +intel_gt_get_buffer_pool(struct intel_gt *gt, size_t size,
> +                        enum i915_map_type type);
> 
>  static inline int
>  intel_gt_buffer_pool_mark_active(struct intel_gt_buffer_pool_node *node,
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h
> index bcf1658c9633..e8f7dba36b76 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_buffer_pool_types.h
> @@ -31,6 +31,7 @@ struct intel_gt_buffer_pool_node {
>                 struct rcu_head rcu;
>         };
>         unsigned long age;
> +       enum i915_map_type type;
>  };
> 
> Or maybe it should be split over multiple lists or something, one for each type?

This looks good for a first pass. We can split the buckets by type later
if we feel so inclined. At the moment, I hope our lists are short enough
that we only have to skip one or two before finding a match.
-Chris

Patch

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
index 10cac9fac79b..c6db745900b3 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
@@ -55,7 +55,7 @@  struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
 	if (unlikely(err))
 		goto out_put;
 
-	cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
+	cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_FORCE_WC);
 	if (IS_ERR(cmd)) {
 		err = PTR_ERR(cmd);
 		goto out_unpin;
@@ -277,7 +277,7 @@  struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
 	if (unlikely(err))
 		goto out_put;
 
-	cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
+	cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_FORCE_WC);
 	if (IS_ERR(cmd)) {
 		err = PTR_ERR(cmd);
 		goto out_unpin;