diff mbox series

[04/12] drm/ttm: add common accounting to the resource mgr v2

Message ID 20220124122514.1832-5-christian.koenig@amd.com (mailing list archive)
State New, archived
Headers show
Series [01/12] drm/ttm: add ttm_resource_fini | expand

Commit Message

Christian König Jan. 24, 2022, 12:25 p.m. UTC
It makes sense to have this in the common manager for debugging and
accounting of how much resources are used.

v2: cleanup kerneldoc a bit

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Huang Rui <ray.huang@amd.com>
---
 drivers/gpu/drm/ttm/ttm_resource.c |  8 ++++++++
 include/drm/ttm/ttm_resource.h     | 20 +++++++++++++++++++-
 2 files changed, 27 insertions(+), 1 deletion(-)

Comments

Daniel Vetter Jan. 25, 2022, 4:37 p.m. UTC | #1
On Mon, Jan 24, 2022 at 01:25:06PM +0100, Christian König wrote:
> It makes sense to have this in the common manager for debugging and
> accounting of how much resources are used.
> 
> v2: cleanup kerneldoc a bit
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>
> Reviewed-by: Huang Rui <ray.huang@amd.com>
> ---
>  drivers/gpu/drm/ttm/ttm_resource.c |  8 ++++++++
>  include/drm/ttm/ttm_resource.h     | 20 +++++++++++++++++++-
>  2 files changed, 27 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
> index 7fdd58b53c61..b8362492980d 100644
> --- a/drivers/gpu/drm/ttm/ttm_resource.c
> +++ b/drivers/gpu/drm/ttm/ttm_resource.c
> @@ -33,6 +33,8 @@ void ttm_resource_init(struct ttm_buffer_object *bo,
>                         const struct ttm_place *place,
>                         struct ttm_resource *res)
>  {
> +	struct ttm_resource_manager *man;
> +
>  	res->start = 0;
>  	res->num_pages = PFN_UP(bo->base.size);
>  	res->mem_type = place->mem_type;
> @@ -42,12 +44,16 @@ void ttm_resource_init(struct ttm_buffer_object *bo,
>  	res->bus.is_iomem = false;
>  	res->bus.caching = ttm_cached;
>  	res->bo = bo;
> +
> +	man = ttm_manager_type(bo->bdev, place->mem_type);
> +	atomic64_add(bo->base.size, &man->usage);

Doing this with atomics doesn't make a lot of sense to me. Yes with the
current organization it's the only thing to do in drivers, but if we move
this into ttm there's no reason we can track this together with the lru,
consistently with the lru, and under the same spinlock like the lru.

And at least spot-checking a few places the very next thing we generally
do is take the lru lock since there's really no other way to get the
resource into or out of the resource manager.

I think doing atomics for statistics when there's no need is not great,
because then people start using atomics for all kinds of other things, and
then get the barriers wrong. In i915 (simply due to the grotesque amount
of buggy overuse of atomics, both atomic_t and atomic bitfields) we've put
a hard block in place for any atomic addition. So that's why I'm a bit on
a crusade, but I also genuinely don't see why we need them here. All they
do is cost more since we have to take the spinlock anyway, the accounting
is just going to be a slight different (and imo more accurate) place.

Thoughts?

Cheers, Daniel

>  }
>  EXPORT_SYMBOL(ttm_resource_init);
>  
>  void ttm_resource_fini(struct ttm_resource_manager *man,
>  		       struct ttm_resource *res)
>  {
> +	atomic64_sub(res->bo->base.size, &man->usage);
>  }
>  EXPORT_SYMBOL(ttm_resource_fini);
>  
> @@ -149,6 +155,7 @@ void ttm_resource_manager_init(struct ttm_resource_manager *man,
>  	spin_lock_init(&man->move_lock);
>  	man->bdev = bdev;
>  	man->size = p_size;
> +	atomic64_set(&man->usage, 0);
>  
>  	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
>  		INIT_LIST_HEAD(&man->lru[i]);
> @@ -221,6 +228,7 @@ void ttm_resource_manager_debug(struct ttm_resource_manager *man,
>  	drm_printf(p, "  use_type: %d\n", man->use_type);
>  	drm_printf(p, "  use_tt: %d\n", man->use_tt);
>  	drm_printf(p, "  size: %llu\n", man->size);
> +	drm_printf(p, "  usage: %llu\n", atomic64_read(&man->usage));
>  	if (man->func->debug)
>  		man->func->debug(man, p);
>  }
> diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h
> index 69eea9d6399b..3d391279b33f 100644
> --- a/include/drm/ttm/ttm_resource.h
> +++ b/include/drm/ttm/ttm_resource.h
> @@ -27,6 +27,7 @@
>  
>  #include <linux/types.h>
>  #include <linux/mutex.h>
> +#include <linux/atomic.h>
>  #include <linux/dma-buf-map.h>
>  #include <linux/dma-fence.h>
>  #include <drm/drm_print.h>
> @@ -132,8 +133,12 @@ struct ttm_resource_manager {
>  	/*
>  	 * Protected by the global->lru_lock.
>  	 */
> -
>  	struct list_head lru[TTM_MAX_BO_PRIORITY];
> +
> +	/**
> +	 * @usage: How much of the region is used, has its own protection.
> +	 */
> +	atomic64_t usage;
>  };
>  
>  /**
> @@ -261,6 +266,19 @@ ttm_resource_manager_cleanup(struct ttm_resource_manager *man)
>  	man->move = NULL;
>  }
>  
> +/**
> + * ttm_resource_manager_usage
> + *
> + * @man: A memory manager object.
> + *
> + * Return how many resources are currently used.
> + */
> +static inline uint64_t
> +ttm_resource_manager_usage(struct ttm_resource_manager *man)
> +{
> +	return atomic64_read(&man->usage);
> +}
> +
>  void ttm_resource_init(struct ttm_buffer_object *bo,
>                         const struct ttm_place *place,
>                         struct ttm_resource *res);
> -- 
> 2.25.1
>
Christian König Jan. 26, 2022, 2:42 p.m. UTC | #2
Am 25.01.22 um 17:37 schrieb Daniel Vetter:
> On Mon, Jan 24, 2022 at 01:25:06PM +0100, Christian König wrote:
>> It makes sense to have this in the common manager for debugging and
>> accounting of how much resources are used.
>>
>> v2: cleanup kerneldoc a bit
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> Reviewed-by: Huang Rui <ray.huang@amd.com>
>> ---
>>   drivers/gpu/drm/ttm/ttm_resource.c |  8 ++++++++
>>   include/drm/ttm/ttm_resource.h     | 20 +++++++++++++++++++-
>>   2 files changed, 27 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
>> index 7fdd58b53c61..b8362492980d 100644
>> --- a/drivers/gpu/drm/ttm/ttm_resource.c
>> +++ b/drivers/gpu/drm/ttm/ttm_resource.c
>> @@ -33,6 +33,8 @@ void ttm_resource_init(struct ttm_buffer_object *bo,
>>                          const struct ttm_place *place,
>>                          struct ttm_resource *res)
>>   {
>> +	struct ttm_resource_manager *man;
>> +
>>   	res->start = 0;
>>   	res->num_pages = PFN_UP(bo->base.size);
>>   	res->mem_type = place->mem_type;
>> @@ -42,12 +44,16 @@ void ttm_resource_init(struct ttm_buffer_object *bo,
>>   	res->bus.is_iomem = false;
>>   	res->bus.caching = ttm_cached;
>>   	res->bo = bo;
>> +
>> +	man = ttm_manager_type(bo->bdev, place->mem_type);
>> +	atomic64_add(bo->base.size, &man->usage);
> Doing this with atomics doesn't make a lot of sense to me. Yes with the
> current organization it's the only thing to do in drivers, but if we move
> this into ttm there's no reason we can track this together with the lru,
> consistently with the lru, and under the same spinlock like the lru.
>
> And at least spot-checking a few places the very next thing we generally
> do is take the lru lock since there's really no other way to get the
> resource into or out of the resource manager.
>
> I think doing atomics for statistics when there's no need is not great,
> because then people start using atomics for all kinds of other things, and
> then get the barriers wrong. In i915 (simply due to the grotesque amount
> of buggy overuse of atomics, both atomic_t and atomic bitfields) we've put
> a hard block in place for any atomic addition. So that's why I'm a bit on
> a crusade, but I also genuinely don't see why we need them here. All they
> do is cost more since we have to take the spinlock anyway, the accounting
> is just going to be a slight different (and imo more accurate) place.
>
> Thoughts?

Well it depends. We have two use cases for those statistics:
1. Early abort when there isn't enough free resources.
2. Debugging

For the debugging it's completely irrelevant if we grab the lock or not, 
but for the early abort I'm not that sure.

Anyway I will just put that under the lock instead for now, if we really 
find that it is contended we could still switch back to an atomic.

Regards,
Christian.

>
> Cheers, Daniel
>
>>   }
>>   EXPORT_SYMBOL(ttm_resource_init);
>>   
>>   void ttm_resource_fini(struct ttm_resource_manager *man,
>>   		       struct ttm_resource *res)
>>   {
>> +	atomic64_sub(res->bo->base.size, &man->usage);
>>   }
>>   EXPORT_SYMBOL(ttm_resource_fini);
>>   
>> @@ -149,6 +155,7 @@ void ttm_resource_manager_init(struct ttm_resource_manager *man,
>>   	spin_lock_init(&man->move_lock);
>>   	man->bdev = bdev;
>>   	man->size = p_size;
>> +	atomic64_set(&man->usage, 0);
>>   
>>   	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
>>   		INIT_LIST_HEAD(&man->lru[i]);
>> @@ -221,6 +228,7 @@ void ttm_resource_manager_debug(struct ttm_resource_manager *man,
>>   	drm_printf(p, "  use_type: %d\n", man->use_type);
>>   	drm_printf(p, "  use_tt: %d\n", man->use_tt);
>>   	drm_printf(p, "  size: %llu\n", man->size);
>> +	drm_printf(p, "  usage: %llu\n", atomic64_read(&man->usage));
>>   	if (man->func->debug)
>>   		man->func->debug(man, p);
>>   }
>> diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h
>> index 69eea9d6399b..3d391279b33f 100644
>> --- a/include/drm/ttm/ttm_resource.h
>> +++ b/include/drm/ttm/ttm_resource.h
>> @@ -27,6 +27,7 @@
>>   
>>   #include <linux/types.h>
>>   #include <linux/mutex.h>
>> +#include <linux/atomic.h>
>>   #include <linux/dma-buf-map.h>
>>   #include <linux/dma-fence.h>
>>   #include <drm/drm_print.h>
>> @@ -132,8 +133,12 @@ struct ttm_resource_manager {
>>   	/*
>>   	 * Protected by the global->lru_lock.
>>   	 */
>> -
>>   	struct list_head lru[TTM_MAX_BO_PRIORITY];
>> +
>> +	/**
>> +	 * @usage: How much of the region is used, has its own protection.
>> +	 */
>> +	atomic64_t usage;
>>   };
>>   
>>   /**
>> @@ -261,6 +266,19 @@ ttm_resource_manager_cleanup(struct ttm_resource_manager *man)
>>   	man->move = NULL;
>>   }
>>   
>> +/**
>> + * ttm_resource_manager_usage
>> + *
>> + * @man: A memory manager object.
>> + *
>> + * Return how many resources are currently used.
>> + */
>> +static inline uint64_t
>> +ttm_resource_manager_usage(struct ttm_resource_manager *man)
>> +{
>> +	return atomic64_read(&man->usage);
>> +}
>> +
>>   void ttm_resource_init(struct ttm_buffer_object *bo,
>>                          const struct ttm_place *place,
>>                          struct ttm_resource *res);
>> -- 
>> 2.25.1
>>
Daniel Vetter Jan. 27, 2022, 8:48 a.m. UTC | #3
On Wed, Jan 26, 2022 at 03:42:21PM +0100, Christian König wrote:
> Am 25.01.22 um 17:37 schrieb Daniel Vetter:
> > On Mon, Jan 24, 2022 at 01:25:06PM +0100, Christian König wrote:
> > > It makes sense to have this in the common manager for debugging and
> > > accounting of how much resources are used.
> > > 
> > > v2: cleanup kerneldoc a bit
> > > 
> > > Signed-off-by: Christian König <christian.koenig@amd.com>
> > > Reviewed-by: Huang Rui <ray.huang@amd.com>
> > > ---
> > >   drivers/gpu/drm/ttm/ttm_resource.c |  8 ++++++++
> > >   include/drm/ttm/ttm_resource.h     | 20 +++++++++++++++++++-
> > >   2 files changed, 27 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
> > > index 7fdd58b53c61..b8362492980d 100644
> > > --- a/drivers/gpu/drm/ttm/ttm_resource.c
> > > +++ b/drivers/gpu/drm/ttm/ttm_resource.c
> > > @@ -33,6 +33,8 @@ void ttm_resource_init(struct ttm_buffer_object *bo,
> > >                          const struct ttm_place *place,
> > >                          struct ttm_resource *res)
> > >   {
> > > +	struct ttm_resource_manager *man;
> > > +
> > >   	res->start = 0;
> > >   	res->num_pages = PFN_UP(bo->base.size);
> > >   	res->mem_type = place->mem_type;
> > > @@ -42,12 +44,16 @@ void ttm_resource_init(struct ttm_buffer_object *bo,
> > >   	res->bus.is_iomem = false;
> > >   	res->bus.caching = ttm_cached;
> > >   	res->bo = bo;
> > > +
> > > +	man = ttm_manager_type(bo->bdev, place->mem_type);
> > > +	atomic64_add(bo->base.size, &man->usage);
> > Doing this with atomics doesn't make a lot of sense to me. Yes with the
> > current organization it's the only thing to do in drivers, but if we move
> > this into ttm there's no reason we can track this together with the lru,
> > consistently with the lru, and under the same spinlock like the lru.
> > 
> > And at least spot-checking a few places the very next thing we generally
> > do is take the lru lock since there's really no other way to get the
> > resource into or out of the resource manager.
> > 
> > I think doing atomics for statistics when there's no need is not great,
> > because then people start using atomics for all kinds of other things, and
> > then get the barriers wrong. In i915 (simply due to the grotesque amount
> > of buggy overuse of atomics, both atomic_t and atomic bitfields) we've put
> > a hard block in place for any atomic addition. So that's why I'm a bit on
> > a crusade, but I also genuinely don't see why we need them here. All they
> > do is cost more since we have to take the spinlock anyway, the accounting
> > is just going to be a slight different (and imo more accurate) place.
> > 
> > Thoughts?
> 
> Well it depends. We have two use cases for those statistics:
> 1. Early abort when there isn't enough free resources.
> 2. Debugging
> 
> For the debugging it's completely irrelevant if we grab the lock or not, but
> for the early abort I'm not that sure.
> 
> Anyway I will just put that under the lock instead for now, if we really
> find that it is contended we could still switch back to an atomic.

To clarify, I don't mind using atomic for statistics, it's kinda the prime
example use case for them (and the reason really why they're unordered
atomics by default). It's just if you already do take a lock anyway might
as well include the statistics in there too, because except in really
silly cases that should be all faster. Worst case you need to make sure
that the datastructure and statistics (list_head and u64 here) are in the
same cacheline, and with that the statistics practically become free when
done under the spinlock.

Anyway just figured I'll drop a bit more of my thinking on this topic
here, we're agreeing already on what the code should look like anyway :-)

Cheers, Daniel

> 
> Regards,
> Christian.
> 
> > 
> > Cheers, Daniel
> > 
> > >   }
> > >   EXPORT_SYMBOL(ttm_resource_init);
> > >   void ttm_resource_fini(struct ttm_resource_manager *man,
> > >   		       struct ttm_resource *res)
> > >   {
> > > +	atomic64_sub(res->bo->base.size, &man->usage);
> > >   }
> > >   EXPORT_SYMBOL(ttm_resource_fini);
> > > @@ -149,6 +155,7 @@ void ttm_resource_manager_init(struct ttm_resource_manager *man,
> > >   	spin_lock_init(&man->move_lock);
> > >   	man->bdev = bdev;
> > >   	man->size = p_size;
> > > +	atomic64_set(&man->usage, 0);
> > >   	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
> > >   		INIT_LIST_HEAD(&man->lru[i]);
> > > @@ -221,6 +228,7 @@ void ttm_resource_manager_debug(struct ttm_resource_manager *man,
> > >   	drm_printf(p, "  use_type: %d\n", man->use_type);
> > >   	drm_printf(p, "  use_tt: %d\n", man->use_tt);
> > >   	drm_printf(p, "  size: %llu\n", man->size);
> > > +	drm_printf(p, "  usage: %llu\n", atomic64_read(&man->usage));
> > >   	if (man->func->debug)
> > >   		man->func->debug(man, p);
> > >   }
> > > diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h
> > > index 69eea9d6399b..3d391279b33f 100644
> > > --- a/include/drm/ttm/ttm_resource.h
> > > +++ b/include/drm/ttm/ttm_resource.h
> > > @@ -27,6 +27,7 @@
> > >   #include <linux/types.h>
> > >   #include <linux/mutex.h>
> > > +#include <linux/atomic.h>
> > >   #include <linux/dma-buf-map.h>
> > >   #include <linux/dma-fence.h>
> > >   #include <drm/drm_print.h>
> > > @@ -132,8 +133,12 @@ struct ttm_resource_manager {
> > >   	/*
> > >   	 * Protected by the global->lru_lock.
> > >   	 */
> > > -
> > >   	struct list_head lru[TTM_MAX_BO_PRIORITY];
> > > +
> > > +	/**
> > > +	 * @usage: How much of the region is used, has its own protection.
> > > +	 */
> > > +	atomic64_t usage;
> > >   };
> > >   /**
> > > @@ -261,6 +266,19 @@ ttm_resource_manager_cleanup(struct ttm_resource_manager *man)
> > >   	man->move = NULL;
> > >   }
> > > +/**
> > > + * ttm_resource_manager_usage
> > > + *
> > > + * @man: A memory manager object.
> > > + *
> > > + * Return how many resources are currently used.
> > > + */
> > > +static inline uint64_t
> > > +ttm_resource_manager_usage(struct ttm_resource_manager *man)
> > > +{
> > > +	return atomic64_read(&man->usage);
> > > +}
> > > +
> > >   void ttm_resource_init(struct ttm_buffer_object *bo,
> > >                          const struct ttm_place *place,
> > >                          struct ttm_resource *res);
> > > -- 
> > > 2.25.1
> > > 
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
index 7fdd58b53c61..b8362492980d 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -33,6 +33,8 @@  void ttm_resource_init(struct ttm_buffer_object *bo,
                        const struct ttm_place *place,
                        struct ttm_resource *res)
 {
+	struct ttm_resource_manager *man;
+
 	res->start = 0;
 	res->num_pages = PFN_UP(bo->base.size);
 	res->mem_type = place->mem_type;
@@ -42,12 +44,16 @@  void ttm_resource_init(struct ttm_buffer_object *bo,
 	res->bus.is_iomem = false;
 	res->bus.caching = ttm_cached;
 	res->bo = bo;
+
+	man = ttm_manager_type(bo->bdev, place->mem_type);
+	atomic64_add(bo->base.size, &man->usage);
 }
 EXPORT_SYMBOL(ttm_resource_init);
 
 void ttm_resource_fini(struct ttm_resource_manager *man,
 		       struct ttm_resource *res)
 {
+	atomic64_sub(res->bo->base.size, &man->usage);
 }
 EXPORT_SYMBOL(ttm_resource_fini);
 
@@ -149,6 +155,7 @@  void ttm_resource_manager_init(struct ttm_resource_manager *man,
 	spin_lock_init(&man->move_lock);
 	man->bdev = bdev;
 	man->size = p_size;
+	atomic64_set(&man->usage, 0);
 
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
 		INIT_LIST_HEAD(&man->lru[i]);
@@ -221,6 +228,7 @@  void ttm_resource_manager_debug(struct ttm_resource_manager *man,
 	drm_printf(p, "  use_type: %d\n", man->use_type);
 	drm_printf(p, "  use_tt: %d\n", man->use_tt);
 	drm_printf(p, "  size: %llu\n", man->size);
+	drm_printf(p, "  usage: %llu\n", atomic64_read(&man->usage));
 	if (man->func->debug)
 		man->func->debug(man, p);
 }
diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h
index 69eea9d6399b..3d391279b33f 100644
--- a/include/drm/ttm/ttm_resource.h
+++ b/include/drm/ttm/ttm_resource.h
@@ -27,6 +27,7 @@ 
 
 #include <linux/types.h>
 #include <linux/mutex.h>
+#include <linux/atomic.h>
 #include <linux/dma-buf-map.h>
 #include <linux/dma-fence.h>
 #include <drm/drm_print.h>
@@ -132,8 +133,12 @@  struct ttm_resource_manager {
 	/*
 	 * Protected by the global->lru_lock.
 	 */
-
 	struct list_head lru[TTM_MAX_BO_PRIORITY];
+
+	/**
+	 * @usage: How much of the region is used, has its own protection.
+	 */
+	atomic64_t usage;
 };
 
 /**
@@ -261,6 +266,19 @@  ttm_resource_manager_cleanup(struct ttm_resource_manager *man)
 	man->move = NULL;
 }
 
+/**
+ * ttm_resource_manager_usage
+ *
+ * @man: A memory manager object.
+ *
+ * Return how many resources are currently used.
+ */
+static inline uint64_t
+ttm_resource_manager_usage(struct ttm_resource_manager *man)
+{
+	return atomic64_read(&man->usage);
+}
+
 void ttm_resource_init(struct ttm_buffer_object *bo,
                        const struct ttm_place *place,
                        struct ttm_resource *res);