[RFC,6/6] drm/i915: Implement fdinfo memory stats printing

Message ID 20230417155613.4143258-7-tvrtko.ursulin@linux.intel.com (mailing list archive)
State New, archived
Series fdinfo alternative memory stats proposal

Commit Message

Tvrtko Ursulin April 17, 2023, 3:56 p.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Show how a more driver-specific set of memory stats could be exposed:
specifically where an object can reside in multiple regions, where all the
supported stats are shown, and where there is more to report than just
user-visible objects.

WIP...

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 drivers/gpu/drm/i915/i915_driver.c     |   5 ++
 drivers/gpu/drm/i915/i915_drm_client.c | 102 +++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_drm_client.h |   8 ++
 drivers/gpu/drm/i915/i915_drv.h        |   2 +
 4 files changed, 117 insertions(+)
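
For orientation, the sketch below shows how the DRM core side (added in
earlier patches of this series, not included here) might consume the two new
driver callbacks when printing fdinfo. The struct drm_fdinfo_memory_stat
fields and the callback signatures are taken from this patch; the helper
name, the fixed array size and the output key format are assumptions made up
purely for illustration:

static void example_print_memory_stats(struct drm_printer *p,
				       struct drm_file *file)
{
	struct drm_device *dev = file->minor->dev;
	/* A real implementation would allocate 'num' entries instead. */
	struct drm_fdinfo_memory_stat stats[8] = {};
	unsigned int num = 0, i;
	char **regions;

	regions = dev->driver->query_fdinfo_memory_regions(dev, &num);
	dev->driver->query_fdinfo_memory_stats(file, stats);

	for (i = 0; i < num; i++) {
		if (!regions[i])
			continue;

		/* Two of the five categories shown; sizes reported in KiB. */
		drm_printf(p, "drm-memory-%s: %llu KiB\n",
			   regions[i], stats[i].size >> 10);
		drm_printf(p, "drm-memory-%s-resident: %llu KiB\n",
			   regions[i], stats[i].resident >> 10);
	}
}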

Comments

Rob Clark April 18, 2023, 2:39 p.m. UTC | #1
On Mon, Apr 17, 2023 at 8:56 AM Tvrtko Ursulin
<tvrtko.ursulin@linux.intel.com> wrote:
>
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> Show how more driver specific set of memory stats could be shown,
> more specifically where object can reside in multiple regions, showing all
> the supported stats, and where there is more to show than just user visible
> objects.
>
> WIP...
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_driver.c     |   5 ++
>  drivers/gpu/drm/i915/i915_drm_client.c | 102 +++++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_drm_client.h |   8 ++
>  drivers/gpu/drm/i915/i915_drv.h        |   2 +
>  4 files changed, 117 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
> index 6493548c69bf..4c70206cbc27 100644
> --- a/drivers/gpu/drm/i915/i915_driver.c
> +++ b/drivers/gpu/drm/i915/i915_driver.c
> @@ -1806,6 +1806,11 @@ static const struct drm_driver i915_drm_driver = {
>         .dumb_create = i915_gem_dumb_create,
>         .dumb_map_offset = i915_gem_dumb_mmap_offset,
>
> +#ifdef CONFIG_PROC_FS
> +       .query_fdinfo_memory_regions = i915_query_fdinfo_memory_regions,
> +       .query_fdinfo_memory_stats = i915_query_fdinfo_memory_stats,
> +#endif
> +
>         .ioctls = i915_ioctls,
>         .num_ioctls = ARRAY_SIZE(i915_ioctls),
>         .fops = &i915_driver_fops,
> diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c
> index c654984189f7..65857c68bdb3 100644
> --- a/drivers/gpu/drm/i915/i915_drm_client.c
> +++ b/drivers/gpu/drm/i915/i915_drm_client.c
> @@ -12,6 +12,7 @@
>  #include <drm/drm_print.h>
>
>  #include "gem/i915_gem_context.h"
> +#include "intel_memory_region.h"
>  #include "i915_drm_client.h"
>  #include "i915_file_private.h"
>  #include "i915_gem.h"
> @@ -112,4 +113,105 @@ void i915_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file)
>         for (i = 0; i < ARRAY_SIZE(uabi_class_names); i++)
>                 show_client_class(p, i915, file_priv->client, i);
>  }
> +
> +char **
> +i915_query_fdinfo_memory_regions(struct drm_device *dev, unsigned int *num)
> +{
> +       struct drm_i915_private *i915 = to_i915(dev);
> +       struct intel_memory_region *mr;
> +       enum intel_region_id id;
> +
> +       /* FIXME move to init */
> +       for_each_memory_region(mr, i915, id) {
> +               if (!i915->mm.region_names[id])
> +                       i915->mm.region_names[id] = mr->name;
> +       }
> +
> +       *num = id;
> +
> +       return i915->mm.region_names;
> +}
> +
> +static void
> +add_obj(struct drm_i915_gem_object *obj, struct drm_fdinfo_memory_stat *stats)
> +{
> +        struct intel_memory_region *mr;
> +       u64 sz = obj->base.size;
> +        enum intel_region_id id;
> +       unsigned int i;
> +
> +       if (!obj)
> +               return;
> +
> +       /* Attribute size and shared to all possible memory regions. */
> +       for (i = 0; i < obj->mm.n_placements; i++) {
> +               mr = obj->mm.placements[i];
> +               id = mr->id;
> +
> +               stats[id].size += sz;

This implies that summing up all of the categories is not the same as
the toplevel stats that I was proposing

BR,
-R

> [snip]
Tvrtko Ursulin April 18, 2023, 2:58 p.m. UTC | #2
On 18/04/2023 15:39, Rob Clark wrote:
> On Mon, Apr 17, 2023 at 8:56 AM Tvrtko Ursulin
> <tvrtko.ursulin@linux.intel.com> wrote:
>> [snip]
>> +               stats[id].size += sz;
> 
> This implies that summing up all of the categories is not the same as
> the toplevel stats that I was proposing

Correct, my categories are a bit different. You had private and shared as two mutually exclusive buckets, and then resident as a subset of either/both. I have size as an analogue to VmSize and resident as a subset of that, an analogue to VmRss.

Shared is a bit wishy-washy, not sure about that one in either proposal. It can be either imported or exported buffers, but in essence I think it fits better as a subset of total size.

Regards,

Tvrtko

>> [snip]
Rob Clark April 18, 2023, 4:08 p.m. UTC | #3
On Tue, Apr 18, 2023 at 7:58 AM Tvrtko Ursulin
<tvrtko.ursulin@linux.intel.com> wrote:
>
>
> On 18/04/2023 15:39, Rob Clark wrote:
> > On Mon, Apr 17, 2023 at 8:56 AM Tvrtko Ursulin
> > <tvrtko.ursulin@linux.intel.com> wrote:
> >> [snip]
> >> +               stats[id].size += sz;
> >
> > This implies that summing up all of the categories is not the same as
> > the toplevel stats that I was proposing

Sorry, I mis-spoke, I meant "summing up all of the regions is not..."

> Correct, my categories are a bit different. You had private and shared as two mutually exclusive buckets, and then resident as subset of either/both. I have size as analogue to VmSize and resident as a subset of that, analogue to VmRss.
>

I split shared because by definition shared buffers can be counted
against multiple drm_file's, whereas private is only counted against
the single drm_file.  Driver or app changes are unlikely to change the
shared size, whereas private footprint is a thing you can optimize to
some degree.

> Shared is a bit wishy-washy, not sure about that one in either proposals. It can be either imported or exported buffers, but in essence I think it fits better as a subset of total size.

Imported vs exported doesn't really matter.. it is just an
implementation detail of the winsys.  But I think it is useful to know
how much of an app's footprint is shared vs private.  You could
express it different ways, but my proposal had private and shared,
from which you can calculate total:

   total = private + shared

but you could flip the path around and advertise just total and
shared, and calculate private from that.
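
To make that concrete, here is a minimal sketch of the scheme described
above - the struct and function names are invented for illustration and this
is not code from either proposal:

struct example_client_mem {
	u64 private;	/* only ever counted against this drm_file */
	u64 shared;	/* can be counted against several drm_files */
};

static void example_account_obj(struct example_client_mem *m, u64 size,
				unsigned int handle_count)
{
	/* Mutually exclusive buckets, decided by the GEM handle count. */
	if (handle_count > 1)
		m->shared += size;
	else
		m->private += size;
}

/*
 * Total footprint is then derived rather than tracked separately:
 *   total = private + shared
 * or, flipping it around, total and shared could be advertised and private
 * calculated from those two.
 */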

BR,
-R

> Regards,
>
> Tvrtko
>
> >> [snip]
Tvrtko Ursulin April 19, 2023, 2:06 p.m. UTC | #4
On 18/04/2023 17:08, Rob Clark wrote:
> On Tue, Apr 18, 2023 at 7:58 AM Tvrtko Ursulin
> <tvrtko.ursulin@linux.intel.com> wrote:
>> On 18/04/2023 15:39, Rob Clark wrote:
>>> On Mon, Apr 17, 2023 at 8:56 AM Tvrtko Ursulin
>>> <tvrtko.ursulin@linux.intel.com> wrote:
>>>> [snip]
>>>> +               stats[id].size += sz;
>>>
>>> This implies that summing up all of the categories is not the same as
>>> the toplevel stats that I was proposing
> 
> Sorry, I mis-spoke, I meant "summing up all of the regions is not..."

Ah okay. It could be made like that, yes.

I wasn't sure what would be more useful for drivers which support memory
regions: to see how much memory a file could be using in the worst case, or
strictly how much it is currently using. So for buffer objects where
userspace allows the kernel to choose the region from a supplied list, I
thought it would be useful to count the total size against all possible
regions.

In a way you see this driver /could/ be using 1G in vram and 1G in
system, but currently only has 1G resident in vram. Or you see another
file which has 1G vram size and 1G resident size and you can infer some
things.

Perhaps that can be confusing and it would be better to let total size
migrate between regions at runtime, as resident and the other categories
do. But then the total size per region would change at runtime, influenced
by other app activity (as the driver is transparently migrating buffers
between regions). Which can also be very confusing - it would appear as if
the app is creating/freeing objects when it isn't.
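
As a side by side sketch of the two attribution policies being weighed here
(simplified, reusing the object fields from add_obj() in this patch, but not
actual proposed code):

/* Option A - what this RFC does: count size against every allowed placement.
 * Per-region totals are stable, but summed across regions they overstate the
 * real footprint. */
static void attribute_size_all_placements(struct drm_i915_gem_object *obj,
					  struct drm_fdinfo_memory_stat *stats)
{
	unsigned int i;

	for (i = 0; i < obj->mm.n_placements; i++)
		stats[obj->mm.placements[i]->id].size += obj->base.size;
}

/* Option B - count size only against the current backing region. Totals then
 * migrate between regions together with resident as buffers are moved. */
static void attribute_size_current_region(struct drm_i915_gem_object *obj,
					  struct drm_fdinfo_memory_stat *stats)
{
	enum intel_region_id id;

	id = obj->mm.region ? obj->mm.region->id : INTEL_REGION_SMEM;
	stats[id].size += obj->base.size;
}
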
>> Correct, my categories are a bit different. You had private and shared as two mutually exclusive buckets, and then resident as subset of either/both. I have size as analogue to VmSize and resident as a subset of that, analogue to VmRss.
>>
> 
> I split shared because by definition shared buffers can be counted
> against multiple drm_file's, whereas private is only counted against
> the single drm_file.  Driver or app changes are unlikely to change the
> shared size, whereas private footprint is a thing you can optimize to
> some degree.
 >
>> Shared is a bit wishy-washy, not sure about that one in either proposals. It can be either imported or exported buffers, but in essence I think it fits better as a subset of total size.
> 
> Imported vs exported doesn't really matter.. it is just an
> implementation detail of the winsys.  But I think it is useful to know
> how much of an app's footprint is shared vs private.  You could
> express it different ways, but my proposal had private and shared,
> from which you can calculate total:
> 
>     total = private + shared
> 
> but you could flip the path around and advertise just total and
> shared, and calculate private from that.

Yeah I am not sure. My gut feeling was that a stable "top level" size is
the best option. Aka "this is how much this file could be using in the
worst case".

If shared for file A can drop once file B closes the object it
previously imported from A, I think that could be confusing. Because A
did nothing - it is not suddenly using more private memory (it hasn't
allocated anything), nor has it closed any shared memory objects.

And on a tangent, what about shared vs private stats when we have a
userptr object created from shared memory? Core cannot really untangle
those. Or the memory allocated for things other than buffer objects, as I
argue in the cover letter.

Regards,

Tvrtko
Rob Clark April 19, 2023, 2:38 p.m. UTC | #5
On Wed, Apr 19, 2023 at 7:06 AM Tvrtko Ursulin
<tvrtko.ursulin@linux.intel.com> wrote:
>
>
> On 18/04/2023 17:08, Rob Clark wrote:
> > On Tue, Apr 18, 2023 at 7:58 AM Tvrtko Ursulin
> > <tvrtko.ursulin@linux.intel.com> wrote:
> >> On 18/04/2023 15:39, Rob Clark wrote:
> >>> On Mon, Apr 17, 2023 at 8:56 AM Tvrtko Ursulin
> >>> <tvrtko.ursulin@linux.intel.com> wrote:
> >>>> [snip]
> >>>> +               stats[id].size += sz;
> >>>
> >>> This implies that summing up all of the categories is not the same as
> >>> the toplevel stats that I was proposing
> >
> > Sorry, I mis-spoke, I meant "summing up all of the regions is not..."
>
> Ah okay. It could be made like that yes.
>
> I wasn't sure what would be more useful for drivers which support memory
> regions. To see how much memory file could be using worst case, or
> strictly how much it is currently using. So for buffer objects where
> userspace allows kernel to choose the region from a supplied list, I
> thought it would be useful to show that in total size against all
> possible regions.
>
> In a way you see this driver /could/ be using 1G in vram and 1G in
> system, but currently it only has resident 1G in vram. Or you see
> another file which has 1G vram size and 1G resident size and you can
> infer some things.

AFAIU all the buffers could exist in system memory at some point in
time, and vram is more like an explicitly managed fast cache.  Like,
what happens on suspend to ram or hibernate, I assume you don't keep
vram powered?

> Perhaps that can be confusing and it would be better to let total size
> migrate between regions at runtime as does resident and other
> categories. But then the total size per region would change at runtime
> influenced by other app activity (as driver is transparently migrating
> buffers between regions). Which can also be very confusing, it would
> appear as if the app is creating/freeing objects when it isn't.
> >> Correct, my categories are a bit different. You had private and shared as two mutually exclusive buckets, and then resident as subset of either/both. I have size as analogue to VmSize and resident as a subset of that, analogue to VmRss.
> >>
> >
> > I split shared because by definition shared buffers can be counted
> > against multiple drm_file's, whereas private is only counted against
> > the single drm_file.  Driver or app changes are unlikely to change the
> > shared size, whereas private footprint is a thing you can optimize to
> > some degree.
>  >
> >> Shared is a bit wishy-washy, not sure about that one in either proposals. It can be either imported or exported buffers, but in essence I think it fits better as a subset of total size.
> >
> > Imported vs exported doesn't really matter.. it is just an
> > implementation detail of the winsys.  But I think it is useful to know
> > how much of an app's footprint is shared vs private.  You could
> > express it different ways, but my proposal had private and shared,
> > from which you can calculate total:
> >
> >     total = private + shared
> >
> > but you could flip the path around and advertise just total and
> > shared, and calculate private from that.
>
> Yeah I am not sure. My gut feeling was that stable "top level" size is
> the best option. Aka "this is how much this file could be using worst case".
>
> If shared for file A can drop once file B closes the object it
> previously imported from A, I think that could be confusing. Because A
> did nothing - it is not suddenly using more private memory (hasn't
> allocated anything) nor has closed any shared memory objects.

ok, fair

> And on a tangent, but what about shared vs private stats when we have
> userptr object created from shared memory? Core cannot really untangle
> those. Or the memory allocated for other than buffer objects as I argue
> in the cover letter.

hmm, not sure.. I'd be inclined to just count them as private.  Are
you allowed to dma-buf export a userptr buffer?  That seems like it
could go pretty badly..

BR,
-R

> Regards,
>
> Tvrtko
Tvrtko Ursulin April 20, 2023, 1:11 p.m. UTC | #6
On 19/04/2023 15:38, Rob Clark wrote:
> On Wed, Apr 19, 2023 at 7:06 AM Tvrtko Ursulin
> <tvrtko.ursulin@linux.intel.com> wrote:
>>
>>
>> On 18/04/2023 17:08, Rob Clark wrote:
>>> On Tue, Apr 18, 2023 at 7:58 AM Tvrtko Ursulin
>>> <tvrtko.ursulin@linux.intel.com> wrote:
>>>> On 18/04/2023 15:39, Rob Clark wrote:
>>>>> On Mon, Apr 17, 2023 at 8:56 AM Tvrtko Ursulin
>>>>> <tvrtko.ursulin@linux.intel.com> wrote:
>>>>>> [snip]
>>>>>> +               stats[id].size += sz;
>>>>>
>>>>> This implies that summing up all of the categories is not the same as
>>>>> the toplevel stats that I was proposing
>>>
>>> Sorry, I mis-spoke, I meant "summing up all of the regions is not..."
>>
>> Ah okay. It could be made like that yes.
>>
>> I wasn't sure what would be more useful for drivers which support memory
>> regions. To see how much memory file could be using worst case, or
>> strictly how much it is currently using. So for buffer objects where
>> userspace allows kernel to choose the region from a supplied list, I
>> thought it would be useful to show that in total size against all
>> possible regions.
>>
>> In a way you see this driver /could/ be using 1G in vram and 1G in
>> system, but currently it only has resident 1G in vram. Or you see
>> another file which has 1G vram size and 1G resident size and you can
>> infer some things.
> 
> AFAIU all the buffers could exist in system memory at some point in
> time, and vram is more like an explicitly managed fast cache.  Like,
> what happens on suspend to ram or hibernate, I assume you don't keep
> vram powered?

Yeah they can be swapped out on suspend, but that's different from
buffers which are explicitly marked as being allowed to exist in either
region at runtime.

>> Perhaps that can be confusing and it would be better to let total size
>> migrate between regions at runtime as does resident and other
>> categories. But then the total size per region would change at runtime
>> influenced by other app activity (as driver is transparently migrating
>> buffers between regions). Which can also be very confusing, it would
>> appear as if the app is creating/freeing objects when it isn't.
>>>> Correct, my categories are a bit different. You had private and shared as two mutually exclusive buckets, and then resident as subset of either/both. I have size as analogue to VmSize and resident as a subset of that, analogue to VmRss.
>>>>
>>>
>>> I split shared because by definition shared buffers can be counted
>>> against multiple drm_file's, whereas private is only counted against
>>> the single drm_file.  Driver or app changes are unlikely to change the
>>> shared size, whereas private footprint is a thing you can optimize to
>>> some degree.
>>   >
>>>> Shared is a bit wishy-washy, not sure about that one in either proposals. It can be either imported or exported buffers, but in essence I think it fits better as a subset of total size.
>>>
>>> Imported vs exported doesn't really matter.. it is just an
>>> implementation detail of the winsys.  But I think it is useful to know
>>> how much of an app's footprint is shared vs private.  You could
>>> express it different ways, but my proposal had private and shared,
>>> from which you can calculate total:
>>>
>>>      total = private + shared
>>>
>>> but you could flip the path around and advertise just total and
>>> shared, and calculate private from that.
>>
>> Yeah I am not sure. My gut feeling was that stable "top level" size is
>> the best option. Aka "this is how much this file could be using worst case".
>>
>> If shared for file A can drop once file B closes the object it
>> previously imported from A, I think that could be confusing. Because A
>> did nothing - it is not suddenly using more private memory (hasn't
>> allocated anything) nor has closed any shared memory objects.
> 
> ok, fair
> 
>> And on a tangent, but what about shared vs private stats when we have
>> userptr object created from shared memory? Core cannot really untangle
>> those. Or the memory allocated for other than buffer objects as I argue
>> in the cover letter.
> 
> hmm, not sure.. I'd be inclined to just count them as private.  Are
> you allowed to dma-buf export a userptr buffer?  That seems like it
> could go pretty badly..

AFAIR we forbid that, but my point was more that there is shared memory
and then there is shared memory - not related to dma-buf, I mean. Just that
two processes could create two userptr objects from the same shared memory
block. Memory accounting is, as always, complicated.

Regards,

Tvrtko
Rob Clark April 21, 2023, 1:26 a.m. UTC | #7
On Thu, Apr 20, 2023 at 6:11 AM Tvrtko Ursulin
<tvrtko.ursulin@linux.intel.com> wrote:
>
>
> On 19/04/2023 15:38, Rob Clark wrote:
> > On Wed, Apr 19, 2023 at 7:06 AM Tvrtko Ursulin
> > <tvrtko.ursulin@linux.intel.com> wrote:
> >>
> >>
> >> On 18/04/2023 17:08, Rob Clark wrote:
> >>> On Tue, Apr 18, 2023 at 7:58 AM Tvrtko Ursulin
> >>> <tvrtko.ursulin@linux.intel.com> wrote:
> >>>> On 18/04/2023 15:39, Rob Clark wrote:
> >>>>> On Mon, Apr 17, 2023 at 8:56 AM Tvrtko Ursulin
> >>>>> <tvrtko.ursulin@linux.intel.com> wrote:
> >>>>>> [snip]
> >>>>>> +               stats[id].size += sz;
> >>>>>
> >>>>> This implies that summing up all of the categories is not the same as
> >>>>> the toplevel stats that I was proposing
> >>>
> >>> Sorry, I mis-spoke, I meant "summing up all of the regions is not..."
> >>
> >> Ah okay. It could be made like that yes.
> >>
> >> I wasn't sure what would be more useful for drivers which support memory
> >> regions. To see how much memory file could be using worst case, or
> >> strictly how much it is currently using. So for buffer objects where
> >> userspace allows kernel to choose the region from a supplied list, I
> >> thought it would be useful to show that in total size against all
> >> possible regions.
> >>
> >> In a way you see this driver /could/ be using 1G in vram and 1G in
> >> system, but currently it only has resident 1G in vram. Or you see
> >> another file which has 1G vram size and 1G resident size and you can
> >> infer some things.
> >
> > AFAIU all the buffers could exist in system memory at some point in
> > time, and vram is more like an explicitly managed fast cache.  Like,
> > what happens on suspend to ram or hibernate, I assume you don't keep
> > vram powered?
>
> Yeah they can be swapped out on suspend, but that's different that
> buffers which are explicitly marked as being allowed to exist in either
> region at runtime.

it sounds like, at least from the PoV of system memory usage, they still
need pages in system memory so they have somewhere to live when they are
swapped out of vram?

> >> Perhaps that can be confusing and it would be better to let total size
> >> migrate between regions at runtime as does resident and other
> >> categories. But then the total size per region would change at runtime
> >> influenced by other app activity (as driver is transparently migrating
> >> buffers between regions). Which can also be very confusing, it would
> >> appear as if the app is creating/freeing objects when it isn't.
> >>>> Correct, my categories are a bit different. You had private and shared as two mutually exclusive buckets, and then resident as subset of either/both. I have size as analogue to VmSize and resident as a subset of that, analogue to VmRss.
> >>>>
> >>>
> >>> I split shared because by definition shared buffers can be counted
> >>> against multiple drm_file's, whereas private is only counted against
> >>> the single drm_file.  Driver or app changes are unlikely to change the
> >>> shared size, whereas private footprint is a thing you can optimize to
> >>> some degree.
> >>   >
> >>>> Shared is a bit wishy-washy, not sure about that one in either proposals. It can be either imported or exported buffers, but in essence I think it fits better as a subset of total size.
> >>>
> >>> Imported vs exported doesn't really matter.. it is just an
> >>> implementation detail of the winsys.  But I think it is useful to know
> >>> how much of an app's footprint is shared vs private.  You could
> >>> express it different ways, but my proposal had private and shared,
> >>> from which you can calculate total:
> >>>
> >>>      total = private + shared
> >>>
> >>> but you could flip the path around and advertise just total and
> >>> shared, and calculate private from that.
> >>
> >> Yeah I am not sure. My gut feeling was that stable "top level" size is
> >> the best option. Aka "this is how much this file could be using worst case".
> >>
> >> If shared for file A can drop once file B closes the object it
> >> previously imported from A, I think that could be confusing. Because A
> >> did nothing - it is not suddenly using more private memory (hasn't
> >> allocated anything) nor has closed any shared memory objects.
> >
> > ok, fair
> >
> >> And on a tangent, but what about shared vs private stats when we have
> >> userptr object created from shared memory? Core cannot really untangle
> >> those. Or the memory allocated for other than buffer objects as I argue
> >> in the cover letter.
> >
> > hmm, not sure.. I'd be inclined to just count them as private.  Are
> > you allowed to dma-buf export a userptr buffer?  That seems like it
> > could go pretty badly..
>
> AFAIR we forbid that, but my point was more that there is shared memory
> and shared memory, not related to dma-buf I mean. Just that two
> processes could create two userptr objects from the same shared memory
> block. Memory accounting is as always complicated.

don't let "perfect" be the enemy of "good", especially in this case
when it sounds like "perfect" isn't even possible ;-)

ie, we should just count as private vs shared based on handle count

BR,
-R

> Regards,
>
> Tvrtko

Patch

diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
index 6493548c69bf..4c70206cbc27 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -1806,6 +1806,11 @@  static const struct drm_driver i915_drm_driver = {
 	.dumb_create = i915_gem_dumb_create,
 	.dumb_map_offset = i915_gem_dumb_mmap_offset,
 
+#ifdef CONFIG_PROC_FS
+	.query_fdinfo_memory_regions = i915_query_fdinfo_memory_regions,
+	.query_fdinfo_memory_stats = i915_query_fdinfo_memory_stats,
+#endif
+
 	.ioctls = i915_ioctls,
 	.num_ioctls = ARRAY_SIZE(i915_ioctls),
 	.fops = &i915_driver_fops,
diff --git a/drivers/gpu/drm/i915/i915_drm_client.c b/drivers/gpu/drm/i915/i915_drm_client.c
index c654984189f7..65857c68bdb3 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.c
+++ b/drivers/gpu/drm/i915/i915_drm_client.c
@@ -12,6 +12,7 @@ 
 #include <drm/drm_print.h>
 
 #include "gem/i915_gem_context.h"
+#include "intel_memory_region.h"
 #include "i915_drm_client.h"
 #include "i915_file_private.h"
 #include "i915_gem.h"
@@ -112,4 +113,105 @@  void i915_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file)
 	for (i = 0; i < ARRAY_SIZE(uabi_class_names); i++)
 		show_client_class(p, i915, file_priv->client, i);
 }
+
+char **
+i915_query_fdinfo_memory_regions(struct drm_device *dev, unsigned int *num)
+{
+	struct drm_i915_private *i915 = to_i915(dev);
+	struct intel_memory_region *mr;
+	enum intel_region_id id;
+
+	/* FIXME move to init */
+	for_each_memory_region(mr, i915, id) {
+		if (!i915->mm.region_names[id])
+			i915->mm.region_names[id] = mr->name;
+	}
+
+	*num = id;
+
+	return i915->mm.region_names;
+}
+
+static void
+add_obj(struct drm_i915_gem_object *obj, struct drm_fdinfo_memory_stat *stats)
+{
+	struct intel_memory_region *mr;
+	enum intel_region_id id;
+	unsigned int i;
+	u64 sz;
+
+	if (!obj)
+		return;
+	sz = obj->base.size;
+	/* Attribute size and shared to all possible memory regions. */
+	for (i = 0; i < obj->mm.n_placements; i++) {
+		mr = obj->mm.placements[i];
+		id = mr->id;
+
+		stats[id].size += sz;
+		if (obj->base.handle_count > 1)
+			stats[id].shared += sz;
+	}
+
+	/* Attribute other categories to only the current region. */
+	mr = obj->mm.region;
+	if (mr)
+		id = mr->id;
+	else
+		id = INTEL_REGION_SMEM;
+
+	if (!i915_gem_object_has_pages(obj))
+		return;
+
+	stats[id].resident += sz;
+
+	if (!dma_resv_test_signaled(obj->base.resv, dma_resv_usage_rw(true)))
+		stats[id].active += sz;
+	else if (i915_gem_object_is_shrinkable(obj) &&
+		obj->mm.madv == I915_MADV_DONTNEED)
+		stats[id].purgeable += sz;
+}
+
+void
+i915_query_fdinfo_memory_stats(struct drm_file *file,
+			       struct drm_fdinfo_memory_stat *stats)
+{
+	struct drm_i915_file_private *file_priv = file->driver_priv;
+	struct i915_drm_client *client = file_priv->client;
+	struct drm_gem_object *drm_obj;
+	struct i915_gem_context *ctx;
+	int id;
+
+	/*
+	 * FIXME - we can do this better and in fewer passes if we are to start
+	 * exporting proper memory stats.
+	 */
+
+	/* User created objects */
+	spin_lock(&file->table_lock);
+	idr_for_each_entry(&file->object_idr, drm_obj, id)
+		add_obj(to_intel_bo(drm_obj), stats);
+	spin_unlock(&file->table_lock);
+
+	/* Contexts, rings, timelines, page tables, ... */
+	rcu_read_lock();
+	list_for_each_entry_rcu(ctx, &client->ctx_list, client_link) {
+		struct i915_gem_engines_iter it;
+		struct intel_context *ce;
+
+		for_each_gem_engine(ce, rcu_dereference(ctx->engines), it) {
+			/* FIXME races?! */
+			if (ce->state)
+				add_obj(ce->state->obj, stats);
+			if (ce->timeline && ce->timeline->hwsp_ggtt)
+				add_obj(ce->timeline->hwsp_ggtt->obj, stats);
+			if (ce->ring && ce->ring->vma)
+				add_obj(ce->ring->vma->obj, stats);
+		}
+
+		/* TODO: ctx->vm page table backing objects */
+	}
+	rcu_read_unlock();
+}
+
 #endif
diff --git a/drivers/gpu/drm/i915/i915_drm_client.h b/drivers/gpu/drm/i915/i915_drm_client.h
index 4c18b99e10a4..622936c51903 100644
--- a/drivers/gpu/drm/i915/i915_drm_client.h
+++ b/drivers/gpu/drm/i915/i915_drm_client.h
@@ -14,7 +14,10 @@ 
 
 #define I915_LAST_UABI_ENGINE_CLASS I915_ENGINE_CLASS_COMPUTE
 
+struct drm_device;
 struct drm_file;
+struct drm_fdinfo_memory_stat;
+struct drm_gem_object;
 struct drm_printer;
 
 struct i915_drm_client {
@@ -49,6 +52,11 @@  struct i915_drm_client *i915_drm_client_alloc(void);
 
 #ifdef CONFIG_PROC_FS
 void i915_drm_client_fdinfo(struct drm_printer *p, struct drm_file *file);
+
+char **i915_query_fdinfo_memory_regions(struct drm_device *dev,
+					unsigned int *num);
+void i915_query_fdinfo_memory_stats(struct drm_file *file,
+				    struct drm_fdinfo_memory_stat *stats);
 #endif
 
 #endif /* !__I915_DRM_CLIENT_H__ */
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index eb739fb9cdbb..b84d2f0ed2cb 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -161,6 +161,8 @@  struct i915_gem_mm {
 
 	struct intel_memory_region *regions[INTEL_REGION_UNKNOWN];
 
+	char *region_names[INTEL_REGION_UNKNOWN];
+
 	struct notifier_block oom_notifier;
 	struct notifier_block vmap_notifier;
 	struct shrinker shrinker;