diff mbox series

[RFC,v3,11/11] drm, cgroup: Allow more aggressive memory reclaim

Message ID 20190626150522.11618-12-Kenny.Ho@amd.com (mailing list archive)
State New, archived
Headers show
Series new cgroup controller for gpu/drm subsystem | expand

Commit Message

Ho, Kenny June 26, 2019, 3:05 p.m. UTC
Allow DRM TTM memory manager to register a work_struct, such that, when
a drmcgrp is under memory pressure, memory reclaiming can be triggered
immediately.

Change-Id: I25ac04e2db9c19ff12652b88ebff18b44b2706d8
Signed-off-by: Kenny Ho <Kenny.Ho@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c    | 47 +++++++++++++++++++++++++++++++++
 include/drm/drm_cgroup.h        | 14 ++++++++++
 include/drm/ttm/ttm_bo_driver.h |  2 ++
 kernel/cgroup/drm.c             | 33 +++++++++++++++++++++++
 4 files changed, 96 insertions(+)

Comments

Daniel Vetter June 26, 2019, 4:44 p.m. UTC | #1
On Wed, Jun 26, 2019 at 11:05:22AM -0400, Kenny Ho wrote:
> Allow DRM TTM memory manager to register a work_struct, such that, when
> a drmcgrp is under memory pressure, memory reclaiming can be triggered
> immediately.
> 
> Change-Id: I25ac04e2db9c19ff12652b88ebff18b44b2706d8
> Signed-off-by: Kenny Ho <Kenny.Ho@amd.com>
> ---
>  drivers/gpu/drm/ttm/ttm_bo.c    | 47 +++++++++++++++++++++++++++++++++
>  include/drm/drm_cgroup.h        | 14 ++++++++++
>  include/drm/ttm/ttm_bo_driver.h |  2 ++
>  kernel/cgroup/drm.c             | 33 +++++++++++++++++++++++
>  4 files changed, 96 insertions(+)
> 
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 79c530f4a198..5fc3bc5bd4c5 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -1509,6 +1509,44 @@ int ttm_bo_evict_mm(struct ttm_bo_device *bdev, unsigned mem_type)
>  }
>  EXPORT_SYMBOL(ttm_bo_evict_mm);
>  
> +static void ttm_bo_reclaim_wq(struct work_struct *work)
> +{

I think a design a bit more inspired by memcg aware core shrinkers would
be nice, i.e. explicitly passing:
- which drm_cgroup needs to be shrunk
- which ttm_mem_reg (well the fancy new abstracted out stuff for tracking
  special gpu memory resources like tt or vram or whatever)
- how much it needs to be shrunk

I think with that a lot more the book-keeping could be pushed into the
drm_cgroup code, and the callback just needs to actually shrink enough as
requested.
-Daniel

> +	struct ttm_operation_ctx ctx = {
> +		.interruptible = false,
> +		.no_wait_gpu = false,
> +		.flags = TTM_OPT_FLAG_FORCE_ALLOC
> +	};
> +	struct ttm_mem_type_manager *man =
> +	    container_of(work, struct ttm_mem_type_manager, reclaim_wq);
> +	struct ttm_bo_device *bdev = man->bdev;
> +	struct dma_fence *fence;
> +	int mem_type;
> +	int ret;
> +
> +	for (mem_type = 0; mem_type < TTM_NUM_MEM_TYPES; mem_type++)
> +		if (&bdev->man[mem_type] == man)
> +			break;
> +
> +	BUG_ON(mem_type >= TTM_NUM_MEM_TYPES);
> +
> +	if (!drmcgrp_mem_pressure_scan(bdev, mem_type))
> +		return;
> +
> +	ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx);
> +	if (ret)
> +		return;
> +
> +	spin_lock(&man->move_lock);
> +	fence = dma_fence_get(man->move);
> +	spin_unlock(&man->move_lock);
> +
> +	if (fence) {
> +		ret = dma_fence_wait(fence, false);
> +		dma_fence_put(fence);
> +	}
> +
> +}
> +
>  int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type,
>  			unsigned long p_size)
>  {
> @@ -1543,6 +1581,13 @@ int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type,
>  		INIT_LIST_HEAD(&man->lru[i]);
>  	man->move = NULL;
>  
> +	pr_err("drmcgrp %p type %d\n", bdev->ddev, type);
> +
> +	if (type <= TTM_PL_VRAM) {
> +		INIT_WORK(&man->reclaim_wq, ttm_bo_reclaim_wq);
> +		drmcgrp_register_device_mm(bdev->ddev, type, &man->reclaim_wq);
> +	}
> +
>  	return 0;
>  }
>  EXPORT_SYMBOL(ttm_bo_init_mm);
> @@ -1620,6 +1665,8 @@ int ttm_bo_device_release(struct ttm_bo_device *bdev)
>  		man = &bdev->man[i];
>  		if (man->has_type) {
>  			man->use_type = false;
> +			drmcgrp_unregister_device_mm(bdev->ddev, i);
> +			cancel_work_sync(&man->reclaim_wq);
>  			if ((i != TTM_PL_SYSTEM) && ttm_bo_clean_mm(bdev, i)) {
>  				ret = -EBUSY;
>  				pr_err("DRM memory manager type %d is not clean\n",
> diff --git a/include/drm/drm_cgroup.h b/include/drm/drm_cgroup.h
> index 360c1e6c809f..134d6e5475f3 100644
> --- a/include/drm/drm_cgroup.h
> +++ b/include/drm/drm_cgroup.h
> @@ -5,6 +5,7 @@
>  #define __DRM_CGROUP_H__
>  
>  #include <linux/cgroup_drm.h>
> +#include <linux/workqueue.h>
>  #include <drm/ttm/ttm_bo_api.h>
>  #include <drm/ttm/ttm_bo_driver.h>
>  
> @@ -12,6 +13,9 @@
>  
>  int drmcgrp_register_device(struct drm_device *device);
>  int drmcgrp_unregister_device(struct drm_device *device);
> +void drmcgrp_register_device_mm(struct drm_device *dev, unsigned type,
> +		struct work_struct *wq);
> +void drmcgrp_unregister_device_mm(struct drm_device *dev, unsigned type);
>  bool drmcgrp_is_self_or_ancestor(struct drmcgrp *self,
>  		struct drmcgrp *relative);
>  void drmcgrp_chg_bo_alloc(struct drmcgrp *drmcgrp, struct drm_device *dev,
> @@ -40,6 +44,16 @@ static inline int drmcgrp_unregister_device(struct drm_device *device)
>  	return 0;
>  }
>  
> +static inline void drmcgrp_register_device_mm(struct drm_device *dev,
> +		unsigned type, struct work_struct *wq)
> +{
> +}
> +
> +static inline void drmcgrp_unregister_device_mm(struct drm_device *dev,
> +		unsigned type)
> +{
> +}
> +
>  static inline bool drmcgrp_is_self_or_ancestor(struct drmcgrp *self,
>  		struct drmcgrp *relative)
>  {
> diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
> index 4cbcb41e5aa9..0956ca7888fc 100644
> --- a/include/drm/ttm/ttm_bo_driver.h
> +++ b/include/drm/ttm/ttm_bo_driver.h
> @@ -205,6 +205,8 @@ struct ttm_mem_type_manager {
>  	 * Protected by @move_lock.
>  	 */
>  	struct dma_fence *move;
> +
> +	struct work_struct reclaim_wq;
>  };
>  
>  /**
> diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
> index 1ce13db36ce9..985a89e849d3 100644
> --- a/kernel/cgroup/drm.c
> +++ b/kernel/cgroup/drm.c
> @@ -31,6 +31,8 @@ struct drmcgrp_device {
>  	s64			mem_bw_avg_bytes_per_us_default;
>  
>  	s64			mem_highs_default[TTM_PL_PRIV+1];
> +
> +	struct work_struct	*mem_reclaim_wq[TTM_PL_PRIV];
>  };
>  
>  #define DRMCG_CTF_PRIV_SIZE 3
> @@ -793,6 +795,31 @@ int drmcgrp_unregister_device(struct drm_device *dev)
>  }
>  EXPORT_SYMBOL(drmcgrp_unregister_device);
>  
> +void drmcgrp_register_device_mm(struct drm_device *dev, unsigned type,
> +		struct work_struct *wq)
> +{
> +	if (dev == NULL || dev->primary->index > max_minor
> +			|| type >= TTM_PL_PRIV)
> +		return;
> +
> +	mutex_lock(&drmcgrp_mutex);
> +	known_drmcgrp_devs[dev->primary->index]->mem_reclaim_wq[type] = wq;
> +	mutex_unlock(&drmcgrp_mutex);
> +}
> +EXPORT_SYMBOL(drmcgrp_register_device_mm);
> +
> +void drmcgrp_unregister_device_mm(struct drm_device *dev, unsigned type)
> +{
> +	if (dev == NULL || dev->primary->index > max_minor
> +			|| type >= TTM_PL_PRIV)
> +		return;
> +
> +	mutex_lock(&drmcgrp_mutex);
> +	known_drmcgrp_devs[dev->primary->index]->mem_reclaim_wq[type] = NULL;
> +	mutex_unlock(&drmcgrp_mutex);
> +}
> +EXPORT_SYMBOL(drmcgrp_unregister_device_mm);
> +
>  bool drmcgrp_is_self_or_ancestor(struct drmcgrp *self, struct drmcgrp *relative)
>  {
>  	for (; self != NULL; self = parent_drmcgrp(self))
> @@ -1004,6 +1031,12 @@ void drmcgrp_mem_track_move(struct ttm_buffer_object *old_bo, bool evict,
>  
>  		ddr->mem_bw_stats[DRMCGRP_MEM_BW_ATTR_BYTE_CREDIT]
>  			-= move_in_bytes;
> +
> +		if (known_dev->mem_reclaim_wq[new_mem_type] != NULL &&
> +                        ddr->mem_stats[new_mem_type] >
> +				ddr->mem_highs[new_mem_type])
> +			schedule_work(
> +				known_dev->mem_reclaim_wq[new_mem_type]);
>  	}
>  	mutex_unlock(&known_dev->mutex);
>  }
> -- 
> 2.21.0
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel
Kenny Ho June 26, 2019, 10:52 p.m. UTC | #2
Ok.  I am not too familiar with shrinker but I will dig into it.  Just
so that I am looking into the right things, you are referring to
things like struct shrinker and struct shrink_control?

Regards,
Kenny

On Wed, Jun 26, 2019 at 12:44 PM Daniel Vetter <daniel@ffwll.ch> wrote:
>
> On Wed, Jun 26, 2019 at 11:05:22AM -0400, Kenny Ho wrote:
> > Allow DRM TTM memory manager to register a work_struct, such that, when
> > a drmcgrp is under memory pressure, memory reclaiming can be triggered
> > immediately.
> >
> > Change-Id: I25ac04e2db9c19ff12652b88ebff18b44b2706d8
> > Signed-off-by: Kenny Ho <Kenny.Ho@amd.com>
> > ---
> >  drivers/gpu/drm/ttm/ttm_bo.c    | 47 +++++++++++++++++++++++++++++++++
> >  include/drm/drm_cgroup.h        | 14 ++++++++++
> >  include/drm/ttm/ttm_bo_driver.h |  2 ++
> >  kernel/cgroup/drm.c             | 33 +++++++++++++++++++++++
> >  4 files changed, 96 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> > index 79c530f4a198..5fc3bc5bd4c5 100644
> > --- a/drivers/gpu/drm/ttm/ttm_bo.c
> > +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> > @@ -1509,6 +1509,44 @@ int ttm_bo_evict_mm(struct ttm_bo_device *bdev, unsigned mem_type)
> >  }
> >  EXPORT_SYMBOL(ttm_bo_evict_mm);
> >
> > +static void ttm_bo_reclaim_wq(struct work_struct *work)
> > +{
>
> I think a design a bit more inspired by memcg aware core shrinkers would
> be nice, i.e. explicitly passing:
> - which drm_cgroup needs to be shrunk
> - which ttm_mem_reg (well the fancy new abstracted out stuff for tracking
>   special gpu memory resources like tt or vram or whatever)
> - how much it needs to be shrunk
>
> I think with that a lot more the book-keeping could be pushed into the
> drm_cgroup code, and the callback just needs to actually shrink enough as
> requested.
> -Daniel
>
> > +     struct ttm_operation_ctx ctx = {
> > +             .interruptible = false,
> > +             .no_wait_gpu = false,
> > +             .flags = TTM_OPT_FLAG_FORCE_ALLOC
> > +     };
> > +     struct ttm_mem_type_manager *man =
> > +         container_of(work, struct ttm_mem_type_manager, reclaim_wq);
> > +     struct ttm_bo_device *bdev = man->bdev;
> > +     struct dma_fence *fence;
> > +     int mem_type;
> > +     int ret;
> > +
> > +     for (mem_type = 0; mem_type < TTM_NUM_MEM_TYPES; mem_type++)
> > +             if (&bdev->man[mem_type] == man)
> > +                     break;
> > +
> > +     BUG_ON(mem_type >= TTM_NUM_MEM_TYPES);
> > +
> > +     if (!drmcgrp_mem_pressure_scan(bdev, mem_type))
> > +             return;
> > +
> > +     ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx);
> > +     if (ret)
> > +             return;
> > +
> > +     spin_lock(&man->move_lock);
> > +     fence = dma_fence_get(man->move);
> > +     spin_unlock(&man->move_lock);
> > +
> > +     if (fence) {
> > +             ret = dma_fence_wait(fence, false);
> > +             dma_fence_put(fence);
> > +     }
> > +
> > +}
> > +
> >  int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type,
> >                       unsigned long p_size)
> >  {
> > @@ -1543,6 +1581,13 @@ int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type,
> >               INIT_LIST_HEAD(&man->lru[i]);
> >       man->move = NULL;
> >
> > +     pr_err("drmcgrp %p type %d\n", bdev->ddev, type);
> > +
> > +     if (type <= TTM_PL_VRAM) {
> > +             INIT_WORK(&man->reclaim_wq, ttm_bo_reclaim_wq);
> > +             drmcgrp_register_device_mm(bdev->ddev, type, &man->reclaim_wq);
> > +     }
> > +
> >       return 0;
> >  }
> >  EXPORT_SYMBOL(ttm_bo_init_mm);
> > @@ -1620,6 +1665,8 @@ int ttm_bo_device_release(struct ttm_bo_device *bdev)
> >               man = &bdev->man[i];
> >               if (man->has_type) {
> >                       man->use_type = false;
> > +                     drmcgrp_unregister_device_mm(bdev->ddev, i);
> > +                     cancel_work_sync(&man->reclaim_wq);
> >                       if ((i != TTM_PL_SYSTEM) && ttm_bo_clean_mm(bdev, i)) {
> >                               ret = -EBUSY;
> >                               pr_err("DRM memory manager type %d is not clean\n",
> > diff --git a/include/drm/drm_cgroup.h b/include/drm/drm_cgroup.h
> > index 360c1e6c809f..134d6e5475f3 100644
> > --- a/include/drm/drm_cgroup.h
> > +++ b/include/drm/drm_cgroup.h
> > @@ -5,6 +5,7 @@
> >  #define __DRM_CGROUP_H__
> >
> >  #include <linux/cgroup_drm.h>
> > +#include <linux/workqueue.h>
> >  #include <drm/ttm/ttm_bo_api.h>
> >  #include <drm/ttm/ttm_bo_driver.h>
> >
> > @@ -12,6 +13,9 @@
> >
> >  int drmcgrp_register_device(struct drm_device *device);
> >  int drmcgrp_unregister_device(struct drm_device *device);
> > +void drmcgrp_register_device_mm(struct drm_device *dev, unsigned type,
> > +             struct work_struct *wq);
> > +void drmcgrp_unregister_device_mm(struct drm_device *dev, unsigned type);
> >  bool drmcgrp_is_self_or_ancestor(struct drmcgrp *self,
> >               struct drmcgrp *relative);
> >  void drmcgrp_chg_bo_alloc(struct drmcgrp *drmcgrp, struct drm_device *dev,
> > @@ -40,6 +44,16 @@ static inline int drmcgrp_unregister_device(struct drm_device *device)
> >       return 0;
> >  }
> >
> > +static inline void drmcgrp_register_device_mm(struct drm_device *dev,
> > +             unsigned type, struct work_struct *wq)
> > +{
> > +}
> > +
> > +static inline void drmcgrp_unregister_device_mm(struct drm_device *dev,
> > +             unsigned type)
> > +{
> > +}
> > +
> >  static inline bool drmcgrp_is_self_or_ancestor(struct drmcgrp *self,
> >               struct drmcgrp *relative)
> >  {
> > diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
> > index 4cbcb41e5aa9..0956ca7888fc 100644
> > --- a/include/drm/ttm/ttm_bo_driver.h
> > +++ b/include/drm/ttm/ttm_bo_driver.h
> > @@ -205,6 +205,8 @@ struct ttm_mem_type_manager {
> >        * Protected by @move_lock.
> >        */
> >       struct dma_fence *move;
> > +
> > +     struct work_struct reclaim_wq;
> >  };
> >
> >  /**
> > diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
> > index 1ce13db36ce9..985a89e849d3 100644
> > --- a/kernel/cgroup/drm.c
> > +++ b/kernel/cgroup/drm.c
> > @@ -31,6 +31,8 @@ struct drmcgrp_device {
> >       s64                     mem_bw_avg_bytes_per_us_default;
> >
> >       s64                     mem_highs_default[TTM_PL_PRIV+1];
> > +
> > +     struct work_struct      *mem_reclaim_wq[TTM_PL_PRIV];
> >  };
> >
> >  #define DRMCG_CTF_PRIV_SIZE 3
> > @@ -793,6 +795,31 @@ int drmcgrp_unregister_device(struct drm_device *dev)
> >  }
> >  EXPORT_SYMBOL(drmcgrp_unregister_device);
> >
> > +void drmcgrp_register_device_mm(struct drm_device *dev, unsigned type,
> > +             struct work_struct *wq)
> > +{
> > +     if (dev == NULL || dev->primary->index > max_minor
> > +                     || type >= TTM_PL_PRIV)
> > +             return;
> > +
> > +     mutex_lock(&drmcgrp_mutex);
> > +     known_drmcgrp_devs[dev->primary->index]->mem_reclaim_wq[type] = wq;
> > +     mutex_unlock(&drmcgrp_mutex);
> > +}
> > +EXPORT_SYMBOL(drmcgrp_register_device_mm);
> > +
> > +void drmcgrp_unregister_device_mm(struct drm_device *dev, unsigned type)
> > +{
> > +     if (dev == NULL || dev->primary->index > max_minor
> > +                     || type >= TTM_PL_PRIV)
> > +             return;
> > +
> > +     mutex_lock(&drmcgrp_mutex);
> > +     known_drmcgrp_devs[dev->primary->index]->mem_reclaim_wq[type] = NULL;
> > +     mutex_unlock(&drmcgrp_mutex);
> > +}
> > +EXPORT_SYMBOL(drmcgrp_unregister_device_mm);
> > +
> >  bool drmcgrp_is_self_or_ancestor(struct drmcgrp *self, struct drmcgrp *relative)
> >  {
> >       for (; self != NULL; self = parent_drmcgrp(self))
> > @@ -1004,6 +1031,12 @@ void drmcgrp_mem_track_move(struct ttm_buffer_object *old_bo, bool evict,
> >
> >               ddr->mem_bw_stats[DRMCGRP_MEM_BW_ATTR_BYTE_CREDIT]
> >                       -= move_in_bytes;
> > +
> > +             if (known_dev->mem_reclaim_wq[new_mem_type] != NULL &&
> > +                        ddr->mem_stats[new_mem_type] >
> > +                             ddr->mem_highs[new_mem_type])
> > +                     schedule_work(
> > +                             known_dev->mem_reclaim_wq[new_mem_type]);
> >       }
> >       mutex_unlock(&known_dev->mutex);
> >  }
> > --
> > 2.21.0
> >
> > _______________________________________________
> > dri-devel mailing list
> > dri-devel@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/dri-devel
>
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch
Daniel Vetter June 27, 2019, 6:15 a.m. UTC | #3
On Wed, Jun 26, 2019 at 06:52:50PM -0400, Kenny Ho wrote:
> Ok.  I am not too familiar with shrinker but I will dig into it.  Just
> so that I am looking into the right things, you are referring to
> things like struct shrinker and struct shrink_control?

Yeah. Reason I'm asking for this is this is how system memory is shrunk
right now, so at least having some conceptual similarities might be useful
here. And a lot of people have thought quite hard about system memory
shrinking and all that, so hopefully that gives us good design
inspiration.
-Daniel

> 
> Regards,
> Kenny
> 
> On Wed, Jun 26, 2019 at 12:44 PM Daniel Vetter <daniel@ffwll.ch> wrote:
> >
> > On Wed, Jun 26, 2019 at 11:05:22AM -0400, Kenny Ho wrote:
> > > Allow DRM TTM memory manager to register a work_struct, such that, when
> > > a drmcgrp is under memory pressure, memory reclaiming can be triggered
> > > immediately.
> > >
> > > Change-Id: I25ac04e2db9c19ff12652b88ebff18b44b2706d8
> > > Signed-off-by: Kenny Ho <Kenny.Ho@amd.com>
> > > ---
> > >  drivers/gpu/drm/ttm/ttm_bo.c    | 47 +++++++++++++++++++++++++++++++++
> > >  include/drm/drm_cgroup.h        | 14 ++++++++++
> > >  include/drm/ttm/ttm_bo_driver.h |  2 ++
> > >  kernel/cgroup/drm.c             | 33 +++++++++++++++++++++++
> > >  4 files changed, 96 insertions(+)
> > >
> > > diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> > > index 79c530f4a198..5fc3bc5bd4c5 100644
> > > --- a/drivers/gpu/drm/ttm/ttm_bo.c
> > > +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> > > @@ -1509,6 +1509,44 @@ int ttm_bo_evict_mm(struct ttm_bo_device *bdev, unsigned mem_type)
> > >  }
> > >  EXPORT_SYMBOL(ttm_bo_evict_mm);
> > >
> > > +static void ttm_bo_reclaim_wq(struct work_struct *work)
> > > +{
> >
> > I think a design a bit more inspired by memcg aware core shrinkers would
> > be nice, i.e. explicitly passing:
> > - which drm_cgroup needs to be shrunk
> > - which ttm_mem_reg (well the fancy new abstracted out stuff for tracking
> >   special gpu memory resources like tt or vram or whatever)
> > - how much it needs to be shrunk
> >
> > I think with that a lot more the book-keeping could be pushed into the
> > drm_cgroup code, and the callback just needs to actually shrink enough as
> > requested.
> > -Daniel
> >
> > > +     struct ttm_operation_ctx ctx = {
> > > +             .interruptible = false,
> > > +             .no_wait_gpu = false,
> > > +             .flags = TTM_OPT_FLAG_FORCE_ALLOC
> > > +     };
> > > +     struct ttm_mem_type_manager *man =
> > > +         container_of(work, struct ttm_mem_type_manager, reclaim_wq);
> > > +     struct ttm_bo_device *bdev = man->bdev;
> > > +     struct dma_fence *fence;
> > > +     int mem_type;
> > > +     int ret;
> > > +
> > > +     for (mem_type = 0; mem_type < TTM_NUM_MEM_TYPES; mem_type++)
> > > +             if (&bdev->man[mem_type] == man)
> > > +                     break;
> > > +
> > > +     BUG_ON(mem_type >= TTM_NUM_MEM_TYPES);
> > > +
> > > +     if (!drmcgrp_mem_pressure_scan(bdev, mem_type))
> > > +             return;
> > > +
> > > +     ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx);
> > > +     if (ret)
> > > +             return;
> > > +
> > > +     spin_lock(&man->move_lock);
> > > +     fence = dma_fence_get(man->move);
> > > +     spin_unlock(&man->move_lock);
> > > +
> > > +     if (fence) {
> > > +             ret = dma_fence_wait(fence, false);
> > > +             dma_fence_put(fence);
> > > +     }
> > > +
> > > +}
> > > +
> > >  int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type,
> > >                       unsigned long p_size)
> > >  {
> > > @@ -1543,6 +1581,13 @@ int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type,
> > >               INIT_LIST_HEAD(&man->lru[i]);
> > >       man->move = NULL;
> > >
> > > +     pr_err("drmcgrp %p type %d\n", bdev->ddev, type);
> > > +
> > > +     if (type <= TTM_PL_VRAM) {
> > > +             INIT_WORK(&man->reclaim_wq, ttm_bo_reclaim_wq);
> > > +             drmcgrp_register_device_mm(bdev->ddev, type, &man->reclaim_wq);
> > > +     }
> > > +
> > >       return 0;
> > >  }
> > >  EXPORT_SYMBOL(ttm_bo_init_mm);
> > > @@ -1620,6 +1665,8 @@ int ttm_bo_device_release(struct ttm_bo_device *bdev)
> > >               man = &bdev->man[i];
> > >               if (man->has_type) {
> > >                       man->use_type = false;
> > > +                     drmcgrp_unregister_device_mm(bdev->ddev, i);
> > > +                     cancel_work_sync(&man->reclaim_wq);
> > >                       if ((i != TTM_PL_SYSTEM) && ttm_bo_clean_mm(bdev, i)) {
> > >                               ret = -EBUSY;
> > >                               pr_err("DRM memory manager type %d is not clean\n",
> > > diff --git a/include/drm/drm_cgroup.h b/include/drm/drm_cgroup.h
> > > index 360c1e6c809f..134d6e5475f3 100644
> > > --- a/include/drm/drm_cgroup.h
> > > +++ b/include/drm/drm_cgroup.h
> > > @@ -5,6 +5,7 @@
> > >  #define __DRM_CGROUP_H__
> > >
> > >  #include <linux/cgroup_drm.h>
> > > +#include <linux/workqueue.h>
> > >  #include <drm/ttm/ttm_bo_api.h>
> > >  #include <drm/ttm/ttm_bo_driver.h>
> > >
> > > @@ -12,6 +13,9 @@
> > >
> > >  int drmcgrp_register_device(struct drm_device *device);
> > >  int drmcgrp_unregister_device(struct drm_device *device);
> > > +void drmcgrp_register_device_mm(struct drm_device *dev, unsigned type,
> > > +             struct work_struct *wq);
> > > +void drmcgrp_unregister_device_mm(struct drm_device *dev, unsigned type);
> > >  bool drmcgrp_is_self_or_ancestor(struct drmcgrp *self,
> > >               struct drmcgrp *relative);
> > >  void drmcgrp_chg_bo_alloc(struct drmcgrp *drmcgrp, struct drm_device *dev,
> > > @@ -40,6 +44,16 @@ static inline int drmcgrp_unregister_device(struct drm_device *device)
> > >       return 0;
> > >  }
> > >
> > > +static inline void drmcgrp_register_device_mm(struct drm_device *dev,
> > > +             unsigned type, struct work_struct *wq)
> > > +{
> > > +}
> > > +
> > > +static inline void drmcgrp_unregister_device_mm(struct drm_device *dev,
> > > +             unsigned type)
> > > +{
> > > +}
> > > +
> > >  static inline bool drmcgrp_is_self_or_ancestor(struct drmcgrp *self,
> > >               struct drmcgrp *relative)
> > >  {
> > > diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
> > > index 4cbcb41e5aa9..0956ca7888fc 100644
> > > --- a/include/drm/ttm/ttm_bo_driver.h
> > > +++ b/include/drm/ttm/ttm_bo_driver.h
> > > @@ -205,6 +205,8 @@ struct ttm_mem_type_manager {
> > >        * Protected by @move_lock.
> > >        */
> > >       struct dma_fence *move;
> > > +
> > > +     struct work_struct reclaim_wq;
> > >  };
> > >
> > >  /**
> > > diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
> > > index 1ce13db36ce9..985a89e849d3 100644
> > > --- a/kernel/cgroup/drm.c
> > > +++ b/kernel/cgroup/drm.c
> > > @@ -31,6 +31,8 @@ struct drmcgrp_device {
> > >       s64                     mem_bw_avg_bytes_per_us_default;
> > >
> > >       s64                     mem_highs_default[TTM_PL_PRIV+1];
> > > +
> > > +     struct work_struct      *mem_reclaim_wq[TTM_PL_PRIV];
> > >  };
> > >
> > >  #define DRMCG_CTF_PRIV_SIZE 3
> > > @@ -793,6 +795,31 @@ int drmcgrp_unregister_device(struct drm_device *dev)
> > >  }
> > >  EXPORT_SYMBOL(drmcgrp_unregister_device);
> > >
> > > +void drmcgrp_register_device_mm(struct drm_device *dev, unsigned type,
> > > +             struct work_struct *wq)
> > > +{
> > > +     if (dev == NULL || dev->primary->index > max_minor
> > > +                     || type >= TTM_PL_PRIV)
> > > +             return;
> > > +
> > > +     mutex_lock(&drmcgrp_mutex);
> > > +     known_drmcgrp_devs[dev->primary->index]->mem_reclaim_wq[type] = wq;
> > > +     mutex_unlock(&drmcgrp_mutex);
> > > +}
> > > +EXPORT_SYMBOL(drmcgrp_register_device_mm);
> > > +
> > > +void drmcgrp_unregister_device_mm(struct drm_device *dev, unsigned type)
> > > +{
> > > +     if (dev == NULL || dev->primary->index > max_minor
> > > +                     || type >= TTM_PL_PRIV)
> > > +             return;
> > > +
> > > +     mutex_lock(&drmcgrp_mutex);
> > > +     known_drmcgrp_devs[dev->primary->index]->mem_reclaim_wq[type] = NULL;
> > > +     mutex_unlock(&drmcgrp_mutex);
> > > +}
> > > +EXPORT_SYMBOL(drmcgrp_unregister_device_mm);
> > > +
> > >  bool drmcgrp_is_self_or_ancestor(struct drmcgrp *self, struct drmcgrp *relative)
> > >  {
> > >       for (; self != NULL; self = parent_drmcgrp(self))
> > > @@ -1004,6 +1031,12 @@ void drmcgrp_mem_track_move(struct ttm_buffer_object *old_bo, bool evict,
> > >
> > >               ddr->mem_bw_stats[DRMCGRP_MEM_BW_ATTR_BYTE_CREDIT]
> > >                       -= move_in_bytes;
> > > +
> > > +             if (known_dev->mem_reclaim_wq[new_mem_type] != NULL &&
> > > +                        ddr->mem_stats[new_mem_type] >
> > > +                             ddr->mem_highs[new_mem_type])
> > > +                     schedule_work(
> > > +                             known_dev->mem_reclaim_wq[new_mem_type]);
> > >       }
> > >       mutex_unlock(&known_dev->mutex);
> > >  }
> > > --
> > > 2.21.0
> > >
> > > _______________________________________________
> > > dri-devel mailing list
> > > dri-devel@lists.freedesktop.org
> > > https://lists.freedesktop.org/mailman/listinfo/dri-devel
> >
> > --
> > Daniel Vetter
> > Software Engineer, Intel Corporation
> > http://blog.ffwll.ch
diff mbox series

Patch

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 79c530f4a198..5fc3bc5bd4c5 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1509,6 +1509,44 @@  int ttm_bo_evict_mm(struct ttm_bo_device *bdev, unsigned mem_type)
 }
 EXPORT_SYMBOL(ttm_bo_evict_mm);
 
+static void ttm_bo_reclaim_wq(struct work_struct *work)
+{
+	struct ttm_operation_ctx ctx = {
+		.interruptible = false,
+		.no_wait_gpu = false,
+		.flags = TTM_OPT_FLAG_FORCE_ALLOC
+	};
+	struct ttm_mem_type_manager *man =
+	    container_of(work, struct ttm_mem_type_manager, reclaim_wq);
+	struct ttm_bo_device *bdev = man->bdev;
+	struct dma_fence *fence;
+	int mem_type;
+	int ret;
+
+	for (mem_type = 0; mem_type < TTM_NUM_MEM_TYPES; mem_type++)
+		if (&bdev->man[mem_type] == man)
+			break;
+
+	BUG_ON(mem_type >= TTM_NUM_MEM_TYPES);
+
+	if (!drmcgrp_mem_pressure_scan(bdev, mem_type))
+		return;
+
+	ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx);
+	if (ret)
+		return;
+
+	spin_lock(&man->move_lock);
+	fence = dma_fence_get(man->move);
+	spin_unlock(&man->move_lock);
+
+	if (fence) {
+		ret = dma_fence_wait(fence, false);
+		dma_fence_put(fence);
+	}
+
+}
+
 int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type,
 			unsigned long p_size)
 {
@@ -1543,6 +1581,13 @@  int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type,
 		INIT_LIST_HEAD(&man->lru[i]);
 	man->move = NULL;
 
+	pr_err("drmcgrp %p type %d\n", bdev->ddev, type);
+
+	if (type <= TTM_PL_VRAM) {
+		INIT_WORK(&man->reclaim_wq, ttm_bo_reclaim_wq);
+		drmcgrp_register_device_mm(bdev->ddev, type, &man->reclaim_wq);
+	}
+
 	return 0;
 }
 EXPORT_SYMBOL(ttm_bo_init_mm);
@@ -1620,6 +1665,8 @@  int ttm_bo_device_release(struct ttm_bo_device *bdev)
 		man = &bdev->man[i];
 		if (man->has_type) {
 			man->use_type = false;
+			drmcgrp_unregister_device_mm(bdev->ddev, i);
+			cancel_work_sync(&man->reclaim_wq);
 			if ((i != TTM_PL_SYSTEM) && ttm_bo_clean_mm(bdev, i)) {
 				ret = -EBUSY;
 				pr_err("DRM memory manager type %d is not clean\n",
diff --git a/include/drm/drm_cgroup.h b/include/drm/drm_cgroup.h
index 360c1e6c809f..134d6e5475f3 100644
--- a/include/drm/drm_cgroup.h
+++ b/include/drm/drm_cgroup.h
@@ -5,6 +5,7 @@ 
 #define __DRM_CGROUP_H__
 
 #include <linux/cgroup_drm.h>
+#include <linux/workqueue.h>
 #include <drm/ttm/ttm_bo_api.h>
 #include <drm/ttm/ttm_bo_driver.h>
 
@@ -12,6 +13,9 @@ 
 
 int drmcgrp_register_device(struct drm_device *device);
 int drmcgrp_unregister_device(struct drm_device *device);
+void drmcgrp_register_device_mm(struct drm_device *dev, unsigned type,
+		struct work_struct *wq);
+void drmcgrp_unregister_device_mm(struct drm_device *dev, unsigned type);
 bool drmcgrp_is_self_or_ancestor(struct drmcgrp *self,
 		struct drmcgrp *relative);
 void drmcgrp_chg_bo_alloc(struct drmcgrp *drmcgrp, struct drm_device *dev,
@@ -40,6 +44,16 @@  static inline int drmcgrp_unregister_device(struct drm_device *device)
 	return 0;
 }
 
+static inline void drmcgrp_register_device_mm(struct drm_device *dev,
+		unsigned type, struct work_struct *wq)
+{
+}
+
+static inline void drmcgrp_unregister_device_mm(struct drm_device *dev,
+		unsigned type)
+{
+}
+
 static inline bool drmcgrp_is_self_or_ancestor(struct drmcgrp *self,
 		struct drmcgrp *relative)
 {
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 4cbcb41e5aa9..0956ca7888fc 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -205,6 +205,8 @@  struct ttm_mem_type_manager {
 	 * Protected by @move_lock.
 	 */
 	struct dma_fence *move;
+
+	struct work_struct reclaim_wq;
 };
 
 /**
diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c
index 1ce13db36ce9..985a89e849d3 100644
--- a/kernel/cgroup/drm.c
+++ b/kernel/cgroup/drm.c
@@ -31,6 +31,8 @@  struct drmcgrp_device {
 	s64			mem_bw_avg_bytes_per_us_default;
 
 	s64			mem_highs_default[TTM_PL_PRIV+1];
+
+	struct work_struct	*mem_reclaim_wq[TTM_PL_PRIV];
 };
 
 #define DRMCG_CTF_PRIV_SIZE 3
@@ -793,6 +795,31 @@  int drmcgrp_unregister_device(struct drm_device *dev)
 }
 EXPORT_SYMBOL(drmcgrp_unregister_device);
 
+void drmcgrp_register_device_mm(struct drm_device *dev, unsigned type,
+		struct work_struct *wq)
+{
+	if (dev == NULL || dev->primary->index > max_minor
+			|| type >= TTM_PL_PRIV)
+		return;
+
+	mutex_lock(&drmcgrp_mutex);
+	known_drmcgrp_devs[dev->primary->index]->mem_reclaim_wq[type] = wq;
+	mutex_unlock(&drmcgrp_mutex);
+}
+EXPORT_SYMBOL(drmcgrp_register_device_mm);
+
+void drmcgrp_unregister_device_mm(struct drm_device *dev, unsigned type)
+{
+	if (dev == NULL || dev->primary->index > max_minor
+			|| type >= TTM_PL_PRIV)
+		return;
+
+	mutex_lock(&drmcgrp_mutex);
+	known_drmcgrp_devs[dev->primary->index]->mem_reclaim_wq[type] = NULL;
+	mutex_unlock(&drmcgrp_mutex);
+}
+EXPORT_SYMBOL(drmcgrp_unregister_device_mm);
+
 bool drmcgrp_is_self_or_ancestor(struct drmcgrp *self, struct drmcgrp *relative)
 {
 	for (; self != NULL; self = parent_drmcgrp(self))
@@ -1004,6 +1031,12 @@  void drmcgrp_mem_track_move(struct ttm_buffer_object *old_bo, bool evict,
 
 		ddr->mem_bw_stats[DRMCGRP_MEM_BW_ATTR_BYTE_CREDIT]
 			-= move_in_bytes;
+
+		if (known_dev->mem_reclaim_wq[new_mem_type] != NULL &&
+                        ddr->mem_stats[new_mem_type] >
+				ddr->mem_highs[new_mem_type])
+			schedule_work(
+				known_dev->mem_reclaim_wq[new_mem_type]);
 	}
 	mutex_unlock(&known_dev->mutex);
 }