Message ID | 20190829060533.32315-14-Kenny.Ho@amd.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | new cgroup controller for gpu/drm subsystem | expand |
Am 29.08.19 um 08:05 schrieb Kenny Ho: > Allow DRM TTM memory manager to register a work_struct, such that, when > a drmcgrp is under memory pressure, memory reclaiming can be triggered > immediately. > > Change-Id: I25ac04e2db9c19ff12652b88ebff18b44b2706d8 > Signed-off-by: Kenny Ho <Kenny.Ho@amd.com> > --- > drivers/gpu/drm/ttm/ttm_bo.c | 49 +++++++++++++++++++++++++++++++++ > include/drm/drm_cgroup.h | 16 +++++++++++ > include/drm/ttm/ttm_bo_driver.h | 2 ++ > kernel/cgroup/drm.c | 30 ++++++++++++++++++++ > 4 files changed, 97 insertions(+) > > diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c > index d7e3d3128ebb..72efae694b7e 100644 > --- a/drivers/gpu/drm/ttm/ttm_bo.c > +++ b/drivers/gpu/drm/ttm/ttm_bo.c > @@ -1590,6 +1590,46 @@ int ttm_bo_evict_mm(struct ttm_bo_device *bdev, unsigned mem_type) > } > EXPORT_SYMBOL(ttm_bo_evict_mm); > > +static void ttm_bo_reclaim_wq(struct work_struct *work) > +{ > + struct ttm_operation_ctx ctx = { > + .interruptible = false, > + .no_wait_gpu = false, > + .flags = TTM_OPT_FLAG_FORCE_ALLOC > + }; > + struct ttm_mem_type_manager *man = > + container_of(work, struct ttm_mem_type_manager, reclaim_wq); > + struct ttm_bo_device *bdev = man->bdev; > + struct dma_fence *fence; > + int mem_type; > + int ret; > + > + for (mem_type = 0; mem_type < TTM_NUM_MEM_TYPES; mem_type++) > + if (&bdev->man[mem_type] == man) > + break; > + > + WARN_ON(mem_type >= TTM_NUM_MEM_TYPES); > + if (mem_type >= TTM_NUM_MEM_TYPES) > + return; > + > + if (!drmcg_mem_pressure_scan(bdev, mem_type)) > + return; > + > + ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx, NULL); > + if (ret) > + return; > + > + spin_lock(&man->move_lock); > + fence = dma_fence_get(man->move); > + spin_unlock(&man->move_lock); > + > + if (fence) { > + ret = dma_fence_wait(fence, false); > + dma_fence_put(fence); > + } Why do you want to block for the fence here? That is a rather bad idea and would break pipe-lining. Apart from that I don't think we should put that into TTM. Instead drmcg_register_device_mm() should get a function pointer which is called from a work item when the group is under pressure. TTM can then provides the function which can be called, but the actually registration is job of the device and not TTM. Regards, Christian. > + > +} > + > int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type, > unsigned long p_size) > { > @@ -1624,6 +1664,13 @@ int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type, > INIT_LIST_HEAD(&man->lru[i]); > man->move = NULL; > > + pr_err("drmcg %p type %d\n", bdev->ddev, type); > + > + if (type <= TTM_PL_VRAM) { > + INIT_WORK(&man->reclaim_wq, ttm_bo_reclaim_wq); > + drmcg_register_device_mm(bdev->ddev, type, &man->reclaim_wq); > + } > + > return 0; > } > EXPORT_SYMBOL(ttm_bo_init_mm); > @@ -1701,6 +1748,8 @@ int ttm_bo_device_release(struct ttm_bo_device *bdev) > man = &bdev->man[i]; > if (man->has_type) { > man->use_type = false; > + drmcg_unregister_device_mm(bdev->ddev, i); > + cancel_work_sync(&man->reclaim_wq); > if ((i != TTM_PL_SYSTEM) && ttm_bo_clean_mm(bdev, i)) { > ret = -EBUSY; > pr_err("DRM memory manager type %d is not clean\n", > diff --git a/include/drm/drm_cgroup.h b/include/drm/drm_cgroup.h > index c11df388fdf2..6d9707e1eb72 100644 > --- a/include/drm/drm_cgroup.h > +++ b/include/drm/drm_cgroup.h > @@ -5,6 +5,7 @@ > #define __DRM_CGROUP_H__ > > #include <linux/cgroup_drm.h> > +#include <linux/workqueue.h> > #include <drm/ttm/ttm_bo_api.h> > #include <drm/ttm/ttm_bo_driver.h> > > @@ -25,12 +26,17 @@ struct drmcg_props { > s64 mem_bw_avg_bytes_per_us_default; > > s64 mem_highs_default[TTM_PL_PRIV+1]; > + > + struct work_struct *mem_reclaim_wq[TTM_PL_PRIV]; > }; > > #ifdef CONFIG_CGROUP_DRM > > void drmcg_device_update(struct drm_device *device); > void drmcg_device_early_init(struct drm_device *device); > +void drmcg_register_device_mm(struct drm_device *dev, unsigned int type, > + struct work_struct *wq); > +void drmcg_unregister_device_mm(struct drm_device *dev, unsigned int type); > bool drmcg_try_chg_bo_alloc(struct drmcg *drmcg, struct drm_device *dev, > size_t size); > void drmcg_unchg_bo_alloc(struct drmcg *drmcg, struct drm_device *dev, > @@ -53,6 +59,16 @@ static inline void drmcg_device_early_init(struct drm_device *device) > { > } > > +static inline void drmcg_register_device_mm(struct drm_device *dev, > + unsigned int type, struct work_struct *wq) > +{ > +} > + > +static inline void drmcg_unregister_device_mm(struct drm_device *dev, > + unsigned int type) > +{ > +} > + > static inline void drmcg_try_chg_bo_alloc(struct drmcg *drmcg, > struct drm_device *dev, size_t size) > { > diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h > index e1a805d65b83..529cef92bcf6 100644 > --- a/include/drm/ttm/ttm_bo_driver.h > +++ b/include/drm/ttm/ttm_bo_driver.h > @@ -205,6 +205,8 @@ struct ttm_mem_type_manager { > * Protected by @move_lock. > */ > struct dma_fence *move; > + > + struct work_struct reclaim_wq; > }; > > /** > diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c > index 04fb9a398740..0ea7f0619e25 100644 > --- a/kernel/cgroup/drm.c > +++ b/kernel/cgroup/drm.c > @@ -804,6 +804,29 @@ void drmcg_device_early_init(struct drm_device *dev) > } > EXPORT_SYMBOL(drmcg_device_early_init); > > +void drmcg_register_device_mm(struct drm_device *dev, unsigned int type, > + struct work_struct *wq) > +{ > + if (dev == NULL || type >= TTM_PL_PRIV) > + return; > + > + mutex_lock(&drmcg_mutex); > + dev->drmcg_props.mem_reclaim_wq[type] = wq; > + mutex_unlock(&drmcg_mutex); > +} > +EXPORT_SYMBOL(drmcg_register_device_mm); > + > +void drmcg_unregister_device_mm(struct drm_device *dev, unsigned int type) > +{ > + if (dev == NULL || type >= TTM_PL_PRIV) > + return; > + > + mutex_lock(&drmcg_mutex); > + dev->drmcg_props.mem_reclaim_wq[type] = NULL; > + mutex_unlock(&drmcg_mutex); > +} > +EXPORT_SYMBOL(drmcg_unregister_device_mm); > + > /** > * drmcg_try_chg_bo_alloc - charge GEM buffer usage for a device and cgroup > * @drmcg: the DRM cgroup to be charged to > @@ -1013,6 +1036,13 @@ void drmcg_mem_track_move(struct ttm_buffer_object *old_bo, bool evict, > > ddr->mem_bw_stats[DRMCG_MEM_BW_ATTR_BYTE_CREDIT] > -= move_in_bytes; > + > + if (dev->drmcg_props.mem_reclaim_wq[new_mem_type] > + != NULL && > + ddr->mem_stats[new_mem_type] > > + ddr->mem_highs[new_mem_type]) > + schedule_work(dev-> > + drmcg_props.mem_reclaim_wq[new_mem_type]); > } > mutex_unlock(&dev->drmcg_mutex); > }
Thanks for the feedback Christian. I am still digging into this one. Daniel suggested leveraging the Shrinker API for the functionality of this commit in RFC v3 but I am still trying to figure it out how/if ttm fit with shrinker (though the idea behind the shrinker API seems fairly straightforward as far as I understand it currently.) Regards, Kenny On Thu, Aug 29, 2019 at 3:08 AM Koenig, Christian <Christian.Koenig@amd.com> wrote: > Am 29.08.19 um 08:05 schrieb Kenny Ho: > > Allow DRM TTM memory manager to register a work_struct, such that, when > > a drmcgrp is under memory pressure, memory reclaiming can be triggered > > immediately. > > > > Change-Id: I25ac04e2db9c19ff12652b88ebff18b44b2706d8 > > Signed-off-by: Kenny Ho <Kenny.Ho@amd.com> > > --- > > drivers/gpu/drm/ttm/ttm_bo.c | 49 +++++++++++++++++++++++++++++++++ > > include/drm/drm_cgroup.h | 16 +++++++++++ > > include/drm/ttm/ttm_bo_driver.h | 2 ++ > > kernel/cgroup/drm.c | 30 ++++++++++++++++++++ > > 4 files changed, 97 insertions(+) > > > > diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c > > index d7e3d3128ebb..72efae694b7e 100644 > > --- a/drivers/gpu/drm/ttm/ttm_bo.c > > +++ b/drivers/gpu/drm/ttm/ttm_bo.c > > @@ -1590,6 +1590,46 @@ int ttm_bo_evict_mm(struct ttm_bo_device *bdev, > unsigned mem_type) > > } > > EXPORT_SYMBOL(ttm_bo_evict_mm); > > > > +static void ttm_bo_reclaim_wq(struct work_struct *work) > > +{ > > + struct ttm_operation_ctx ctx = { > > + .interruptible = false, > > + .no_wait_gpu = false, > > + .flags = TTM_OPT_FLAG_FORCE_ALLOC > > + }; > > + struct ttm_mem_type_manager *man = > > + container_of(work, struct ttm_mem_type_manager, reclaim_wq); > > + struct ttm_bo_device *bdev = man->bdev; > > + struct dma_fence *fence; > > + int mem_type; > > + int ret; > > + > > + for (mem_type = 0; mem_type < TTM_NUM_MEM_TYPES; mem_type++) > > + if (&bdev->man[mem_type] == man) > > + break; > > + > > + WARN_ON(mem_type >= TTM_NUM_MEM_TYPES); > > + if (mem_type >= TTM_NUM_MEM_TYPES) > > + return; > > + > > + if (!drmcg_mem_pressure_scan(bdev, mem_type)) > > + return; > > + > > + ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx, NULL); > > + if (ret) > > + return; > > + > > + spin_lock(&man->move_lock); > > + fence = dma_fence_get(man->move); > > + spin_unlock(&man->move_lock); > > + > > + if (fence) { > > + ret = dma_fence_wait(fence, false); > > + dma_fence_put(fence); > > + } > > Why do you want to block for the fence here? That is a rather bad idea > and would break pipe-lining. > > Apart from that I don't think we should put that into TTM. > > Instead drmcg_register_device_mm() should get a function pointer which > is called from a work item when the group is under pressure. > > TTM can then provides the function which can be called, but the actually > registration is job of the device and not TTM. > > Regards, > Christian. > > > + > > +} > > + > > int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type, > > unsigned long p_size) > > { > > @@ -1624,6 +1664,13 @@ int ttm_bo_init_mm(struct ttm_bo_device *bdev, > unsigned type, > > INIT_LIST_HEAD(&man->lru[i]); > > man->move = NULL; > > > > + pr_err("drmcg %p type %d\n", bdev->ddev, type); > > + > > + if (type <= TTM_PL_VRAM) { > > + INIT_WORK(&man->reclaim_wq, ttm_bo_reclaim_wq); > > + drmcg_register_device_mm(bdev->ddev, type, > &man->reclaim_wq); > > + } > > + > > return 0; > > } > > EXPORT_SYMBOL(ttm_bo_init_mm); > > @@ -1701,6 +1748,8 @@ int ttm_bo_device_release(struct ttm_bo_device > *bdev) > > man = &bdev->man[i]; > > if (man->has_type) { > > man->use_type = false; > > + drmcg_unregister_device_mm(bdev->ddev, i); > > + cancel_work_sync(&man->reclaim_wq); > > if ((i != TTM_PL_SYSTEM) && ttm_bo_clean_mm(bdev, > i)) { > > ret = -EBUSY; > > pr_err("DRM memory manager type %d is not > clean\n", > > diff --git a/include/drm/drm_cgroup.h b/include/drm/drm_cgroup.h > > index c11df388fdf2..6d9707e1eb72 100644 > > --- a/include/drm/drm_cgroup.h > > +++ b/include/drm/drm_cgroup.h > > @@ -5,6 +5,7 @@ > > #define __DRM_CGROUP_H__ > > > > #include <linux/cgroup_drm.h> > > +#include <linux/workqueue.h> > > #include <drm/ttm/ttm_bo_api.h> > > #include <drm/ttm/ttm_bo_driver.h> > > > > @@ -25,12 +26,17 @@ struct drmcg_props { > > s64 mem_bw_avg_bytes_per_us_default; > > > > s64 mem_highs_default[TTM_PL_PRIV+1]; > > + > > + struct work_struct *mem_reclaim_wq[TTM_PL_PRIV]; > > }; > > > > #ifdef CONFIG_CGROUP_DRM > > > > void drmcg_device_update(struct drm_device *device); > > void drmcg_device_early_init(struct drm_device *device); > > +void drmcg_register_device_mm(struct drm_device *dev, unsigned int type, > > + struct work_struct *wq); > > +void drmcg_unregister_device_mm(struct drm_device *dev, unsigned int > type); > > bool drmcg_try_chg_bo_alloc(struct drmcg *drmcg, struct drm_device > *dev, > > size_t size); > > void drmcg_unchg_bo_alloc(struct drmcg *drmcg, struct drm_device *dev, > > @@ -53,6 +59,16 @@ static inline void drmcg_device_early_init(struct > drm_device *device) > > { > > } > > > > +static inline void drmcg_register_device_mm(struct drm_device *dev, > > + unsigned int type, struct work_struct *wq) > > +{ > > +} > > + > > +static inline void drmcg_unregister_device_mm(struct drm_device *dev, > > + unsigned int type) > > +{ > > +} > > + > > static inline void drmcg_try_chg_bo_alloc(struct drmcg *drmcg, > > struct drm_device *dev, size_t size) > > { > > diff --git a/include/drm/ttm/ttm_bo_driver.h > b/include/drm/ttm/ttm_bo_driver.h > > index e1a805d65b83..529cef92bcf6 100644 > > --- a/include/drm/ttm/ttm_bo_driver.h > > +++ b/include/drm/ttm/ttm_bo_driver.h > > @@ -205,6 +205,8 @@ struct ttm_mem_type_manager { > > * Protected by @move_lock. > > */ > > struct dma_fence *move; > > + > > + struct work_struct reclaim_wq; > > }; > > > > /** > > diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c > > index 04fb9a398740..0ea7f0619e25 100644 > > --- a/kernel/cgroup/drm.c > > +++ b/kernel/cgroup/drm.c > > @@ -804,6 +804,29 @@ void drmcg_device_early_init(struct drm_device *dev) > > } > > EXPORT_SYMBOL(drmcg_device_early_init); > > > > +void drmcg_register_device_mm(struct drm_device *dev, unsigned int type, > > + struct work_struct *wq) > > +{ > > + if (dev == NULL || type >= TTM_PL_PRIV) > > + return; > > + > > + mutex_lock(&drmcg_mutex); > > + dev->drmcg_props.mem_reclaim_wq[type] = wq; > > + mutex_unlock(&drmcg_mutex); > > +} > > +EXPORT_SYMBOL(drmcg_register_device_mm); > > + > > +void drmcg_unregister_device_mm(struct drm_device *dev, unsigned int > type) > > +{ > > + if (dev == NULL || type >= TTM_PL_PRIV) > > + return; > > + > > + mutex_lock(&drmcg_mutex); > > + dev->drmcg_props.mem_reclaim_wq[type] = NULL; > > + mutex_unlock(&drmcg_mutex); > > +} > > +EXPORT_SYMBOL(drmcg_unregister_device_mm); > > + > > /** > > * drmcg_try_chg_bo_alloc - charge GEM buffer usage for a device and > cgroup > > * @drmcg: the DRM cgroup to be charged to > > @@ -1013,6 +1036,13 @@ void drmcg_mem_track_move(struct > ttm_buffer_object *old_bo, bool evict, > > > > ddr->mem_bw_stats[DRMCG_MEM_BW_ATTR_BYTE_CREDIT] > > -= move_in_bytes; > > + > > + if (dev->drmcg_props.mem_reclaim_wq[new_mem_type] > > + != NULL && > > + ddr->mem_stats[new_mem_type] > > > + ddr->mem_highs[new_mem_type]) > > + schedule_work(dev-> > > + drmcg_props.mem_reclaim_wq[new_mem_type]); > > } > > mutex_unlock(&dev->drmcg_mutex); > > } > >
Yeah, that's also a really good idea as well. The problem with the shrinker API is that it only applies to system memory currently. So you won't have a distinction which domain you need to evict stuff from. Regards, Christian. Am 29.08.19 um 16:07 schrieb Kenny Ho: Thanks for the feedback Christian. I am still digging into this one. Daniel suggested leveraging the Shrinker API for the functionality of this commit in RFC v3 but I am still trying to figure it out how/if ttm fit with shrinker (though the idea behind the shrinker API seems fairly straightforward as far as I understand it currently.) Regards, Kenny On Thu, Aug 29, 2019 at 3:08 AM Koenig, Christian <Christian.Koenig@amd.com<mailto:Christian.Koenig@amd.com>> wrote: Am 29.08.19 um 08:05 schrieb Kenny Ho: > Allow DRM TTM memory manager to register a work_struct, such that, when > a drmcgrp is under memory pressure, memory reclaiming can be triggered > immediately. > > Change-Id: I25ac04e2db9c19ff12652b88ebff18b44b2706d8 > Signed-off-by: Kenny Ho <Kenny.Ho@amd.com<mailto:Kenny.Ho@amd.com>> > --- > drivers/gpu/drm/ttm/ttm_bo.c | 49 +++++++++++++++++++++++++++++++++ > include/drm/drm_cgroup.h | 16 +++++++++++ > include/drm/ttm/ttm_bo_driver.h | 2 ++ > kernel/cgroup/drm.c | 30 ++++++++++++++++++++ > 4 files changed, 97 insertions(+) > > diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c > index d7e3d3128ebb..72efae694b7e 100644 > --- a/drivers/gpu/drm/ttm/ttm_bo.c > +++ b/drivers/gpu/drm/ttm/ttm_bo.c > @@ -1590,6 +1590,46 @@ int ttm_bo_evict_mm(struct ttm_bo_device *bdev, unsigned mem_type) > } > EXPORT_SYMBOL(ttm_bo_evict_mm); > > +static void ttm_bo_reclaim_wq(struct work_struct *work) > +{ > + struct ttm_operation_ctx ctx = { > + .interruptible = false, > + .no_wait_gpu = false, > + .flags = TTM_OPT_FLAG_FORCE_ALLOC > + }; > + struct ttm_mem_type_manager *man = > + container_of(work, struct ttm_mem_type_manager, reclaim_wq); > + struct ttm_bo_device *bdev = man->bdev; > + struct dma_fence *fence; > + int mem_type; > + int ret; > + > + for (mem_type = 0; mem_type < TTM_NUM_MEM_TYPES; mem_type++) > + if (&bdev->man[mem_type] == man) > + break; > + > + WARN_ON(mem_type >= TTM_NUM_MEM_TYPES); > + if (mem_type >= TTM_NUM_MEM_TYPES) > + return; > + > + if (!drmcg_mem_pressure_scan(bdev, mem_type)) > + return; > + > + ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx, NULL); > + if (ret) > + return; > + > + spin_lock(&man->move_lock); > + fence = dma_fence_get(man->move); > + spin_unlock(&man->move_lock); > + > + if (fence) { > + ret = dma_fence_wait(fence, false); > + dma_fence_put(fence); > + } Why do you want to block for the fence here? That is a rather bad idea and would break pipe-lining. Apart from that I don't think we should put that into TTM. Instead drmcg_register_device_mm() should get a function pointer which is called from a work item when the group is under pressure. TTM can then provides the function which can be called, but the actually registration is job of the device and not TTM. Regards, Christian. > + > +} > + > int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type, > unsigned long p_size) > { > @@ -1624,6 +1664,13 @@ int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type, > INIT_LIST_HEAD(&man->lru[i]); > man->move = NULL; > > + pr_err("drmcg %p type %d\n", bdev->ddev, type); > + > + if (type <= TTM_PL_VRAM) { > + INIT_WORK(&man->reclaim_wq, ttm_bo_reclaim_wq); > + drmcg_register_device_mm(bdev->ddev, type, &man->reclaim_wq); > + } > + > return 0; > } > EXPORT_SYMBOL(ttm_bo_init_mm); > @@ -1701,6 +1748,8 @@ int ttm_bo_device_release(struct ttm_bo_device *bdev) > man = &bdev->man[i]; > if (man->has_type) { > man->use_type = false; > + drmcg_unregister_device_mm(bdev->ddev, i); > + cancel_work_sync(&man->reclaim_wq); > if ((i != TTM_PL_SYSTEM) && ttm_bo_clean_mm(bdev, i)) { > ret = -EBUSY; > pr_err("DRM memory manager type %d is not clean\n", > diff --git a/include/drm/drm_cgroup.h b/include/drm/drm_cgroup.h > index c11df388fdf2..6d9707e1eb72 100644 > --- a/include/drm/drm_cgroup.h > +++ b/include/drm/drm_cgroup.h > @@ -5,6 +5,7 @@ > #define __DRM_CGROUP_H__ > > #include <linux/cgroup_drm.h> > +#include <linux/workqueue.h> > #include <drm/ttm/ttm_bo_api.h> > #include <drm/ttm/ttm_bo_driver.h> > > @@ -25,12 +26,17 @@ struct drmcg_props { > s64 mem_bw_avg_bytes_per_us_default; > > s64 mem_highs_default[TTM_PL_PRIV+1]; > + > + struct work_struct *mem_reclaim_wq[TTM_PL_PRIV]; > }; > > #ifdef CONFIG_CGROUP_DRM > > void drmcg_device_update(struct drm_device *device); > void drmcg_device_early_init(struct drm_device *device); > +void drmcg_register_device_mm(struct drm_device *dev, unsigned int type, > + struct work_struct *wq); > +void drmcg_unregister_device_mm(struct drm_device *dev, unsigned int type); > bool drmcg_try_chg_bo_alloc(struct drmcg *drmcg, struct drm_device *dev, > size_t size); > void drmcg_unchg_bo_alloc(struct drmcg *drmcg, struct drm_device *dev, > @@ -53,6 +59,16 @@ static inline void drmcg_device_early_init(struct drm_device *device) > { > } > > +static inline void drmcg_register_device_mm(struct drm_device *dev, > + unsigned int type, struct work_struct *wq) > +{ > +} > + > +static inline void drmcg_unregister_device_mm(struct drm_device *dev, > + unsigned int type) > +{ > +} > + > static inline void drmcg_try_chg_bo_alloc(struct drmcg *drmcg, > struct drm_device *dev, size_t size) > { > diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h > index e1a805d65b83..529cef92bcf6 100644 > --- a/include/drm/ttm/ttm_bo_driver.h > +++ b/include/drm/ttm/ttm_bo_driver.h > @@ -205,6 +205,8 @@ struct ttm_mem_type_manager { > * Protected by @move_lock. > */ > struct dma_fence *move; > + > + struct work_struct reclaim_wq; > }; > > /** > diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c > index 04fb9a398740..0ea7f0619e25 100644 > --- a/kernel/cgroup/drm.c > +++ b/kernel/cgroup/drm.c > @@ -804,6 +804,29 @@ void drmcg_device_early_init(struct drm_device *dev) > } > EXPORT_SYMBOL(drmcg_device_early_init); > > +void drmcg_register_device_mm(struct drm_device *dev, unsigned int type, > + struct work_struct *wq) > +{ > + if (dev == NULL || type >= TTM_PL_PRIV) > + return; > + > + mutex_lock(&drmcg_mutex); > + dev->drmcg_props.mem_reclaim_wq[type] = wq; > + mutex_unlock(&drmcg_mutex); > +} > +EXPORT_SYMBOL(drmcg_register_device_mm); > + > +void drmcg_unregister_device_mm(struct drm_device *dev, unsigned int type) > +{ > + if (dev == NULL || type >= TTM_PL_PRIV) > + return; > + > + mutex_lock(&drmcg_mutex); > + dev->drmcg_props.mem_reclaim_wq[type] = NULL; > + mutex_unlock(&drmcg_mutex); > +} > +EXPORT_SYMBOL(drmcg_unregister_device_mm); > + > /** > * drmcg_try_chg_bo_alloc - charge GEM buffer usage for a device and cgroup > * @drmcg: the DRM cgroup to be charged to > @@ -1013,6 +1036,13 @@ void drmcg_mem_track_move(struct ttm_buffer_object *old_bo, bool evict, > > ddr->mem_bw_stats[DRMCG_MEM_BW_ATTR_BYTE_CREDIT] > -= move_in_bytes; > + > + if (dev->drmcg_props.mem_reclaim_wq[new_mem_type] > + != NULL && > + ddr->mem_stats[new_mem_type] > > + ddr->mem_highs[new_mem_type]) > + schedule_work(dev-> > + drmcg_props.mem_reclaim_wq[new_mem_type]); > } > mutex_unlock(&dev->drmcg_mutex); > }
Yes, and I think it has quite a lot of coupling with mm's page and pressure mechanisms. My current thought is to just copy the API but have a separate implementation of "ttm_shrinker" and "ttm_shrinker_control" or something like that. I am certainly happy to listen to additional feedbacks and suggestions. Regards, Kenny On Thu, Aug 29, 2019 at 10:12 AM Koenig, Christian <Christian.Koenig@amd.com> wrote: > > Yeah, that's also a really good idea as well. > > The problem with the shrinker API is that it only applies to system memory currently. > > So you won't have a distinction which domain you need to evict stuff from. > > Regards, > Christian. > > Am 29.08.19 um 16:07 schrieb Kenny Ho: > > Thanks for the feedback Christian. I am still digging into this one. Daniel suggested leveraging the Shrinker API for the functionality of this commit in RFC v3 but I am still trying to figure it out how/if ttm fit with shrinker (though the idea behind the shrinker API seems fairly straightforward as far as I understand it currently.) > > Regards, > Kenny > > On Thu, Aug 29, 2019 at 3:08 AM Koenig, Christian <Christian.Koenig@amd.com> wrote: >> >> Am 29.08.19 um 08:05 schrieb Kenny Ho: >> > Allow DRM TTM memory manager to register a work_struct, such that, when >> > a drmcgrp is under memory pressure, memory reclaiming can be triggered >> > immediately. >> > >> > Change-Id: I25ac04e2db9c19ff12652b88ebff18b44b2706d8 >> > Signed-off-by: Kenny Ho <Kenny.Ho@amd.com> >> > --- >> > drivers/gpu/drm/ttm/ttm_bo.c | 49 +++++++++++++++++++++++++++++++++ >> > include/drm/drm_cgroup.h | 16 +++++++++++ >> > include/drm/ttm/ttm_bo_driver.h | 2 ++ >> > kernel/cgroup/drm.c | 30 ++++++++++++++++++++ >> > 4 files changed, 97 insertions(+) >> > >> > diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c >> > index d7e3d3128ebb..72efae694b7e 100644 >> > --- a/drivers/gpu/drm/ttm/ttm_bo.c >> > +++ b/drivers/gpu/drm/ttm/ttm_bo.c >> > @@ -1590,6 +1590,46 @@ int ttm_bo_evict_mm(struct ttm_bo_device *bdev, unsigned mem_type) >> > } >> > EXPORT_SYMBOL(ttm_bo_evict_mm); >> > >> > +static void ttm_bo_reclaim_wq(struct work_struct *work) >> > +{ >> > + struct ttm_operation_ctx ctx = { >> > + .interruptible = false, >> > + .no_wait_gpu = false, >> > + .flags = TTM_OPT_FLAG_FORCE_ALLOC >> > + }; >> > + struct ttm_mem_type_manager *man = >> > + container_of(work, struct ttm_mem_type_manager, reclaim_wq); >> > + struct ttm_bo_device *bdev = man->bdev; >> > + struct dma_fence *fence; >> > + int mem_type; >> > + int ret; >> > + >> > + for (mem_type = 0; mem_type < TTM_NUM_MEM_TYPES; mem_type++) >> > + if (&bdev->man[mem_type] == man) >> > + break; >> > + >> > + WARN_ON(mem_type >= TTM_NUM_MEM_TYPES); >> > + if (mem_type >= TTM_NUM_MEM_TYPES) >> > + return; >> > + >> > + if (!drmcg_mem_pressure_scan(bdev, mem_type)) >> > + return; >> > + >> > + ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx, NULL); >> > + if (ret) >> > + return; >> > + >> > + spin_lock(&man->move_lock); >> > + fence = dma_fence_get(man->move); >> > + spin_unlock(&man->move_lock); >> > + >> > + if (fence) { >> > + ret = dma_fence_wait(fence, false); >> > + dma_fence_put(fence); >> > + } >> >> Why do you want to block for the fence here? That is a rather bad idea >> and would break pipe-lining. >> >> Apart from that I don't think we should put that into TTM. >> >> Instead drmcg_register_device_mm() should get a function pointer which >> is called from a work item when the group is under pressure. >> >> TTM can then provides the function which can be called, but the actually >> registration is job of the device and not TTM. >> >> Regards, >> Christian. >> >> > + >> > +} >> > + >> > int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type, >> > unsigned long p_size) >> > { >> > @@ -1624,6 +1664,13 @@ int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type, >> > INIT_LIST_HEAD(&man->lru[i]); >> > man->move = NULL; >> > >> > + pr_err("drmcg %p type %d\n", bdev->ddev, type); >> > + >> > + if (type <= TTM_PL_VRAM) { >> > + INIT_WORK(&man->reclaim_wq, ttm_bo_reclaim_wq); >> > + drmcg_register_device_mm(bdev->ddev, type, &man->reclaim_wq); >> > + } >> > + >> > return 0; >> > } >> > EXPORT_SYMBOL(ttm_bo_init_mm); >> > @@ -1701,6 +1748,8 @@ int ttm_bo_device_release(struct ttm_bo_device *bdev) >> > man = &bdev->man[i]; >> > if (man->has_type) { >> > man->use_type = false; >> > + drmcg_unregister_device_mm(bdev->ddev, i); >> > + cancel_work_sync(&man->reclaim_wq); >> > if ((i != TTM_PL_SYSTEM) && ttm_bo_clean_mm(bdev, i)) { >> > ret = -EBUSY; >> > pr_err("DRM memory manager type %d is not clean\n", >> > diff --git a/include/drm/drm_cgroup.h b/include/drm/drm_cgroup.h >> > index c11df388fdf2..6d9707e1eb72 100644 >> > --- a/include/drm/drm_cgroup.h >> > +++ b/include/drm/drm_cgroup.h >> > @@ -5,6 +5,7 @@ >> > #define __DRM_CGROUP_H__ >> > >> > #include <linux/cgroup_drm.h> >> > +#include <linux/workqueue.h> >> > #include <drm/ttm/ttm_bo_api.h> >> > #include <drm/ttm/ttm_bo_driver.h> >> > >> > @@ -25,12 +26,17 @@ struct drmcg_props { >> > s64 mem_bw_avg_bytes_per_us_default; >> > >> > s64 mem_highs_default[TTM_PL_PRIV+1]; >> > + >> > + struct work_struct *mem_reclaim_wq[TTM_PL_PRIV]; >> > }; >> > >> > #ifdef CONFIG_CGROUP_DRM >> > >> > void drmcg_device_update(struct drm_device *device); >> > void drmcg_device_early_init(struct drm_device *device); >> > +void drmcg_register_device_mm(struct drm_device *dev, unsigned int type, >> > + struct work_struct *wq); >> > +void drmcg_unregister_device_mm(struct drm_device *dev, unsigned int type); >> > bool drmcg_try_chg_bo_alloc(struct drmcg *drmcg, struct drm_device *dev, >> > size_t size); >> > void drmcg_unchg_bo_alloc(struct drmcg *drmcg, struct drm_device *dev, >> > @@ -53,6 +59,16 @@ static inline void drmcg_device_early_init(struct drm_device *device) >> > { >> > } >> > >> > +static inline void drmcg_register_device_mm(struct drm_device *dev, >> > + unsigned int type, struct work_struct *wq) >> > +{ >> > +} >> > + >> > +static inline void drmcg_unregister_device_mm(struct drm_device *dev, >> > + unsigned int type) >> > +{ >> > +} >> > + >> > static inline void drmcg_try_chg_bo_alloc(struct drmcg *drmcg, >> > struct drm_device *dev, size_t size) >> > { >> > diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h >> > index e1a805d65b83..529cef92bcf6 100644 >> > --- a/include/drm/ttm/ttm_bo_driver.h >> > +++ b/include/drm/ttm/ttm_bo_driver.h >> > @@ -205,6 +205,8 @@ struct ttm_mem_type_manager { >> > * Protected by @move_lock. >> > */ >> > struct dma_fence *move; >> > + >> > + struct work_struct reclaim_wq; >> > }; >> > >> > /** >> > diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c >> > index 04fb9a398740..0ea7f0619e25 100644 >> > --- a/kernel/cgroup/drm.c >> > +++ b/kernel/cgroup/drm.c >> > @@ -804,6 +804,29 @@ void drmcg_device_early_init(struct drm_device *dev) >> > } >> > EXPORT_SYMBOL(drmcg_device_early_init); >> > >> > +void drmcg_register_device_mm(struct drm_device *dev, unsigned int type, >> > + struct work_struct *wq) >> > +{ >> > + if (dev == NULL || type >= TTM_PL_PRIV) >> > + return; >> > + >> > + mutex_lock(&drmcg_mutex); >> > + dev->drmcg_props.mem_reclaim_wq[type] = wq; >> > + mutex_unlock(&drmcg_mutex); >> > +} >> > +EXPORT_SYMBOL(drmcg_register_device_mm); >> > + >> > +void drmcg_unregister_device_mm(struct drm_device *dev, unsigned int type) >> > +{ >> > + if (dev == NULL || type >= TTM_PL_PRIV) >> > + return; >> > + >> > + mutex_lock(&drmcg_mutex); >> > + dev->drmcg_props.mem_reclaim_wq[type] = NULL; >> > + mutex_unlock(&drmcg_mutex); >> > +} >> > +EXPORT_SYMBOL(drmcg_unregister_device_mm); >> > + >> > /** >> > * drmcg_try_chg_bo_alloc - charge GEM buffer usage for a device and cgroup >> > * @drmcg: the DRM cgroup to be charged to >> > @@ -1013,6 +1036,13 @@ void drmcg_mem_track_move(struct ttm_buffer_object *old_bo, bool evict, >> > >> > ddr->mem_bw_stats[DRMCG_MEM_BW_ATTR_BYTE_CREDIT] >> > -= move_in_bytes; >> > + >> > + if (dev->drmcg_props.mem_reclaim_wq[new_mem_type] >> > + != NULL && >> > + ddr->mem_stats[new_mem_type] > >> > + ddr->mem_highs[new_mem_type]) >> > + schedule_work(dev-> >> > + drmcg_props.mem_reclaim_wq[new_mem_type]); >> > } >> > mutex_unlock(&dev->drmcg_mutex); >> > } >> >
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index d7e3d3128ebb..72efae694b7e 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1590,6 +1590,46 @@ int ttm_bo_evict_mm(struct ttm_bo_device *bdev, unsigned mem_type) } EXPORT_SYMBOL(ttm_bo_evict_mm); +static void ttm_bo_reclaim_wq(struct work_struct *work) +{ + struct ttm_operation_ctx ctx = { + .interruptible = false, + .no_wait_gpu = false, + .flags = TTM_OPT_FLAG_FORCE_ALLOC + }; + struct ttm_mem_type_manager *man = + container_of(work, struct ttm_mem_type_manager, reclaim_wq); + struct ttm_bo_device *bdev = man->bdev; + struct dma_fence *fence; + int mem_type; + int ret; + + for (mem_type = 0; mem_type < TTM_NUM_MEM_TYPES; mem_type++) + if (&bdev->man[mem_type] == man) + break; + + WARN_ON(mem_type >= TTM_NUM_MEM_TYPES); + if (mem_type >= TTM_NUM_MEM_TYPES) + return; + + if (!drmcg_mem_pressure_scan(bdev, mem_type)) + return; + + ret = ttm_mem_evict_first(bdev, mem_type, NULL, &ctx, NULL); + if (ret) + return; + + spin_lock(&man->move_lock); + fence = dma_fence_get(man->move); + spin_unlock(&man->move_lock); + + if (fence) { + ret = dma_fence_wait(fence, false); + dma_fence_put(fence); + } + +} + int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type, unsigned long p_size) { @@ -1624,6 +1664,13 @@ int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type, INIT_LIST_HEAD(&man->lru[i]); man->move = NULL; + pr_err("drmcg %p type %d\n", bdev->ddev, type); + + if (type <= TTM_PL_VRAM) { + INIT_WORK(&man->reclaim_wq, ttm_bo_reclaim_wq); + drmcg_register_device_mm(bdev->ddev, type, &man->reclaim_wq); + } + return 0; } EXPORT_SYMBOL(ttm_bo_init_mm); @@ -1701,6 +1748,8 @@ int ttm_bo_device_release(struct ttm_bo_device *bdev) man = &bdev->man[i]; if (man->has_type) { man->use_type = false; + drmcg_unregister_device_mm(bdev->ddev, i); + cancel_work_sync(&man->reclaim_wq); if ((i != TTM_PL_SYSTEM) && ttm_bo_clean_mm(bdev, i)) { ret = -EBUSY; pr_err("DRM memory manager type %d is not clean\n", diff --git a/include/drm/drm_cgroup.h b/include/drm/drm_cgroup.h index c11df388fdf2..6d9707e1eb72 100644 --- a/include/drm/drm_cgroup.h +++ b/include/drm/drm_cgroup.h @@ -5,6 +5,7 @@ #define __DRM_CGROUP_H__ #include <linux/cgroup_drm.h> +#include <linux/workqueue.h> #include <drm/ttm/ttm_bo_api.h> #include <drm/ttm/ttm_bo_driver.h> @@ -25,12 +26,17 @@ struct drmcg_props { s64 mem_bw_avg_bytes_per_us_default; s64 mem_highs_default[TTM_PL_PRIV+1]; + + struct work_struct *mem_reclaim_wq[TTM_PL_PRIV]; }; #ifdef CONFIG_CGROUP_DRM void drmcg_device_update(struct drm_device *device); void drmcg_device_early_init(struct drm_device *device); +void drmcg_register_device_mm(struct drm_device *dev, unsigned int type, + struct work_struct *wq); +void drmcg_unregister_device_mm(struct drm_device *dev, unsigned int type); bool drmcg_try_chg_bo_alloc(struct drmcg *drmcg, struct drm_device *dev, size_t size); void drmcg_unchg_bo_alloc(struct drmcg *drmcg, struct drm_device *dev, @@ -53,6 +59,16 @@ static inline void drmcg_device_early_init(struct drm_device *device) { } +static inline void drmcg_register_device_mm(struct drm_device *dev, + unsigned int type, struct work_struct *wq) +{ +} + +static inline void drmcg_unregister_device_mm(struct drm_device *dev, + unsigned int type) +{ +} + static inline void drmcg_try_chg_bo_alloc(struct drmcg *drmcg, struct drm_device *dev, size_t size) { diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index e1a805d65b83..529cef92bcf6 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -205,6 +205,8 @@ struct ttm_mem_type_manager { * Protected by @move_lock. */ struct dma_fence *move; + + struct work_struct reclaim_wq; }; /** diff --git a/kernel/cgroup/drm.c b/kernel/cgroup/drm.c index 04fb9a398740..0ea7f0619e25 100644 --- a/kernel/cgroup/drm.c +++ b/kernel/cgroup/drm.c @@ -804,6 +804,29 @@ void drmcg_device_early_init(struct drm_device *dev) } EXPORT_SYMBOL(drmcg_device_early_init); +void drmcg_register_device_mm(struct drm_device *dev, unsigned int type, + struct work_struct *wq) +{ + if (dev == NULL || type >= TTM_PL_PRIV) + return; + + mutex_lock(&drmcg_mutex); + dev->drmcg_props.mem_reclaim_wq[type] = wq; + mutex_unlock(&drmcg_mutex); +} +EXPORT_SYMBOL(drmcg_register_device_mm); + +void drmcg_unregister_device_mm(struct drm_device *dev, unsigned int type) +{ + if (dev == NULL || type >= TTM_PL_PRIV) + return; + + mutex_lock(&drmcg_mutex); + dev->drmcg_props.mem_reclaim_wq[type] = NULL; + mutex_unlock(&drmcg_mutex); +} +EXPORT_SYMBOL(drmcg_unregister_device_mm); + /** * drmcg_try_chg_bo_alloc - charge GEM buffer usage for a device and cgroup * @drmcg: the DRM cgroup to be charged to @@ -1013,6 +1036,13 @@ void drmcg_mem_track_move(struct ttm_buffer_object *old_bo, bool evict, ddr->mem_bw_stats[DRMCG_MEM_BW_ATTR_BYTE_CREDIT] -= move_in_bytes; + + if (dev->drmcg_props.mem_reclaim_wq[new_mem_type] + != NULL && + ddr->mem_stats[new_mem_type] > + ddr->mem_highs[new_mem_type]) + schedule_work(dev-> + drmcg_props.mem_reclaim_wq[new_mem_type]); } mutex_unlock(&dev->drmcg_mutex); }
Allow DRM TTM memory manager to register a work_struct, such that, when a drmcgrp is under memory pressure, memory reclaiming can be triggered immediately. Change-Id: I25ac04e2db9c19ff12652b88ebff18b44b2706d8 Signed-off-by: Kenny Ho <Kenny.Ho@amd.com> --- drivers/gpu/drm/ttm/ttm_bo.c | 49 +++++++++++++++++++++++++++++++++ include/drm/drm_cgroup.h | 16 +++++++++++ include/drm/ttm/ttm_bo_driver.h | 2 ++ kernel/cgroup/drm.c | 30 ++++++++++++++++++++ 4 files changed, 97 insertions(+)