Message ID | 20230903170736.513347-16-dmitry.osipenko@collabora.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Add generic memory shrinker to VirtIO-GPU and Panfrost DRM drivers | expand |
On Sun, 3 Sep 2023 20:07:31 +0300 Dmitry Osipenko <dmitry.osipenko@collabora.com> wrote: > Introduce common drm-shmem shrinker for DRM drivers. > > To start using drm-shmem shrinker drivers should do the following: > > 1. Implement evict() callback of GEM object where driver should check > whether object is purgeable or evictable using drm-shmem helpers and > perform the shrinking action > > 2. Initialize drm-shmem internals using drmm_gem_shmem_init(drm_device), > which will register drm-shmem shrinker > > 3. Implement madvise IOCTL that will use drm_gem_shmem_madvise() > > Signed-off-by: Daniel Almeida <daniel.almeida@collabora.com> > Signed-off-by: Dmitry Osipenko <dmitry.osipenko@collabora.com> > --- > drivers/gpu/drm/drm_gem_shmem_helper.c | 442 ++++++++++++++++-- > .../gpu/drm/panfrost/panfrost_gem_shrinker.c | 9 +- > include/drm/drm_device.h | 10 +- > include/drm/drm_gem_shmem_helper.h | 71 ++- > 4 files changed, 494 insertions(+), 38 deletions(-) > > diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c > index 4633a418faba..a0a6c7e505c8 100644 > --- a/drivers/gpu/drm/drm_gem_shmem_helper.c > +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c > @@ -20,6 +20,7 @@ > #include <drm/drm_device.h> > #include <drm/drm_drv.h> > #include <drm/drm_gem_shmem_helper.h> > +#include <drm/drm_managed.h> > #include <drm/drm_prime.h> > #include <drm/drm_print.h> > > @@ -88,8 +89,6 @@ __drm_gem_shmem_create(struct drm_device *dev, size_t size, bool private) > if (ret) > goto err_release; > > - INIT_LIST_HEAD(&shmem->madv_list); > - > if (!private) { > /* > * Our buffers are kept pinned, so allocating them > @@ -128,6 +127,62 @@ struct drm_gem_shmem_object *drm_gem_shmem_create(struct drm_device *dev, size_t > } > EXPORT_SYMBOL_GPL(drm_gem_shmem_create); > > +static bool drm_gem_shmem_is_evictable(struct drm_gem_shmem_object *shmem) > +{ > + dma_resv_assert_held(shmem->base.resv); > + > + return (shmem->madv >= 0) && shmem->base.funcs->evict && > + refcount_read(&shmem->pages_use_count) && > + !refcount_read(&shmem->pages_pin_count) && > + !shmem->base.dma_buf && !shmem->base.import_attach && > + shmem->sgt && !shmem->evicted; > +} > + > +static void > +drm_gem_shmem_update_pages_state_locked(struct drm_gem_shmem_object *shmem) > +{ > + struct drm_gem_object *obj = &shmem->base; > + struct drm_gem_shmem *shmem_mm = obj->dev->shmem_mm; > + struct drm_gem_shmem_shrinker *shmem_shrinker = &shmem_mm->shrinker; > + > + dma_resv_assert_held(shmem->base.resv); > + > + if (!shmem_shrinker || obj->import_attach) > + return; > + > + if (shmem->madv < 0) > + drm_gem_lru_remove(&shmem->base); > + else if (drm_gem_shmem_is_evictable(shmem) || drm_gem_shmem_is_purgeable(shmem)) > + drm_gem_lru_move_tail(&shmem_shrinker->lru_evictable, &shmem->base); > + else if (shmem->evicted) > + drm_gem_lru_move_tail(&shmem_shrinker->lru_evicted, &shmem->base); > + else if (!shmem->pages) > + drm_gem_lru_remove(&shmem->base); > + else > + drm_gem_lru_move_tail(&shmem_shrinker->lru_pinned, &shmem->base); > +} > + > +static void > +drm_gem_shmem_do_release_pages_locked(struct drm_gem_shmem_object *shmem) > +{ > + struct drm_gem_object *obj = &shmem->base; > + > + if (!shmem->pages) { > + drm_WARN_ON(obj->dev, !shmem->evicted && shmem->madv >= 0); > + return; > + } > + > +#ifdef CONFIG_X86 > + if (shmem->map_wc) > + set_pages_array_wb(shmem->pages, obj->size >> PAGE_SHIFT); > +#endif > + > + drm_gem_put_pages(obj, shmem->pages, > + shmem->pages_mark_dirty_on_put, > + shmem->pages_mark_accessed_on_put); > + shmem->pages = NULL; > +} > + > /** > * drm_gem_shmem_free - Free resources associated with a shmem GEM object > * @shmem: shmem GEM object to free > @@ -142,8 +197,6 @@ void drm_gem_shmem_free(struct drm_gem_shmem_object *shmem) > if (obj->import_attach) { > drm_prime_gem_destroy(obj, shmem->sgt); > } else if (!shmem->imported_sgt) { > - dma_resv_lock(shmem->base.resv, NULL); > - > drm_WARN_ON(obj->dev, refcount_read(&shmem->vmap_use_count)); > > if (shmem->sgt) { > @@ -152,14 +205,27 @@ void drm_gem_shmem_free(struct drm_gem_shmem_object *shmem) > sg_free_table(shmem->sgt); > kfree(shmem->sgt); > } > - if (refcount_read(&shmem->pages_use_count)) { > - drm_gem_shmem_put_pages_locked(shmem); It would be preferable to introduce drm_gem_shmem_do_release_pages_locked() earlier in the series (not a huge fan of the name BTW, especially since this function can be called without the lock held in the free path), so we keep things bisectable. > - drm_WARN_ON(obj->dev, !shmem->got_pages_sgt); > + > + /* > + * Destroying the object is a special case.. drm_gem_shmem_free() > + * calls many things that WARN_ON if the obj lock is not held. If that's the case, that should be fixed the same way you're fixing the put_pages_locked() situation here. > * But > + * acquiring the obj lock in drm_gem_shmem_release_pages_locked() can > + * cause a locking order inversion between reservation_ww_class_mutex > + * and fs_reclaim. > + * > + * This deadlock is not actually possible, because no one should > + * be already holding the lock when drm_gem_shmem_free() is called. > + * Unfortunately lockdep is not aware of this detail. So when the > + * refcount drops to zero, don't touch the reservation lock. > + */ > + if (shmem->got_pages_sgt && > + refcount_dec_and_test(&shmem->pages_use_count)) { > + drm_gem_shmem_do_release_pages_locked(shmem); > + shmem->got_pages_sgt = false; > } Leaking memory is the right thing to do if pages_use_count > 1 (it's better to leak than having someone access memory it no longer owns), but I think it's worth mentioning in the above comment. > > drm_WARN_ON(obj->dev, refcount_read(&shmem->pages_use_count)); > - > - dma_resv_unlock(shmem->base.resv); > + drm_WARN_ON(obj->dev, shmem->got_pages_sgt); > } > > drm_gem_object_release(obj); > @@ -167,15 +233,26 @@ void drm_gem_shmem_free(struct drm_gem_shmem_object *shmem) > } > EXPORT_SYMBOL_GPL(drm_gem_shmem_free); > > -static int drm_gem_shmem_get_pages_locked(struct drm_gem_shmem_object *shmem) > +static int > +drm_gem_shmem_acquire_pages(struct drm_gem_shmem_object *shmem, bool init) > { > struct drm_gem_object *obj = &shmem->base; > struct page **pages; > > dma_resv_assert_held(shmem->base.resv); I'd defer this check to the caller (AFAICT, all callers have this already). This way the function name is consistent with its behavior. > > - if (refcount_inc_not_zero(&shmem->pages_use_count)) > + if (shmem->madv < 0) { > + drm_WARN_ON(obj->dev, shmem->pages); > + return -ENOMEM; > + } > + > + if (shmem->pages) { > + drm_WARN_ON(obj->dev, !shmem->evicted); > return 0; > + } > + > + if (drm_WARN_ON(obj->dev, !(init ^ refcount_read(&shmem->pages_use_count)))) > + return -EINVAL; > > pages = drm_gem_get_pages(obj); > if (IS_ERR(pages)) { > @@ -196,8 +273,36 @@ static int drm_gem_shmem_get_pages_locked(struct drm_gem_shmem_object *shmem) > > shmem->pages = pages; > > + return 0; > +} > + > +static void > +drm_gem_shmem_release_pages_locked(struct drm_gem_shmem_object *shmem) > +{ > + dma_resv_assert_held(shmem->base.resv); > + drm_gem_shmem_do_release_pages_locked(shmem); > +} Do we really need an intermediate helper here. Can't we let the callers ensure the resv is held (that's already the case BTW). This way, you simply have drm_gem_shmem_{release,acquire}_pages() helpers, and the drm_gem_shmem_release_pages() helper can be used in drm_gem_shmem_free(). > + > +static int drm_gem_shmem_get_pages_locked(struct drm_gem_shmem_object *shmem) > +{ > + int err; > + > + dma_resv_assert_held(shmem->base.resv); > + > + if (shmem->madv < 0) > + return -ENOMEM; > + > + if (refcount_inc_not_zero(&shmem->pages_use_count)) > + return 0; > + > + err = drm_gem_shmem_acquire_pages(shmem, true); > + if (err) > + return err; > + > refcount_set(&shmem->pages_use_count, 1); > > + drm_gem_shmem_update_pages_state_locked(shmem); > + > return 0; > } > > @@ -209,20 +314,11 @@ static int drm_gem_shmem_get_pages_locked(struct drm_gem_shmem_object *shmem) > */ > void drm_gem_shmem_put_pages_locked(struct drm_gem_shmem_object *shmem) > { > - struct drm_gem_object *obj = &shmem->base; > - > dma_resv_assert_held(shmem->base.resv); > > if (refcount_dec_and_test(&shmem->pages_use_count)) { > -#ifdef CONFIG_X86 > - if (shmem->map_wc) > - set_pages_array_wb(shmem->pages, obj->size >> PAGE_SHIFT); > -#endif > - > - drm_gem_put_pages(obj, shmem->pages, > - shmem->pages_mark_dirty_on_put, > - shmem->pages_mark_accessed_on_put); > - shmem->pages = NULL; > + drm_gem_shmem_release_pages_locked(shmem); > + drm_gem_shmem_update_pages_state_locked(shmem); > } > } > EXPORT_SYMBOL_GPL(drm_gem_shmem_put_pages_locked); > @@ -251,8 +347,15 @@ static int drm_gem_shmem_pin_locked(struct drm_gem_shmem_object *shmem) > return 0; > > ret = drm_gem_shmem_get_pages_locked(shmem); > - if (!ret) > + if (!ret) { > + ret = drm_gem_shmem_swapin_locked(shmem); > + if (ret) { > + drm_gem_shmem_put_pages_locked(shmem); > + return ret; > + } > + > refcount_set(&shmem->pages_pin_count, 1); > + } > > return ret; > } > @@ -448,29 +551,54 @@ int drm_gem_shmem_madvise_locked(struct drm_gem_shmem_object *shmem, int madv) > > madv = shmem->madv; > > + drm_gem_shmem_update_pages_state_locked(shmem); > + > return (madv >= 0); > } > EXPORT_SYMBOL_GPL(drm_gem_shmem_madvise_locked); > > -void drm_gem_shmem_purge_locked(struct drm_gem_shmem_object *shmem) > +int drm_gem_shmem_madvise(struct drm_gem_shmem_object *shmem, int madv) > +{ > + struct drm_gem_object *obj = &shmem->base; > + int ret; > + > + ret = dma_resv_lock_interruptible(obj->resv, NULL); > + if (ret) > + return ret; > + > + ret = drm_gem_shmem_madvise_locked(shmem, madv); > + dma_resv_unlock(obj->resv); > + > + return ret; > +} > +EXPORT_SYMBOL_GPL(drm_gem_shmem_madvise); > + > +static void drm_gem_shmem_unpin_pages_locked(struct drm_gem_shmem_object *shmem) > { > struct drm_gem_object *obj = &shmem->base; > struct drm_device *dev = obj->dev; > > dma_resv_assert_held(shmem->base.resv); > > - drm_WARN_ON(obj->dev, !drm_gem_shmem_is_purgeable(shmem)); > + if (shmem->evicted) > + return; > > dma_unmap_sgtable(dev->dev, shmem->sgt, DMA_BIDIRECTIONAL, 0); > + drm_gem_shmem_release_pages_locked(shmem); > + drm_vma_node_unmap(&obj->vma_node, dev->anon_inode->i_mapping); > + > sg_free_table(shmem->sgt); > kfree(shmem->sgt); > shmem->sgt = NULL; > +} > > - drm_gem_shmem_put_pages_locked(shmem); > +void drm_gem_shmem_purge_locked(struct drm_gem_shmem_object *shmem) > +{ > + struct drm_gem_object *obj = &shmem->base; > > - shmem->madv = -1; > + drm_WARN_ON(obj->dev, !drm_gem_shmem_is_purgeable(shmem)); > > - drm_vma_node_unmap(&obj->vma_node, dev->anon_inode->i_mapping); > + drm_gem_shmem_unpin_pages_locked(shmem); > drm_gem_free_mmap_offset(obj); > > /* Our goal here is to return as much of the memory as > @@ -481,9 +609,59 @@ void drm_gem_shmem_purge_locked(struct drm_gem_shmem_object *shmem) > shmem_truncate_range(file_inode(obj->filp), 0, (loff_t)-1); > > invalidate_mapping_pages(file_inode(obj->filp)->i_mapping, 0, (loff_t)-1); > + > + shmem->madv = -1; > + shmem->evicted = false; > + drm_gem_shmem_update_pages_state_locked(shmem); > } > EXPORT_SYMBOL_GPL(drm_gem_shmem_purge_locked); > > +/** > + * drm_gem_shmem_swapin_locked() - Moves shmem GEM back to memory and enables > + * hardware access to the memory. > + * @shmem: shmem GEM object > + * > + * This function moves shmem GEM back to memory if it was previously evicted > + * by the memory shrinker. The GEM is ready to use on success. > + * > + * Returns: > + * 0 on success or a negative error code on failure. > + */ > +int drm_gem_shmem_swapin_locked(struct drm_gem_shmem_object *shmem) > +{ > + struct drm_gem_object *obj = &shmem->base; > + struct sg_table *sgt; > + int err; > + > + dma_resv_assert_held(shmem->base.resv); > + > + if (shmem->evicted) { > + err = drm_gem_shmem_acquire_pages(shmem, false); > + if (err) > + return err; > + > + sgt = drm_gem_shmem_get_sg_table(shmem); > + if (IS_ERR(sgt)) > + return PTR_ERR(sgt); > + > + err = dma_map_sgtable(obj->dev->dev, sgt, > + DMA_BIDIRECTIONAL, 0); > + if (err) { > + sg_free_table(sgt); > + kfree(sgt); > + return err; > + } > + > + shmem->sgt = sgt; > + shmem->evicted = false; > + > + drm_gem_shmem_update_pages_state_locked(shmem); > + } > + > + return 0; > +} > +EXPORT_SYMBOL_GPL(drm_gem_shmem_swapin_locked); > + > /** > * drm_gem_shmem_dumb_create - Create a dumb shmem buffer object > * @file: DRM file structure to create the dumb buffer for > @@ -530,22 +708,34 @@ static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf) > vm_fault_t ret; > struct page *page; > pgoff_t page_offset; > + bool pages_unpinned; > + int err; > > /* We don't use vmf->pgoff since that has the fake offset */ > page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT; > > dma_resv_lock(shmem->base.resv, NULL); > > - if (page_offset >= num_pages || > - drm_WARN_ON_ONCE(obj->dev, !shmem->pages) || > - shmem->madv < 0) { > + /* Sanity-check that we have the pages pointer when it should present */ > + pages_unpinned = (shmem->evicted || shmem->madv < 0 || > + !refcount_read(&shmem->pages_use_count)); > + drm_WARN_ON_ONCE(obj->dev, !shmem->pages ^ pages_unpinned); > + > + if (page_offset >= num_pages || (!shmem->pages && !shmem->evicted)) { > ret = VM_FAULT_SIGBUS; > } else { > + err = drm_gem_shmem_swapin_locked(shmem); > + if (err) { > + ret = VM_FAULT_OOM; > + goto unlock; > + } > + > page = shmem->pages[page_offset]; > > ret = vmf_insert_pfn(vma, vmf->address, page_to_pfn(page)); > } > > +unlock: > dma_resv_unlock(shmem->base.resv); > > return ret; > @@ -568,6 +758,7 @@ static void drm_gem_shmem_vm_open(struct vm_area_struct *vma) > drm_WARN_ON_ONCE(obj->dev, > !refcount_inc_not_zero(&shmem->pages_use_count)); > > + drm_gem_shmem_update_pages_state_locked(shmem); > dma_resv_unlock(shmem->base.resv); > > drm_gem_vm_open(vma); > @@ -653,7 +844,9 @@ void drm_gem_shmem_print_info(const struct drm_gem_shmem_object *shmem, > drm_printf_indent(p, indent, "pages_pin_count=%u\n", refcount_read(&shmem->pages_pin_count)); > drm_printf_indent(p, indent, "pages_use_count=%u\n", refcount_read(&shmem->pages_use_count)); > drm_printf_indent(p, indent, "vmap_use_count=%u\n", refcount_read(&shmem->vmap_use_count)); > + drm_printf_indent(p, indent, "evicted=%d\n", shmem->evicted); > drm_printf_indent(p, indent, "vaddr=%p\n", shmem->vaddr); > + drm_printf_indent(p, indent, "madv=%d\n", shmem->madv); > } > EXPORT_SYMBOL_GPL(drm_gem_shmem_print_info); > > @@ -715,6 +908,8 @@ static struct sg_table *drm_gem_shmem_get_pages_sgt_locked(struct drm_gem_shmem_ > > shmem->sgt = sgt; > > + drm_gem_shmem_update_pages_state_locked(shmem); > + > return sgt; > > err_free_sgt: > @@ -802,6 +997,191 @@ drm_gem_shmem_prime_import_sg_table(struct drm_device *dev, > } > EXPORT_SYMBOL_GPL(drm_gem_shmem_prime_import_sg_table); > > +static struct drm_gem_shmem_shrinker * > +to_drm_gem_shmem_shrinker(struct shrinker *shrinker) > +{ > + return container_of(shrinker, struct drm_gem_shmem_shrinker, base); > +} > + > +static unsigned long > +drm_gem_shmem_shrinker_count_objects(struct shrinker *shrinker, > + struct shrink_control *sc) > +{ > + struct drm_gem_shmem_shrinker *shmem_shrinker = > + to_drm_gem_shmem_shrinker(shrinker); > + unsigned long count = shmem_shrinker->lru_evictable.count; > + > + if (count >= SHRINK_EMPTY) > + return SHRINK_EMPTY - 1; > + > + return count ?: SHRINK_EMPTY; > +} > + > +void drm_gem_shmem_evict_locked(struct drm_gem_shmem_object *shmem) > +{ > + struct drm_gem_object *obj = &shmem->base; > + > + drm_WARN_ON(obj->dev, !drm_gem_shmem_is_evictable(shmem)); > + drm_WARN_ON(obj->dev, shmem->evicted); > + > + drm_gem_shmem_unpin_pages_locked(shmem); > + > + shmem->evicted = true; > + drm_gem_shmem_update_pages_state_locked(shmem); > +} > +EXPORT_SYMBOL_GPL(drm_gem_shmem_evict_locked); > + > +static bool drm_gem_shmem_shrinker_evict_locked(struct drm_gem_object *obj) > +{ > + struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); > + int err; > + > + if (!drm_gem_shmem_is_evictable(shmem) || > + get_nr_swap_pages() < obj->size >> PAGE_SHIFT) > + return false; > + > + err = drm_gem_evict_locked(obj); > + if (err) > + return false; > + > + return true; > +} > + > +static bool drm_gem_shmem_shrinker_purge_locked(struct drm_gem_object *obj) > +{ > + struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); > + int err; > + > + if (!drm_gem_shmem_is_purgeable(shmem)) > + return false; > + > + err = drm_gem_evict_locked(obj); > + if (err) > + return false; > + > + return true; > +} > + > +static unsigned long > +drm_gem_shmem_shrinker_scan_objects(struct shrinker *shrinker, > + struct shrink_control *sc) > +{ > + struct drm_gem_shmem_shrinker *shmem_shrinker; > + unsigned long nr_to_scan = sc->nr_to_scan; > + unsigned long remaining = 0; > + unsigned long freed = 0; > + > + shmem_shrinker = to_drm_gem_shmem_shrinker(shrinker); > + > + /* purge as many objects as we can */ > + freed += drm_gem_lru_scan(&shmem_shrinker->lru_evictable, > + nr_to_scan, &remaining, > + drm_gem_shmem_shrinker_purge_locked); > + > + /* evict as many objects as we can */ > + if (freed < nr_to_scan) > + freed += drm_gem_lru_scan(&shmem_shrinker->lru_evictable, > + nr_to_scan - freed, &remaining, > + drm_gem_shmem_shrinker_evict_locked); > + > + return (freed > 0 && remaining > 0) ? freed : SHRINK_STOP; > +} > + > +static int drm_gem_shmem_shrinker_init(struct drm_gem_shmem *shmem_mm, > + const char *shrinker_name) > +{ > + struct drm_gem_shmem_shrinker *shmem_shrinker = &shmem_mm->shrinker; > + int err; > + > + shmem_shrinker->base.count_objects = drm_gem_shmem_shrinker_count_objects; > + shmem_shrinker->base.scan_objects = drm_gem_shmem_shrinker_scan_objects; > + shmem_shrinker->base.seeks = DEFAULT_SEEKS; > + > + mutex_init(&shmem_shrinker->lock); > + drm_gem_lru_init(&shmem_shrinker->lru_evictable, &shmem_shrinker->lock); > + drm_gem_lru_init(&shmem_shrinker->lru_evicted, &shmem_shrinker->lock); > + drm_gem_lru_init(&shmem_shrinker->lru_pinned, &shmem_shrinker->lock); > + > + err = register_shrinker(&shmem_shrinker->base, shrinker_name); > + if (err) { > + mutex_destroy(&shmem_shrinker->lock); > + return err; > + } > + > + return 0; > +} > + > +static void drm_gem_shmem_shrinker_release(struct drm_device *dev, > + struct drm_gem_shmem *shmem_mm) > +{ > + struct drm_gem_shmem_shrinker *shmem_shrinker = &shmem_mm->shrinker; > + > + unregister_shrinker(&shmem_shrinker->base); > + drm_WARN_ON(dev, !list_empty(&shmem_shrinker->lru_evictable.list)); > + drm_WARN_ON(dev, !list_empty(&shmem_shrinker->lru_evicted.list)); > + drm_WARN_ON(dev, !list_empty(&shmem_shrinker->lru_pinned.list)); > + mutex_destroy(&shmem_shrinker->lock); > +} > + > +static int drm_gem_shmem_init(struct drm_device *dev) > +{ > + int err; > + > + if (drm_WARN_ON(dev, dev->shmem_mm)) > + return -EBUSY; > + > + dev->shmem_mm = kzalloc(sizeof(*dev->shmem_mm), GFP_KERNEL); > + if (!dev->shmem_mm) > + return -ENOMEM; > + > + err = drm_gem_shmem_shrinker_init(dev->shmem_mm, dev->unique); > + if (err) > + goto free_gem_shmem; > + > + return 0; > + > +free_gem_shmem: > + kfree(dev->shmem_mm); > + dev->shmem_mm = NULL; > + > + return err; > +} > + > +static void drm_gem_shmem_release(struct drm_device *dev, void *ptr) > +{ > + struct drm_gem_shmem *shmem_mm = dev->shmem_mm; > + > + drm_gem_shmem_shrinker_release(dev, shmem_mm); > + dev->shmem_mm = NULL; > + kfree(shmem_mm); > +} > + > +/** > + * drmm_gem_shmem_init() - Initialize drm-shmem internals > + * @dev: DRM device > + * > + * Cleanup is automatically managed as part of DRM device releasing. > + * Calling this function multiple times will result in a error. > + * > + * Returns: > + * 0 on success or a negative error code on failure. > + */ > +int drmm_gem_shmem_init(struct drm_device *dev) > +{ > + int err; > + > + err = drm_gem_shmem_init(dev); > + if (err) > + return err; > + > + err = drmm_add_action_or_reset(dev, drm_gem_shmem_release, NULL); > + if (err) > + return err; > + > + return 0; > +} > +EXPORT_SYMBOL_GPL(drmm_gem_shmem_init); > + > MODULE_DESCRIPTION("DRM SHMEM memory-management helpers"); > MODULE_IMPORT_NS(DMA_BUF); > MODULE_LICENSE("GPL v2"); > diff --git a/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c b/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c > index 72193bd734e1..1aa94fff7072 100644 > --- a/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c > +++ b/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c > @@ -15,6 +15,13 @@ > #include "panfrost_gem.h" > #include "panfrost_mmu.h" > > +static bool panfrost_gem_shmem_is_purgeable(struct drm_gem_shmem_object *shmem) > +{ > + return (shmem->madv > 0) && > + !refcount_read(&shmem->pages_pin_count) && shmem->sgt && > + !shmem->base.dma_buf && !shmem->base.import_attach; > +} > + > static unsigned long > panfrost_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) > { > @@ -27,7 +34,7 @@ panfrost_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc > return 0; > > list_for_each_entry(shmem, &pfdev->shrinker_list, madv_list) { > - if (drm_gem_shmem_is_purgeable(shmem)) > + if (panfrost_gem_shmem_is_purgeable(shmem)) > count += shmem->base.size >> PAGE_SHIFT; > } > > diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h > index 7cf4afae2e79..a978f0cb5e84 100644 > --- a/include/drm/drm_device.h > +++ b/include/drm/drm_device.h > @@ -16,6 +16,7 @@ struct drm_vblank_crtc; > struct drm_vma_offset_manager; > struct drm_vram_mm; > struct drm_fb_helper; > +struct drm_gem_shmem_shrinker; > > struct inode; > > @@ -290,8 +291,13 @@ struct drm_device { > /** @vma_offset_manager: GEM information */ > struct drm_vma_offset_manager *vma_offset_manager; > > - /** @vram_mm: VRAM MM memory manager */ > - struct drm_vram_mm *vram_mm; > + union { > + /** @vram_mm: VRAM MM memory manager */ > + struct drm_vram_mm *vram_mm; > + > + /** @shmem_mm: SHMEM GEM memory manager */ > + struct drm_gem_shmem *shmem_mm; > + }; > > /** > * @switch_power_state: > diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h > index 63e91e8f2d5c..65c99da25048 100644 > --- a/include/drm/drm_gem_shmem_helper.h > +++ b/include/drm/drm_gem_shmem_helper.h > @@ -6,6 +6,7 @@ > #include <linux/fs.h> > #include <linux/mm.h> > #include <linux/mutex.h> > +#include <linux/shrinker.h> > > #include <drm/drm_file.h> > #include <drm/drm_gem.h> > @@ -13,6 +14,7 @@ > #include <drm/drm_prime.h> > > struct dma_buf_attachment; > +struct drm_device; > struct drm_mode_create_dumb; > struct drm_printer; > struct sg_table; > @@ -53,8 +55,8 @@ struct drm_gem_shmem_object { > * @madv: State for madvise > * > * 0 is active/inuse. > + * 1 is not-needed/can-be-purged > * A negative value is the object is purged. > - * Positive values are driver specific and not used by the helpers. > */ > int madv; > > @@ -115,6 +117,12 @@ struct drm_gem_shmem_object { > * @map_wc: map object write-combined (instead of using shmem defaults). > */ > bool map_wc : 1; > + > + /** > + * @evicted: True if shmem pages are evicted by the memory shrinker. > + * Used internally by memory shrinker. > + */ > + bool evicted : 1; > }; > > #define to_drm_gem_shmem_obj(obj) \ > @@ -133,14 +141,22 @@ void drm_gem_shmem_vunmap_locked(struct drm_gem_shmem_object *shmem, > int drm_gem_shmem_mmap(struct drm_gem_shmem_object *shmem, struct vm_area_struct *vma); > > int drm_gem_shmem_madvise_locked(struct drm_gem_shmem_object *shmem, int madv); > +int drm_gem_shmem_madvise(struct drm_gem_shmem_object *shmem, int madv); > > static inline bool drm_gem_shmem_is_purgeable(struct drm_gem_shmem_object *shmem) > { > - return (shmem->madv > 0) && > - !refcount_read(&shmem->pages_pin_count) && shmem->sgt && > - !shmem->base.dma_buf && !shmem->base.import_attach; > + dma_resv_assert_held(shmem->base.resv); > + > + return (shmem->madv > 0) && shmem->base.funcs->evict && > + refcount_read(&shmem->pages_use_count) && > + !refcount_read(&shmem->pages_pin_count) && > + !shmem->base.dma_buf && !shmem->base.import_attach && > + (shmem->sgt || shmem->evicted); > } > > +int drm_gem_shmem_swapin_locked(struct drm_gem_shmem_object *shmem); > + > +void drm_gem_shmem_evict_locked(struct drm_gem_shmem_object *shmem); > void drm_gem_shmem_purge_locked(struct drm_gem_shmem_object *shmem); > > struct sg_table *drm_gem_shmem_get_sg_table(struct drm_gem_shmem_object *shmem); > @@ -284,6 +300,53 @@ static inline int drm_gem_shmem_object_mmap(struct drm_gem_object *obj, struct v > return drm_gem_shmem_mmap(shmem, vma); > } > > +/** > + * drm_gem_shmem_object_madvise - unlocked GEM object function for drm_gem_shmem_madvise_locked() > + * @obj: GEM object > + * @madv: Madvise value > + * > + * This function wraps drm_gem_shmem_madvise_locked(), providing unlocked variant. > + * > + * Returns: > + * 0 on success or a negative error code on failure. > + */ > +static inline int drm_gem_shmem_object_madvise(struct drm_gem_object *obj, int madv) > +{ > + struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); > + > + return drm_gem_shmem_madvise(shmem, madv); > +} > + > +/** > + * struct drm_gem_shmem_shrinker - Memory shrinker of GEM shmem memory manager > + */ > +struct drm_gem_shmem_shrinker { > + /** @base: Shrinker for purging shmem GEM objects */ > + struct shrinker base; > + > + /** @lock: Protects @lru_* */ > + struct mutex lock; > + > + /** @lru_pinned: List of pinned shmem GEM objects */ > + struct drm_gem_lru lru_pinned; > + > + /** @lru_evictable: List of shmem GEM objects to be evicted */ > + struct drm_gem_lru lru_evictable; > + > + /** @lru_evicted: List of evicted shmem GEM objects */ > + struct drm_gem_lru lru_evicted; > +}; > + > +/** > + * struct drm_gem_shmem - GEM shmem memory manager > + */ > +struct drm_gem_shmem { > + /** @shrinker: GEM shmem shrinker */ > + struct drm_gem_shmem_shrinker shrinker; > +}; > + > +int drmm_gem_shmem_init(struct drm_device *dev); > + > /* > * Driver ops > */
Hi Dmitry, kernel test robot noticed the following build warnings: https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Dmitry-Osipenko/drm-shmem-helper-Fix-UAF-in-error-path-when-freeing-SGT-of-imported-GEM/20230904-011037 base: linus/master patch link: https://lore.kernel.org/r/20230903170736.513347-16-dmitry.osipenko%40collabora.com patch subject: [PATCH v16 15/20] drm/shmem-helper: Add memory shrinker config: x86_64-randconfig-161-20230907 (https://download.01.org/0day-ci/archive/20230907/202309070814.jyGJOJzY-lkp@intel.com/config) compiler: gcc-12 (Debian 12.2.0-14) 12.2.0 reproduce: (https://download.01.org/0day-ci/archive/20230907/202309070814.jyGJOJzY-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Reported-by: Dan Carpenter <dan.carpenter@linaro.org> | Closes: https://lore.kernel.org/r/202309070814.jyGJOJzY-lkp@intel.com/ smatch warnings: drivers/gpu/drm/drm_gem_shmem_helper.c:733 drm_gem_shmem_fault() error: we previously assumed 'shmem->pages' could be null (see line 724) vim +733 drivers/gpu/drm/drm_gem_shmem_helper.c 2194a63a818db7 Noralf Trønnes 2019-03-12 702 static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf) 2194a63a818db7 Noralf Trønnes 2019-03-12 703 { 2194a63a818db7 Noralf Trønnes 2019-03-12 704 struct vm_area_struct *vma = vmf->vma; 2194a63a818db7 Noralf Trønnes 2019-03-12 705 struct drm_gem_object *obj = vma->vm_private_data; 2194a63a818db7 Noralf Trønnes 2019-03-12 706 struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); 2194a63a818db7 Noralf Trønnes 2019-03-12 707 loff_t num_pages = obj->size >> PAGE_SHIFT; d611b4a0907cec Neil Roberts 2021-02-23 708 vm_fault_t ret; 2194a63a818db7 Noralf Trønnes 2019-03-12 709 struct page *page; 11d5a4745e00e7 Neil Roberts 2021-02-23 710 pgoff_t page_offset; 2c607edf57db6a Dmitry Osipenko 2023-09-03 711 bool pages_unpinned; 2c607edf57db6a Dmitry Osipenko 2023-09-03 712 int err; 11d5a4745e00e7 Neil Roberts 2021-02-23 713 11d5a4745e00e7 Neil Roberts 2021-02-23 714 /* We don't use vmf->pgoff since that has the fake offset */ 11d5a4745e00e7 Neil Roberts 2021-02-23 715 page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT; 2194a63a818db7 Noralf Trønnes 2019-03-12 716 21aa27ddc58269 Dmitry Osipenko 2023-05-30 717 dma_resv_lock(shmem->base.resv, NULL); 2194a63a818db7 Noralf Trønnes 2019-03-12 718 2c607edf57db6a Dmitry Osipenko 2023-09-03 719 /* Sanity-check that we have the pages pointer when it should present */ 2c607edf57db6a Dmitry Osipenko 2023-09-03 720 pages_unpinned = (shmem->evicted || shmem->madv < 0 || 2c607edf57db6a Dmitry Osipenko 2023-09-03 721 !refcount_read(&shmem->pages_use_count)); 2c607edf57db6a Dmitry Osipenko 2023-09-03 722 drm_WARN_ON_ONCE(obj->dev, !shmem->pages ^ pages_unpinned); 2c607edf57db6a Dmitry Osipenko 2023-09-03 723 2c607edf57db6a Dmitry Osipenko 2023-09-03 @724 if (page_offset >= num_pages || (!shmem->pages && !shmem->evicted)) { ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Should this be || instead of &&? (The other thing that people do is add "!shmem->evicted" for readability even though it doesn't need to be checked. So maybe that's the issue, but the checker assumes it needs to be checked). d611b4a0907cec Neil Roberts 2021-02-23 725 ret = VM_FAULT_SIGBUS; d611b4a0907cec Neil Roberts 2021-02-23 726 } else { 2c607edf57db6a Dmitry Osipenko 2023-09-03 727 err = drm_gem_shmem_swapin_locked(shmem); Or maybe it's because the kbuild bot can't use the cross function db and shmem->pages is assigned here? 2c607edf57db6a Dmitry Osipenko 2023-09-03 728 if (err) { 2c607edf57db6a Dmitry Osipenko 2023-09-03 729 ret = VM_FAULT_OOM; 2c607edf57db6a Dmitry Osipenko 2023-09-03 730 goto unlock; 2c607edf57db6a Dmitry Osipenko 2023-09-03 731 } 2c607edf57db6a Dmitry Osipenko 2023-09-03 732 11d5a4745e00e7 Neil Roberts 2021-02-23 @733 page = shmem->pages[page_offset]; ^^^^^^^^^^^^ Unchecked dereference 2194a63a818db7 Noralf Trønnes 2019-03-12 734 8b93d1d7dbd578 Daniel Vetter 2021-08-12 735 ret = vmf_insert_pfn(vma, vmf->address, page_to_pfn(page)); d611b4a0907cec Neil Roberts 2021-02-23 736 } d611b4a0907cec Neil Roberts 2021-02-23 737 2c607edf57db6a Dmitry Osipenko 2023-09-03 738 unlock: 21aa27ddc58269 Dmitry Osipenko 2023-05-30 739 dma_resv_unlock(shmem->base.resv); d611b4a0907cec Neil Roberts 2021-02-23 740 d611b4a0907cec Neil Roberts 2021-02-23 741 return ret; 2194a63a818db7 Noralf Trønnes 2019-03-12 742 }
On 9/7/23 13:03, Dan Carpenter wrote: > 2c607edf57db6a Dmitry Osipenko 2023-09-03 @724 if (page_offset >= num_pages || (!shmem->pages && !shmem->evicted)) { > ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ > Should this be || instead of &&? (The other thing that people do is > add "!shmem->evicted" for readability even though it doesn't need to be > checked. So maybe that's the issue, but the checker assumes it needs to > be checked). > > d611b4a0907cec Neil Roberts 2021-02-23 725 ret = VM_FAULT_SIGBUS; > d611b4a0907cec Neil Roberts 2021-02-23 726 } else { > 2c607edf57db6a Dmitry Osipenko 2023-09-03 727 err = drm_gem_shmem_swapin_locked(shmem); > > Or maybe it's because the kbuild bot can't use the cross function db > and shmem->pages is assigned here? Should be a function db problem. The shmem->pages won't be NULL if drm_gem_shmem_swapin_locked() succeeds.
On 9/5/23 11:03, Boris Brezillon wrote: >> * But >> + * acquiring the obj lock in drm_gem_shmem_release_pages_locked() can >> + * cause a locking order inversion between reservation_ww_class_mutex >> + * and fs_reclaim. >> + * >> + * This deadlock is not actually possible, because no one should >> + * be already holding the lock when drm_gem_shmem_free() is called. >> + * Unfortunately lockdep is not aware of this detail. So when the >> + * refcount drops to zero, don't touch the reservation lock. >> + */ >> + if (shmem->got_pages_sgt && >> + refcount_dec_and_test(&shmem->pages_use_count)) { >> + drm_gem_shmem_do_release_pages_locked(shmem); >> + shmem->got_pages_sgt = false; >> } > Leaking memory is the right thing to do if pages_use_count > 1 (it's > better to leak than having someone access memory it no longer owns), but > I think it's worth mentioning in the above comment. It's unlikely that it will be only a leak without a following up use-after-free. Neither is acceptable. The drm_gem_shmem_free() could be changed such that kernel won't blow up on a refcnt bug, but that's not worthwhile doing because drivers shouldn't have silly bugs.
On Wed, 13 Sep 2023 03:56:14 +0300 Dmitry Osipenko <dmitry.osipenko@collabora.com> wrote: > On 9/5/23 11:03, Boris Brezillon wrote: > >> * But > >> + * acquiring the obj lock in drm_gem_shmem_release_pages_locked() can > >> + * cause a locking order inversion between reservation_ww_class_mutex > >> + * and fs_reclaim. > >> + * > >> + * This deadlock is not actually possible, because no one should > >> + * be already holding the lock when drm_gem_shmem_free() is called. > >> + * Unfortunately lockdep is not aware of this detail. So when the > >> + * refcount drops to zero, don't touch the reservation lock. > >> + */ > >> + if (shmem->got_pages_sgt && > >> + refcount_dec_and_test(&shmem->pages_use_count)) { > >> + drm_gem_shmem_do_release_pages_locked(shmem); > >> + shmem->got_pages_sgt = false; > >> } > > Leaking memory is the right thing to do if pages_use_count > 1 (it's > > better to leak than having someone access memory it no longer owns), but > > I think it's worth mentioning in the above comment. > > It's unlikely that it will be only a leak without a following up > use-after-free. Neither is acceptable. Not necessarily, if you have a page leak, it could be that the GPU has access to those pages, but doesn't need the GEM object anymore (pages are mapped by the iommu, which doesn't need shmem->sgt or shmem->pages after the mapping is created). Without a WARN_ON(), this can go unnoticed and lead to memory corruptions/information leaks. > > The drm_gem_shmem_free() could be changed such that kernel won't blow up > on a refcnt bug, but that's not worthwhile doing because drivers > shouldn't have silly bugs. We definitely don't want to fix that, but we want to complain loudly (WARN_ON()), and make sure the risk is limited (preventing memory from being re-assigned to someone else by not freeing it).
On 9/13/23 10:48, Boris Brezillon wrote: > On Wed, 13 Sep 2023 03:56:14 +0300 > Dmitry Osipenko <dmitry.osipenko@collabora.com> wrote: > >> On 9/5/23 11:03, Boris Brezillon wrote: >>>> * But >>>> + * acquiring the obj lock in drm_gem_shmem_release_pages_locked() can >>>> + * cause a locking order inversion between reservation_ww_class_mutex >>>> + * and fs_reclaim. >>>> + * >>>> + * This deadlock is not actually possible, because no one should >>>> + * be already holding the lock when drm_gem_shmem_free() is called. >>>> + * Unfortunately lockdep is not aware of this detail. So when the >>>> + * refcount drops to zero, don't touch the reservation lock. >>>> + */ >>>> + if (shmem->got_pages_sgt && >>>> + refcount_dec_and_test(&shmem->pages_use_count)) { >>>> + drm_gem_shmem_do_release_pages_locked(shmem); >>>> + shmem->got_pages_sgt = false; >>>> } >>> Leaking memory is the right thing to do if pages_use_count > 1 (it's >>> better to leak than having someone access memory it no longer owns), but >>> I think it's worth mentioning in the above comment. >> >> It's unlikely that it will be only a leak without a following up >> use-after-free. Neither is acceptable. > > Not necessarily, if you have a page leak, it could be that the GPU has > access to those pages, but doesn't need the GEM object anymore > (pages are mapped by the iommu, which doesn't need shmem->sgt or > shmem->pages after the mapping is created). Without a WARN_ON(), this > can go unnoticed and lead to memory corruptions/information leaks. > >> >> The drm_gem_shmem_free() could be changed such that kernel won't blow up >> on a refcnt bug, but that's not worthwhile doing because drivers >> shouldn't have silly bugs. > > We definitely don't want to fix that, but we want to complain loudly > (WARN_ON()), and make sure the risk is limited (preventing memory from > being re-assigned to someone else by not freeing it). That's what the code did and continues to do here. Not exactly sure what you're trying to say. I'm going to relocate the comment in v17 to put_pages(), we can continue discussing it there if I'm missing yours point.
On Thu, 14 Sep 2023 07:02:52 +0300 Dmitry Osipenko <dmitry.osipenko@collabora.com> wrote: > On 9/13/23 10:48, Boris Brezillon wrote: > > On Wed, 13 Sep 2023 03:56:14 +0300 > > Dmitry Osipenko <dmitry.osipenko@collabora.com> wrote: > > > >> On 9/5/23 11:03, Boris Brezillon wrote: > >>>> * But > >>>> + * acquiring the obj lock in drm_gem_shmem_release_pages_locked() can > >>>> + * cause a locking order inversion between reservation_ww_class_mutex > >>>> + * and fs_reclaim. > >>>> + * > >>>> + * This deadlock is not actually possible, because no one should > >>>> + * be already holding the lock when drm_gem_shmem_free() is called. > >>>> + * Unfortunately lockdep is not aware of this detail. So when the > >>>> + * refcount drops to zero, don't touch the reservation lock. > >>>> + */ > >>>> + if (shmem->got_pages_sgt && > >>>> + refcount_dec_and_test(&shmem->pages_use_count)) { > >>>> + drm_gem_shmem_do_release_pages_locked(shmem); > >>>> + shmem->got_pages_sgt = false; > >>>> } > >>> Leaking memory is the right thing to do if pages_use_count > 1 (it's > >>> better to leak than having someone access memory it no longer owns), but > >>> I think it's worth mentioning in the above comment. > >> > >> It's unlikely that it will be only a leak without a following up > >> use-after-free. Neither is acceptable. > > > > Not necessarily, if you have a page leak, it could be that the GPU has > > access to those pages, but doesn't need the GEM object anymore > > (pages are mapped by the iommu, which doesn't need shmem->sgt or > > shmem->pages after the mapping is created). Without a WARN_ON(), this > > can go unnoticed and lead to memory corruptions/information leaks. > > > >> > >> The drm_gem_shmem_free() could be changed such that kernel won't blow up > >> on a refcnt bug, but that's not worthwhile doing because drivers > >> shouldn't have silly bugs. > > > > We definitely don't want to fix that, but we want to complain loudly > > (WARN_ON()), and make sure the risk is limited (preventing memory from > > being re-assigned to someone else by not freeing it). > > That's what the code did and continues to do here. Not exactly sure what > you're trying to say. I'm going to relocate the comment in v17 to > put_pages(), we can continue discussing it there if I'm missing yours point. > I'm just saying it would be worth mentioning that we're intentionally leaking memory if shmem->pages_use_count > 1. Something like: /** * shmem->pages_use_count should be 1 when ->sgt != NULL and * zero otherwise. If some users still hold a pages reference * that's a bug, and we intentionally leak the pages so they * can't be re-allocated to someone else while the GPU/CPU * still have access to it. */ drm_WARN_ON(drm, refcount_read(&shmem->pages_use_count) == (shmem->sgt ? 1 : 0)); if (shmem->sgt && refcount_dec_and_test(&shmem->pages_use_count)) drm_gem_shmem_free_pages(shmem);
On 9/14/23 10:36, Boris Brezillon wrote: > On Thu, 14 Sep 2023 07:02:52 +0300 > Dmitry Osipenko <dmitry.osipenko@collabora.com> wrote: > >> On 9/13/23 10:48, Boris Brezillon wrote: >>> On Wed, 13 Sep 2023 03:56:14 +0300 >>> Dmitry Osipenko <dmitry.osipenko@collabora.com> wrote: >>> >>>> On 9/5/23 11:03, Boris Brezillon wrote: >>>>>> * But >>>>>> + * acquiring the obj lock in drm_gem_shmem_release_pages_locked() can >>>>>> + * cause a locking order inversion between reservation_ww_class_mutex >>>>>> + * and fs_reclaim. >>>>>> + * >>>>>> + * This deadlock is not actually possible, because no one should >>>>>> + * be already holding the lock when drm_gem_shmem_free() is called. >>>>>> + * Unfortunately lockdep is not aware of this detail. So when the >>>>>> + * refcount drops to zero, don't touch the reservation lock. >>>>>> + */ >>>>>> + if (shmem->got_pages_sgt && >>>>>> + refcount_dec_and_test(&shmem->pages_use_count)) { >>>>>> + drm_gem_shmem_do_release_pages_locked(shmem); >>>>>> + shmem->got_pages_sgt = false; >>>>>> } >>>>> Leaking memory is the right thing to do if pages_use_count > 1 (it's >>>>> better to leak than having someone access memory it no longer owns), but >>>>> I think it's worth mentioning in the above comment. >>>> >>>> It's unlikely that it will be only a leak without a following up >>>> use-after-free. Neither is acceptable. >>> >>> Not necessarily, if you have a page leak, it could be that the GPU has >>> access to those pages, but doesn't need the GEM object anymore >>> (pages are mapped by the iommu, which doesn't need shmem->sgt or >>> shmem->pages after the mapping is created). Without a WARN_ON(), this >>> can go unnoticed and lead to memory corruptions/information leaks. >>> >>>> >>>> The drm_gem_shmem_free() could be changed such that kernel won't blow up >>>> on a refcnt bug, but that's not worthwhile doing because drivers >>>> shouldn't have silly bugs. >>> >>> We definitely don't want to fix that, but we want to complain loudly >>> (WARN_ON()), and make sure the risk is limited (preventing memory from >>> being re-assigned to someone else by not freeing it). >> >> That's what the code did and continues to do here. Not exactly sure what >> you're trying to say. I'm going to relocate the comment in v17 to >> put_pages(), we can continue discussing it there if I'm missing yours point. >> > > I'm just saying it would be worth mentioning that we're intentionally > leaking memory if shmem->pages_use_count > 1. Something like: > > /** > * shmem->pages_use_count should be 1 when ->sgt != NULL and > * zero otherwise. If some users still hold a pages reference > * that's a bug, and we intentionally leak the pages so they > * can't be re-allocated to someone else while the GPU/CPU > * still have access to it. > */ > drm_WARN_ON(drm, > refcount_read(&shmem->pages_use_count) == (shmem->sgt ? 1 : 0)); > if (shmem->sgt && refcount_dec_and_test(&shmem->pages_use_count)) > drm_gem_shmem_free_pages(shmem); That may be acceptable, but only once there will a driver using this feature.
On Thu, 14 Sep 2023 10:50:32 +0300 Dmitry Osipenko <dmitry.osipenko@collabora.com> wrote: > On 9/14/23 10:36, Boris Brezillon wrote: > > On Thu, 14 Sep 2023 07:02:52 +0300 > > Dmitry Osipenko <dmitry.osipenko@collabora.com> wrote: > > > >> On 9/13/23 10:48, Boris Brezillon wrote: > >>> On Wed, 13 Sep 2023 03:56:14 +0300 > >>> Dmitry Osipenko <dmitry.osipenko@collabora.com> wrote: > >>> > >>>> On 9/5/23 11:03, Boris Brezillon wrote: > >>>>>> * But > >>>>>> + * acquiring the obj lock in drm_gem_shmem_release_pages_locked() can > >>>>>> + * cause a locking order inversion between reservation_ww_class_mutex > >>>>>> + * and fs_reclaim. > >>>>>> + * > >>>>>> + * This deadlock is not actually possible, because no one should > >>>>>> + * be already holding the lock when drm_gem_shmem_free() is called. > >>>>>> + * Unfortunately lockdep is not aware of this detail. So when the > >>>>>> + * refcount drops to zero, don't touch the reservation lock. > >>>>>> + */ > >>>>>> + if (shmem->got_pages_sgt && > >>>>>> + refcount_dec_and_test(&shmem->pages_use_count)) { > >>>>>> + drm_gem_shmem_do_release_pages_locked(shmem); > >>>>>> + shmem->got_pages_sgt = false; > >>>>>> } > >>>>> Leaking memory is the right thing to do if pages_use_count > 1 (it's > >>>>> better to leak than having someone access memory it no longer owns), but > >>>>> I think it's worth mentioning in the above comment. > >>>> > >>>> It's unlikely that it will be only a leak without a following up > >>>> use-after-free. Neither is acceptable. > >>> > >>> Not necessarily, if you have a page leak, it could be that the GPU has > >>> access to those pages, but doesn't need the GEM object anymore > >>> (pages are mapped by the iommu, which doesn't need shmem->sgt or > >>> shmem->pages after the mapping is created). Without a WARN_ON(), this > >>> can go unnoticed and lead to memory corruptions/information leaks. > >>> > >>>> > >>>> The drm_gem_shmem_free() could be changed such that kernel won't blow up > >>>> on a refcnt bug, but that's not worthwhile doing because drivers > >>>> shouldn't have silly bugs. > >>> > >>> We definitely don't want to fix that, but we want to complain loudly > >>> (WARN_ON()), and make sure the risk is limited (preventing memory from > >>> being re-assigned to someone else by not freeing it). > >> > >> That's what the code did and continues to do here. Not exactly sure what > >> you're trying to say. I'm going to relocate the comment in v17 to > >> put_pages(), we can continue discussing it there if I'm missing yours point. > >> > > > > I'm just saying it would be worth mentioning that we're intentionally > > leaking memory if shmem->pages_use_count > 1. Something like: > > > > /** > > * shmem->pages_use_count should be 1 when ->sgt != NULL and > > * zero otherwise. If some users still hold a pages reference > > * that's a bug, and we intentionally leak the pages so they > > * can't be re-allocated to someone else while the GPU/CPU > > * still have access to it. > > */ > > drm_WARN_ON(drm, > > refcount_read(&shmem->pages_use_count) == (shmem->sgt ? 1 : 0)); > > if (shmem->sgt && refcount_dec_and_test(&shmem->pages_use_count)) > > drm_gem_shmem_free_pages(shmem); > > That may be acceptable, but only once there will a driver using this > feature. Which feature? That's not related to a specific feature, that's just how drm_gem_shmem_get_pages_sgt() works, it takes a pages ref that can only be released in drm_gem_shmem_free(), because sgt users are not refcounted and the sgt stays around until the GEM object is freed or its pages are evicted. The only valid cases we have at the moment are: - pages_use_count == 1 && sgt != NULL - pages_use_count == 0 any other situations are buggy.
On 9/14/23 11:27, Boris Brezillon wrote: > On Thu, 14 Sep 2023 10:50:32 +0300 > Dmitry Osipenko <dmitry.osipenko@collabora.com> wrote: > >> On 9/14/23 10:36, Boris Brezillon wrote: >>> On Thu, 14 Sep 2023 07:02:52 +0300 >>> Dmitry Osipenko <dmitry.osipenko@collabora.com> wrote: >>> >>>> On 9/13/23 10:48, Boris Brezillon wrote: >>>>> On Wed, 13 Sep 2023 03:56:14 +0300 >>>>> Dmitry Osipenko <dmitry.osipenko@collabora.com> wrote: >>>>> >>>>>> On 9/5/23 11:03, Boris Brezillon wrote: >>>>>>>> * But >>>>>>>> + * acquiring the obj lock in drm_gem_shmem_release_pages_locked() can >>>>>>>> + * cause a locking order inversion between reservation_ww_class_mutex >>>>>>>> + * and fs_reclaim. >>>>>>>> + * >>>>>>>> + * This deadlock is not actually possible, because no one should >>>>>>>> + * be already holding the lock when drm_gem_shmem_free() is called. >>>>>>>> + * Unfortunately lockdep is not aware of this detail. So when the >>>>>>>> + * refcount drops to zero, don't touch the reservation lock. >>>>>>>> + */ >>>>>>>> + if (shmem->got_pages_sgt && >>>>>>>> + refcount_dec_and_test(&shmem->pages_use_count)) { >>>>>>>> + drm_gem_shmem_do_release_pages_locked(shmem); >>>>>>>> + shmem->got_pages_sgt = false; >>>>>>>> } >>>>>>> Leaking memory is the right thing to do if pages_use_count > 1 (it's >>>>>>> better to leak than having someone access memory it no longer owns), but >>>>>>> I think it's worth mentioning in the above comment. >>>>>> >>>>>> It's unlikely that it will be only a leak without a following up >>>>>> use-after-free. Neither is acceptable. >>>>> >>>>> Not necessarily, if you have a page leak, it could be that the GPU has >>>>> access to those pages, but doesn't need the GEM object anymore >>>>> (pages are mapped by the iommu, which doesn't need shmem->sgt or >>>>> shmem->pages after the mapping is created). Without a WARN_ON(), this >>>>> can go unnoticed and lead to memory corruptions/information leaks. >>>>> >>>>>> >>>>>> The drm_gem_shmem_free() could be changed such that kernel won't blow up >>>>>> on a refcnt bug, but that's not worthwhile doing because drivers >>>>>> shouldn't have silly bugs. >>>>> >>>>> We definitely don't want to fix that, but we want to complain loudly >>>>> (WARN_ON()), and make sure the risk is limited (preventing memory from >>>>> being re-assigned to someone else by not freeing it). >>>> >>>> That's what the code did and continues to do here. Not exactly sure what >>>> you're trying to say. I'm going to relocate the comment in v17 to >>>> put_pages(), we can continue discussing it there if I'm missing yours point. >>>> >>> >>> I'm just saying it would be worth mentioning that we're intentionally >>> leaking memory if shmem->pages_use_count > 1. Something like: >>> >>> /** >>> * shmem->pages_use_count should be 1 when ->sgt != NULL and >>> * zero otherwise. If some users still hold a pages reference >>> * that's a bug, and we intentionally leak the pages so they >>> * can't be re-allocated to someone else while the GPU/CPU >>> * still have access to it. >>> */ >>> drm_WARN_ON(drm, >>> refcount_read(&shmem->pages_use_count) == (shmem->sgt ? 1 : 0)); >>> if (shmem->sgt && refcount_dec_and_test(&shmem->pages_use_count)) >>> drm_gem_shmem_free_pages(shmem); >> >> That may be acceptable, but only once there will a driver using this >> feature. > > Which feature? That's not related to a specific feature, that's just > how drm_gem_shmem_get_pages_sgt() works, it takes a pages ref that can > only be released in drm_gem_shmem_free(), because sgt users are not > refcounted and the sgt stays around until the GEM object is freed or > its pages are evicted. The only valid cases we have at the moment are: > > - pages_use_count == 1 && sgt != NULL > - pages_use_count == 0 > > any other situations are buggy. sgt may belong to dma-buf for which pages_use_count=0, this can't be done until sgt mess is sorted out
On Thu, 14 Sep 2023 14:36:23 +0300 Dmitry Osipenko <dmitry.osipenko@collabora.com> wrote: > On 9/14/23 11:27, Boris Brezillon wrote: > > On Thu, 14 Sep 2023 10:50:32 +0300 > > Dmitry Osipenko <dmitry.osipenko@collabora.com> wrote: > > > >> On 9/14/23 10:36, Boris Brezillon wrote: > >>> On Thu, 14 Sep 2023 07:02:52 +0300 > >>> Dmitry Osipenko <dmitry.osipenko@collabora.com> wrote: > >>> > >>>> On 9/13/23 10:48, Boris Brezillon wrote: > >>>>> On Wed, 13 Sep 2023 03:56:14 +0300 > >>>>> Dmitry Osipenko <dmitry.osipenko@collabora.com> wrote: > >>>>> > >>>>>> On 9/5/23 11:03, Boris Brezillon wrote: > >>>>>>>> * But > >>>>>>>> + * acquiring the obj lock in drm_gem_shmem_release_pages_locked() can > >>>>>>>> + * cause a locking order inversion between reservation_ww_class_mutex > >>>>>>>> + * and fs_reclaim. > >>>>>>>> + * > >>>>>>>> + * This deadlock is not actually possible, because no one should > >>>>>>>> + * be already holding the lock when drm_gem_shmem_free() is called. > >>>>>>>> + * Unfortunately lockdep is not aware of this detail. So when the > >>>>>>>> + * refcount drops to zero, don't touch the reservation lock. > >>>>>>>> + */ > >>>>>>>> + if (shmem->got_pages_sgt && > >>>>>>>> + refcount_dec_and_test(&shmem->pages_use_count)) { > >>>>>>>> + drm_gem_shmem_do_release_pages_locked(shmem); > >>>>>>>> + shmem->got_pages_sgt = false; > >>>>>>>> } > >>>>>>> Leaking memory is the right thing to do if pages_use_count > 1 (it's > >>>>>>> better to leak than having someone access memory it no longer owns), but > >>>>>>> I think it's worth mentioning in the above comment. > >>>>>> > >>>>>> It's unlikely that it will be only a leak without a following up > >>>>>> use-after-free. Neither is acceptable. > >>>>> > >>>>> Not necessarily, if you have a page leak, it could be that the GPU has > >>>>> access to those pages, but doesn't need the GEM object anymore > >>>>> (pages are mapped by the iommu, which doesn't need shmem->sgt or > >>>>> shmem->pages after the mapping is created). Without a WARN_ON(), this > >>>>> can go unnoticed and lead to memory corruptions/information leaks. > >>>>> > >>>>>> > >>>>>> The drm_gem_shmem_free() could be changed such that kernel won't blow up > >>>>>> on a refcnt bug, but that's not worthwhile doing because drivers > >>>>>> shouldn't have silly bugs. > >>>>> > >>>>> We definitely don't want to fix that, but we want to complain loudly > >>>>> (WARN_ON()), and make sure the risk is limited (preventing memory from > >>>>> being re-assigned to someone else by not freeing it). > >>>> > >>>> That's what the code did and continues to do here. Not exactly sure what > >>>> you're trying to say. I'm going to relocate the comment in v17 to > >>>> put_pages(), we can continue discussing it there if I'm missing yours point. > >>>> > >>> > >>> I'm just saying it would be worth mentioning that we're intentionally > >>> leaking memory if shmem->pages_use_count > 1. Something like: > >>> > >>> /** > >>> * shmem->pages_use_count should be 1 when ->sgt != NULL and > >>> * zero otherwise. If some users still hold a pages reference > >>> * that's a bug, and we intentionally leak the pages so they > >>> * can't be re-allocated to someone else while the GPU/CPU > >>> * still have access to it. > >>> */ > >>> drm_WARN_ON(drm, > >>> refcount_read(&shmem->pages_use_count) == (shmem->sgt ? 1 : 0)); > >>> if (shmem->sgt && refcount_dec_and_test(&shmem->pages_use_count)) > >>> drm_gem_shmem_free_pages(shmem); > >> > >> That may be acceptable, but only once there will a driver using this > >> feature. > > > > Which feature? That's not related to a specific feature, that's just > > how drm_gem_shmem_get_pages_sgt() works, it takes a pages ref that can > > only be released in drm_gem_shmem_free(), because sgt users are not > > refcounted and the sgt stays around until the GEM object is freed or > > its pages are evicted. The only valid cases we have at the moment are: > > > > - pages_use_count == 1 && sgt != NULL > > - pages_use_count == 0 > > > > any other situations are buggy. > > sgt may belong to dma-buf for which pages_use_count=0, this can't be > done until sgt mess is sorted out No it can't, not in that path, because the code you're adding is in the if (!obj->import_branch) branch: if (obj->import_attach) { drm_prime_gem_destroy(obj, shmem->sgt); } else { ... // Your changes are here. ... } >
On 9/14/23 14:58, Boris Brezillon wrote: > On Thu, 14 Sep 2023 14:36:23 +0300 > Dmitry Osipenko <dmitry.osipenko@collabora.com> wrote: > >> On 9/14/23 11:27, Boris Brezillon wrote: >>> On Thu, 14 Sep 2023 10:50:32 +0300 >>> Dmitry Osipenko <dmitry.osipenko@collabora.com> wrote: >>> >>>> On 9/14/23 10:36, Boris Brezillon wrote: >>>>> On Thu, 14 Sep 2023 07:02:52 +0300 >>>>> Dmitry Osipenko <dmitry.osipenko@collabora.com> wrote: >>>>> >>>>>> On 9/13/23 10:48, Boris Brezillon wrote: >>>>>>> On Wed, 13 Sep 2023 03:56:14 +0300 >>>>>>> Dmitry Osipenko <dmitry.osipenko@collabora.com> wrote: >>>>>>> >>>>>>>> On 9/5/23 11:03, Boris Brezillon wrote: >>>>>>>>>> * But >>>>>>>>>> + * acquiring the obj lock in drm_gem_shmem_release_pages_locked() can >>>>>>>>>> + * cause a locking order inversion between reservation_ww_class_mutex >>>>>>>>>> + * and fs_reclaim. >>>>>>>>>> + * >>>>>>>>>> + * This deadlock is not actually possible, because no one should >>>>>>>>>> + * be already holding the lock when drm_gem_shmem_free() is called. >>>>>>>>>> + * Unfortunately lockdep is not aware of this detail. So when the >>>>>>>>>> + * refcount drops to zero, don't touch the reservation lock. >>>>>>>>>> + */ >>>>>>>>>> + if (shmem->got_pages_sgt && >>>>>>>>>> + refcount_dec_and_test(&shmem->pages_use_count)) { >>>>>>>>>> + drm_gem_shmem_do_release_pages_locked(shmem); >>>>>>>>>> + shmem->got_pages_sgt = false; >>>>>>>>>> } >>>>>>>>> Leaking memory is the right thing to do if pages_use_count > 1 (it's >>>>>>>>> better to leak than having someone access memory it no longer owns), but >>>>>>>>> I think it's worth mentioning in the above comment. >>>>>>>> >>>>>>>> It's unlikely that it will be only a leak without a following up >>>>>>>> use-after-free. Neither is acceptable. >>>>>>> >>>>>>> Not necessarily, if you have a page leak, it could be that the GPU has >>>>>>> access to those pages, but doesn't need the GEM object anymore >>>>>>> (pages are mapped by the iommu, which doesn't need shmem->sgt or >>>>>>> shmem->pages after the mapping is created). Without a WARN_ON(), this >>>>>>> can go unnoticed and lead to memory corruptions/information leaks. >>>>>>> >>>>>>>> >>>>>>>> The drm_gem_shmem_free() could be changed such that kernel won't blow up >>>>>>>> on a refcnt bug, but that's not worthwhile doing because drivers >>>>>>>> shouldn't have silly bugs. >>>>>>> >>>>>>> We definitely don't want to fix that, but we want to complain loudly >>>>>>> (WARN_ON()), and make sure the risk is limited (preventing memory from >>>>>>> being re-assigned to someone else by not freeing it). >>>>>> >>>>>> That's what the code did and continues to do here. Not exactly sure what >>>>>> you're trying to say. I'm going to relocate the comment in v17 to >>>>>> put_pages(), we can continue discussing it there if I'm missing yours point. >>>>>> >>>>> >>>>> I'm just saying it would be worth mentioning that we're intentionally >>>>> leaking memory if shmem->pages_use_count > 1. Something like: >>>>> >>>>> /** >>>>> * shmem->pages_use_count should be 1 when ->sgt != NULL and >>>>> * zero otherwise. If some users still hold a pages reference >>>>> * that's a bug, and we intentionally leak the pages so they >>>>> * can't be re-allocated to someone else while the GPU/CPU >>>>> * still have access to it. >>>>> */ >>>>> drm_WARN_ON(drm, >>>>> refcount_read(&shmem->pages_use_count) == (shmem->sgt ? 1 : 0)); >>>>> if (shmem->sgt && refcount_dec_and_test(&shmem->pages_use_count)) >>>>> drm_gem_shmem_free_pages(shmem); >>>> >>>> That may be acceptable, but only once there will a driver using this >>>> feature. >>> >>> Which feature? That's not related to a specific feature, that's just >>> how drm_gem_shmem_get_pages_sgt() works, it takes a pages ref that can >>> only be released in drm_gem_shmem_free(), because sgt users are not >>> refcounted and the sgt stays around until the GEM object is freed or >>> its pages are evicted. The only valid cases we have at the moment are: >>> >>> - pages_use_count == 1 && sgt != NULL >>> - pages_use_count == 0 >>> >>> any other situations are buggy. >> >> sgt may belong to dma-buf for which pages_use_count=0, this can't be >> done until sgt mess is sorted out > > No it can't, not in that path, because the code you're adding is in the > if (!obj->import_branch) branch: > > > if (obj->import_attach) { > drm_prime_gem_destroy(obj, shmem->sgt); > } else { > ... > // Your changes are here. > ... This branch is taken for the dma-buf in the prime import error code path. But yes, the pages_use_count=0 for the dma-buf and then it can be written as: if (obj->import_attach) { drm_prime_gem_destroy(obj, shmem->sgt); } else { drm_WARN_ON(obj->dev, refcount_read(&shmem->vmap_use_count)); if (shmem->sgt && refcount_read(&shmem->pages_use_count)) { dma_unmap_sgtable(obj->dev->dev, shmem->sgt, DMA_BIDIRECTIONAL, 0); sg_free_table(shmem->sgt); kfree(shmem->sgt); __drm_gem_shmem_put_pages(shmem); } drm_WARN_ON(obj->dev, refcount_read(&shmem->pages_use_count)); Alright, I'll check if it works as expected for fixing the error code path bug for v17
On Thu, 14 Sep 2023 16:01:37 +0300 Dmitry Osipenko <dmitry.osipenko@collabora.com> wrote: > On 9/14/23 14:58, Boris Brezillon wrote: > > On Thu, 14 Sep 2023 14:36:23 +0300 > > Dmitry Osipenko <dmitry.osipenko@collabora.com> wrote: > > > >> On 9/14/23 11:27, Boris Brezillon wrote: > >>> On Thu, 14 Sep 2023 10:50:32 +0300 > >>> Dmitry Osipenko <dmitry.osipenko@collabora.com> wrote: > >>> > >>>> On 9/14/23 10:36, Boris Brezillon wrote: > >>>>> On Thu, 14 Sep 2023 07:02:52 +0300 > >>>>> Dmitry Osipenko <dmitry.osipenko@collabora.com> wrote: > >>>>> > >>>>>> On 9/13/23 10:48, Boris Brezillon wrote: > >>>>>>> On Wed, 13 Sep 2023 03:56:14 +0300 > >>>>>>> Dmitry Osipenko <dmitry.osipenko@collabora.com> wrote: > >>>>>>> > >>>>>>>> On 9/5/23 11:03, Boris Brezillon wrote: > >>>>>>>>>> * But > >>>>>>>>>> + * acquiring the obj lock in drm_gem_shmem_release_pages_locked() can > >>>>>>>>>> + * cause a locking order inversion between reservation_ww_class_mutex > >>>>>>>>>> + * and fs_reclaim. > >>>>>>>>>> + * > >>>>>>>>>> + * This deadlock is not actually possible, because no one should > >>>>>>>>>> + * be already holding the lock when drm_gem_shmem_free() is called. > >>>>>>>>>> + * Unfortunately lockdep is not aware of this detail. So when the > >>>>>>>>>> + * refcount drops to zero, don't touch the reservation lock. > >>>>>>>>>> + */ > >>>>>>>>>> + if (shmem->got_pages_sgt && > >>>>>>>>>> + refcount_dec_and_test(&shmem->pages_use_count)) { > >>>>>>>>>> + drm_gem_shmem_do_release_pages_locked(shmem); > >>>>>>>>>> + shmem->got_pages_sgt = false; > >>>>>>>>>> } > >>>>>>>>> Leaking memory is the right thing to do if pages_use_count > 1 (it's > >>>>>>>>> better to leak than having someone access memory it no longer owns), but > >>>>>>>>> I think it's worth mentioning in the above comment. > >>>>>>>> > >>>>>>>> It's unlikely that it will be only a leak without a following up > >>>>>>>> use-after-free. Neither is acceptable. > >>>>>>> > >>>>>>> Not necessarily, if you have a page leak, it could be that the GPU has > >>>>>>> access to those pages, but doesn't need the GEM object anymore > >>>>>>> (pages are mapped by the iommu, which doesn't need shmem->sgt or > >>>>>>> shmem->pages after the mapping is created). Without a WARN_ON(), this > >>>>>>> can go unnoticed and lead to memory corruptions/information leaks. > >>>>>>> > >>>>>>>> > >>>>>>>> The drm_gem_shmem_free() could be changed such that kernel won't blow up > >>>>>>>> on a refcnt bug, but that's not worthwhile doing because drivers > >>>>>>>> shouldn't have silly bugs. > >>>>>>> > >>>>>>> We definitely don't want to fix that, but we want to complain loudly > >>>>>>> (WARN_ON()), and make sure the risk is limited (preventing memory from > >>>>>>> being re-assigned to someone else by not freeing it). > >>>>>> > >>>>>> That's what the code did and continues to do here. Not exactly sure what > >>>>>> you're trying to say. I'm going to relocate the comment in v17 to > >>>>>> put_pages(), we can continue discussing it there if I'm missing yours point. > >>>>>> > >>>>> > >>>>> I'm just saying it would be worth mentioning that we're intentionally > >>>>> leaking memory if shmem->pages_use_count > 1. Something like: > >>>>> > >>>>> /** > >>>>> * shmem->pages_use_count should be 1 when ->sgt != NULL and > >>>>> * zero otherwise. If some users still hold a pages reference > >>>>> * that's a bug, and we intentionally leak the pages so they > >>>>> * can't be re-allocated to someone else while the GPU/CPU > >>>>> * still have access to it. > >>>>> */ > >>>>> drm_WARN_ON(drm, > >>>>> refcount_read(&shmem->pages_use_count) == (shmem->sgt ? 1 : 0)); > >>>>> if (shmem->sgt && refcount_dec_and_test(&shmem->pages_use_count)) > >>>>> drm_gem_shmem_free_pages(shmem); > >>>> > >>>> That may be acceptable, but only once there will a driver using this > >>>> feature. > >>> > >>> Which feature? That's not related to a specific feature, that's just > >>> how drm_gem_shmem_get_pages_sgt() works, it takes a pages ref that can > >>> only be released in drm_gem_shmem_free(), because sgt users are not > >>> refcounted and the sgt stays around until the GEM object is freed or > >>> its pages are evicted. The only valid cases we have at the moment are: > >>> > >>> - pages_use_count == 1 && sgt != NULL > >>> - pages_use_count == 0 > >>> > >>> any other situations are buggy. > >> > >> sgt may belong to dma-buf for which pages_use_count=0, this can't be > >> done until sgt mess is sorted out > > > > No it can't, not in that path, because the code you're adding is in the > > if (!obj->import_branch) branch: > > > > > > if (obj->import_attach) { > > drm_prime_gem_destroy(obj, shmem->sgt); > > } else { > > ... > > // Your changes are here. > > ... > > This branch is taken for the dma-buf in the prime import error code path. I suggested a fix for this error that didn't involve adding a new flag, but that's orthogonal to the piece of code we're discussing anyway. > But yes, the pages_use_count=0 for the dma-buf and then it can be > written as: > > if (obj->import_attach) { > drm_prime_gem_destroy(obj, shmem->sgt); > } else { > drm_WARN_ON(obj->dev, refcount_read(&shmem->vmap_use_count)); > > if (shmem->sgt && refcount_read(&shmem->pages_use_count)) { You should drop the '&& refcount_read(&shmem->pages_use_count)', otherwise you'll never enter this branch (sgt allocation retained a ref, so pages_use_count > 0 when ->sgt != NULL). If you added this pages_use_count > 0 check to deal with the 'free-partially-imported-GEM' case, I keep thinking this is not the right fix. You should just assume that obj->import_attach == NULL means not-a-prime-buffer, and then make sure partially-initialized-prime-GEMs have import_attach assigned (see the oneliner I suggested in my review of `[PATCH v15 01/23] drm/shmem-helper: Fix UAF in error path when freeing SGT of imported GEM`). > dma_unmap_sgtable(obj->dev->dev, shmem->sgt, > DMA_BIDIRECTIONAL, 0); > sg_free_table(shmem->sgt); > kfree(shmem->sgt); > > __drm_gem_shmem_put_pages(shmem); You need to decrement pages_use_count: /* shmem->pages_use_count should be 1 when ->sgt != NULL and * zero otherwise. If some users still hold a pages reference * that's a bug, and we intentionally leak the pages so they * can't be re-allocated to someone else while the GPU/CPU * still have access to it. */ if (refcount_dec_and_test(&shmem->pages_use_count)) __drm_gem_shmem_put_pages(shmem); > } > > drm_WARN_ON(obj->dev, refcount_read(&shmem->pages_use_count)); And now this WARN_ON() ^ should catch unexpected pages leak. > > Alright, I'll check if it works as expected for fixing the error code path bug for v17 >
On Thu, 14 Sep 2023 15:27:03 +0200 Boris Brezillon <boris.brezillon@collabora.com> wrote: > You should drop the '&& refcount_read(&shmem->pages_use_count)', > otherwise you'll never enter this branch (sgt allocation retained > a ref, so pages_use_count > 0 when ->sgt != NULL). Sorry for the brain fart. You can drop this extra test because its redundant (->sgt != NULL implies pages_use_count > 0), but it shouldn't prevent you from entering the branch.
On 9/14/23 16:27, Boris Brezillon wrote: ... > If you added this pages_use_count > 0 check to deal with the > 'free-partially-imported-GEM' case, I keep thinking this is not > the right fix. You should just assume that obj->import_attach == NULL > means not-a-prime-buffer, and then make sure > partially-initialized-prime-GEMs have import_attach assigned (see the > oneliner I suggested in my review of > `[PATCH v15 01/23] drm/shmem-helper: Fix UAF in error path when > freeing SGT of imported GEM`). Yes, I added it to deal with the partially imported GEM. The obj->import_attach can't be set until dma-buf is fully imported as it also will cause trouble for the error code path, now dma-buf will be freed two times. >> dma_unmap_sgtable(obj->dev->dev, shmem->sgt, >> DMA_BIDIRECTIONAL, 0); >> sg_free_table(shmem->sgt); >> kfree(shmem->sgt); >> >> __drm_gem_shmem_put_pages(shmem); > You need to decrement pages_use_count: > > /* shmem->pages_use_count should be 1 when ->sgt != NULL and > * zero otherwise. If some users still hold a pages reference > * that's a bug, and we intentionally leak the pages so they > * can't be re-allocated to someone else while the GPU/CPU > * still have access to it. > */ > if (refcount_dec_and_test(&shmem->pages_use_count)) > __drm_gem_shmem_put_pages(shmem); > The put_pages() itself decrements the refcnt. I'm going back to deferring all this questionable changes for the later times. It is not essential problem for this patchset.
diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index 4633a418faba..a0a6c7e505c8 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -20,6 +20,7 @@ #include <drm/drm_device.h> #include <drm/drm_drv.h> #include <drm/drm_gem_shmem_helper.h> +#include <drm/drm_managed.h> #include <drm/drm_prime.h> #include <drm/drm_print.h> @@ -88,8 +89,6 @@ __drm_gem_shmem_create(struct drm_device *dev, size_t size, bool private) if (ret) goto err_release; - INIT_LIST_HEAD(&shmem->madv_list); - if (!private) { /* * Our buffers are kept pinned, so allocating them @@ -128,6 +127,62 @@ struct drm_gem_shmem_object *drm_gem_shmem_create(struct drm_device *dev, size_t } EXPORT_SYMBOL_GPL(drm_gem_shmem_create); +static bool drm_gem_shmem_is_evictable(struct drm_gem_shmem_object *shmem) +{ + dma_resv_assert_held(shmem->base.resv); + + return (shmem->madv >= 0) && shmem->base.funcs->evict && + refcount_read(&shmem->pages_use_count) && + !refcount_read(&shmem->pages_pin_count) && + !shmem->base.dma_buf && !shmem->base.import_attach && + shmem->sgt && !shmem->evicted; +} + +static void +drm_gem_shmem_update_pages_state_locked(struct drm_gem_shmem_object *shmem) +{ + struct drm_gem_object *obj = &shmem->base; + struct drm_gem_shmem *shmem_mm = obj->dev->shmem_mm; + struct drm_gem_shmem_shrinker *shmem_shrinker = &shmem_mm->shrinker; + + dma_resv_assert_held(shmem->base.resv); + + if (!shmem_shrinker || obj->import_attach) + return; + + if (shmem->madv < 0) + drm_gem_lru_remove(&shmem->base); + else if (drm_gem_shmem_is_evictable(shmem) || drm_gem_shmem_is_purgeable(shmem)) + drm_gem_lru_move_tail(&shmem_shrinker->lru_evictable, &shmem->base); + else if (shmem->evicted) + drm_gem_lru_move_tail(&shmem_shrinker->lru_evicted, &shmem->base); + else if (!shmem->pages) + drm_gem_lru_remove(&shmem->base); + else + drm_gem_lru_move_tail(&shmem_shrinker->lru_pinned, &shmem->base); +} + +static void +drm_gem_shmem_do_release_pages_locked(struct drm_gem_shmem_object *shmem) +{ + struct drm_gem_object *obj = &shmem->base; + + if (!shmem->pages) { + drm_WARN_ON(obj->dev, !shmem->evicted && shmem->madv >= 0); + return; + } + +#ifdef CONFIG_X86 + if (shmem->map_wc) + set_pages_array_wb(shmem->pages, obj->size >> PAGE_SHIFT); +#endif + + drm_gem_put_pages(obj, shmem->pages, + shmem->pages_mark_dirty_on_put, + shmem->pages_mark_accessed_on_put); + shmem->pages = NULL; +} + /** * drm_gem_shmem_free - Free resources associated with a shmem GEM object * @shmem: shmem GEM object to free @@ -142,8 +197,6 @@ void drm_gem_shmem_free(struct drm_gem_shmem_object *shmem) if (obj->import_attach) { drm_prime_gem_destroy(obj, shmem->sgt); } else if (!shmem->imported_sgt) { - dma_resv_lock(shmem->base.resv, NULL); - drm_WARN_ON(obj->dev, refcount_read(&shmem->vmap_use_count)); if (shmem->sgt) { @@ -152,14 +205,27 @@ void drm_gem_shmem_free(struct drm_gem_shmem_object *shmem) sg_free_table(shmem->sgt); kfree(shmem->sgt); } - if (refcount_read(&shmem->pages_use_count)) { - drm_gem_shmem_put_pages_locked(shmem); - drm_WARN_ON(obj->dev, !shmem->got_pages_sgt); + + /* + * Destroying the object is a special case.. drm_gem_shmem_free() + * calls many things that WARN_ON if the obj lock is not held. But + * acquiring the obj lock in drm_gem_shmem_release_pages_locked() can + * cause a locking order inversion between reservation_ww_class_mutex + * and fs_reclaim. + * + * This deadlock is not actually possible, because no one should + * be already holding the lock when drm_gem_shmem_free() is called. + * Unfortunately lockdep is not aware of this detail. So when the + * refcount drops to zero, don't touch the reservation lock. + */ + if (shmem->got_pages_sgt && + refcount_dec_and_test(&shmem->pages_use_count)) { + drm_gem_shmem_do_release_pages_locked(shmem); + shmem->got_pages_sgt = false; } drm_WARN_ON(obj->dev, refcount_read(&shmem->pages_use_count)); - - dma_resv_unlock(shmem->base.resv); + drm_WARN_ON(obj->dev, shmem->got_pages_sgt); } drm_gem_object_release(obj); @@ -167,15 +233,26 @@ void drm_gem_shmem_free(struct drm_gem_shmem_object *shmem) } EXPORT_SYMBOL_GPL(drm_gem_shmem_free); -static int drm_gem_shmem_get_pages_locked(struct drm_gem_shmem_object *shmem) +static int +drm_gem_shmem_acquire_pages(struct drm_gem_shmem_object *shmem, bool init) { struct drm_gem_object *obj = &shmem->base; struct page **pages; dma_resv_assert_held(shmem->base.resv); - if (refcount_inc_not_zero(&shmem->pages_use_count)) + if (shmem->madv < 0) { + drm_WARN_ON(obj->dev, shmem->pages); + return -ENOMEM; + } + + if (shmem->pages) { + drm_WARN_ON(obj->dev, !shmem->evicted); return 0; + } + + if (drm_WARN_ON(obj->dev, !(init ^ refcount_read(&shmem->pages_use_count)))) + return -EINVAL; pages = drm_gem_get_pages(obj); if (IS_ERR(pages)) { @@ -196,8 +273,36 @@ static int drm_gem_shmem_get_pages_locked(struct drm_gem_shmem_object *shmem) shmem->pages = pages; + return 0; +} + +static void +drm_gem_shmem_release_pages_locked(struct drm_gem_shmem_object *shmem) +{ + dma_resv_assert_held(shmem->base.resv); + drm_gem_shmem_do_release_pages_locked(shmem); +} + +static int drm_gem_shmem_get_pages_locked(struct drm_gem_shmem_object *shmem) +{ + int err; + + dma_resv_assert_held(shmem->base.resv); + + if (shmem->madv < 0) + return -ENOMEM; + + if (refcount_inc_not_zero(&shmem->pages_use_count)) + return 0; + + err = drm_gem_shmem_acquire_pages(shmem, true); + if (err) + return err; + refcount_set(&shmem->pages_use_count, 1); + drm_gem_shmem_update_pages_state_locked(shmem); + return 0; } @@ -209,20 +314,11 @@ static int drm_gem_shmem_get_pages_locked(struct drm_gem_shmem_object *shmem) */ void drm_gem_shmem_put_pages_locked(struct drm_gem_shmem_object *shmem) { - struct drm_gem_object *obj = &shmem->base; - dma_resv_assert_held(shmem->base.resv); if (refcount_dec_and_test(&shmem->pages_use_count)) { -#ifdef CONFIG_X86 - if (shmem->map_wc) - set_pages_array_wb(shmem->pages, obj->size >> PAGE_SHIFT); -#endif - - drm_gem_put_pages(obj, shmem->pages, - shmem->pages_mark_dirty_on_put, - shmem->pages_mark_accessed_on_put); - shmem->pages = NULL; + drm_gem_shmem_release_pages_locked(shmem); + drm_gem_shmem_update_pages_state_locked(shmem); } } EXPORT_SYMBOL_GPL(drm_gem_shmem_put_pages_locked); @@ -251,8 +347,15 @@ static int drm_gem_shmem_pin_locked(struct drm_gem_shmem_object *shmem) return 0; ret = drm_gem_shmem_get_pages_locked(shmem); - if (!ret) + if (!ret) { + ret = drm_gem_shmem_swapin_locked(shmem); + if (ret) { + drm_gem_shmem_put_pages_locked(shmem); + return ret; + } + refcount_set(&shmem->pages_pin_count, 1); + } return ret; } @@ -448,29 +551,54 @@ int drm_gem_shmem_madvise_locked(struct drm_gem_shmem_object *shmem, int madv) madv = shmem->madv; + drm_gem_shmem_update_pages_state_locked(shmem); + return (madv >= 0); } EXPORT_SYMBOL_GPL(drm_gem_shmem_madvise_locked); -void drm_gem_shmem_purge_locked(struct drm_gem_shmem_object *shmem) +int drm_gem_shmem_madvise(struct drm_gem_shmem_object *shmem, int madv) +{ + struct drm_gem_object *obj = &shmem->base; + int ret; + + ret = dma_resv_lock_interruptible(obj->resv, NULL); + if (ret) + return ret; + + ret = drm_gem_shmem_madvise_locked(shmem, madv); + dma_resv_unlock(obj->resv); + + return ret; +} +EXPORT_SYMBOL_GPL(drm_gem_shmem_madvise); + +static void drm_gem_shmem_unpin_pages_locked(struct drm_gem_shmem_object *shmem) { struct drm_gem_object *obj = &shmem->base; struct drm_device *dev = obj->dev; dma_resv_assert_held(shmem->base.resv); - drm_WARN_ON(obj->dev, !drm_gem_shmem_is_purgeable(shmem)); + if (shmem->evicted) + return; dma_unmap_sgtable(dev->dev, shmem->sgt, DMA_BIDIRECTIONAL, 0); + drm_gem_shmem_release_pages_locked(shmem); + drm_vma_node_unmap(&obj->vma_node, dev->anon_inode->i_mapping); + sg_free_table(shmem->sgt); kfree(shmem->sgt); shmem->sgt = NULL; +} - drm_gem_shmem_put_pages_locked(shmem); +void drm_gem_shmem_purge_locked(struct drm_gem_shmem_object *shmem) +{ + struct drm_gem_object *obj = &shmem->base; - shmem->madv = -1; + drm_WARN_ON(obj->dev, !drm_gem_shmem_is_purgeable(shmem)); - drm_vma_node_unmap(&obj->vma_node, dev->anon_inode->i_mapping); + drm_gem_shmem_unpin_pages_locked(shmem); drm_gem_free_mmap_offset(obj); /* Our goal here is to return as much of the memory as @@ -481,9 +609,59 @@ void drm_gem_shmem_purge_locked(struct drm_gem_shmem_object *shmem) shmem_truncate_range(file_inode(obj->filp), 0, (loff_t)-1); invalidate_mapping_pages(file_inode(obj->filp)->i_mapping, 0, (loff_t)-1); + + shmem->madv = -1; + shmem->evicted = false; + drm_gem_shmem_update_pages_state_locked(shmem); } EXPORT_SYMBOL_GPL(drm_gem_shmem_purge_locked); +/** + * drm_gem_shmem_swapin_locked() - Moves shmem GEM back to memory and enables + * hardware access to the memory. + * @shmem: shmem GEM object + * + * This function moves shmem GEM back to memory if it was previously evicted + * by the memory shrinker. The GEM is ready to use on success. + * + * Returns: + * 0 on success or a negative error code on failure. + */ +int drm_gem_shmem_swapin_locked(struct drm_gem_shmem_object *shmem) +{ + struct drm_gem_object *obj = &shmem->base; + struct sg_table *sgt; + int err; + + dma_resv_assert_held(shmem->base.resv); + + if (shmem->evicted) { + err = drm_gem_shmem_acquire_pages(shmem, false); + if (err) + return err; + + sgt = drm_gem_shmem_get_sg_table(shmem); + if (IS_ERR(sgt)) + return PTR_ERR(sgt); + + err = dma_map_sgtable(obj->dev->dev, sgt, + DMA_BIDIRECTIONAL, 0); + if (err) { + sg_free_table(sgt); + kfree(sgt); + return err; + } + + shmem->sgt = sgt; + shmem->evicted = false; + + drm_gem_shmem_update_pages_state_locked(shmem); + } + + return 0; +} +EXPORT_SYMBOL_GPL(drm_gem_shmem_swapin_locked); + /** * drm_gem_shmem_dumb_create - Create a dumb shmem buffer object * @file: DRM file structure to create the dumb buffer for @@ -530,22 +708,34 @@ static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf) vm_fault_t ret; struct page *page; pgoff_t page_offset; + bool pages_unpinned; + int err; /* We don't use vmf->pgoff since that has the fake offset */ page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT; dma_resv_lock(shmem->base.resv, NULL); - if (page_offset >= num_pages || - drm_WARN_ON_ONCE(obj->dev, !shmem->pages) || - shmem->madv < 0) { + /* Sanity-check that we have the pages pointer when it should present */ + pages_unpinned = (shmem->evicted || shmem->madv < 0 || + !refcount_read(&shmem->pages_use_count)); + drm_WARN_ON_ONCE(obj->dev, !shmem->pages ^ pages_unpinned); + + if (page_offset >= num_pages || (!shmem->pages && !shmem->evicted)) { ret = VM_FAULT_SIGBUS; } else { + err = drm_gem_shmem_swapin_locked(shmem); + if (err) { + ret = VM_FAULT_OOM; + goto unlock; + } + page = shmem->pages[page_offset]; ret = vmf_insert_pfn(vma, vmf->address, page_to_pfn(page)); } +unlock: dma_resv_unlock(shmem->base.resv); return ret; @@ -568,6 +758,7 @@ static void drm_gem_shmem_vm_open(struct vm_area_struct *vma) drm_WARN_ON_ONCE(obj->dev, !refcount_inc_not_zero(&shmem->pages_use_count)); + drm_gem_shmem_update_pages_state_locked(shmem); dma_resv_unlock(shmem->base.resv); drm_gem_vm_open(vma); @@ -653,7 +844,9 @@ void drm_gem_shmem_print_info(const struct drm_gem_shmem_object *shmem, drm_printf_indent(p, indent, "pages_pin_count=%u\n", refcount_read(&shmem->pages_pin_count)); drm_printf_indent(p, indent, "pages_use_count=%u\n", refcount_read(&shmem->pages_use_count)); drm_printf_indent(p, indent, "vmap_use_count=%u\n", refcount_read(&shmem->vmap_use_count)); + drm_printf_indent(p, indent, "evicted=%d\n", shmem->evicted); drm_printf_indent(p, indent, "vaddr=%p\n", shmem->vaddr); + drm_printf_indent(p, indent, "madv=%d\n", shmem->madv); } EXPORT_SYMBOL_GPL(drm_gem_shmem_print_info); @@ -715,6 +908,8 @@ static struct sg_table *drm_gem_shmem_get_pages_sgt_locked(struct drm_gem_shmem_ shmem->sgt = sgt; + drm_gem_shmem_update_pages_state_locked(shmem); + return sgt; err_free_sgt: @@ -802,6 +997,191 @@ drm_gem_shmem_prime_import_sg_table(struct drm_device *dev, } EXPORT_SYMBOL_GPL(drm_gem_shmem_prime_import_sg_table); +static struct drm_gem_shmem_shrinker * +to_drm_gem_shmem_shrinker(struct shrinker *shrinker) +{ + return container_of(shrinker, struct drm_gem_shmem_shrinker, base); +} + +static unsigned long +drm_gem_shmem_shrinker_count_objects(struct shrinker *shrinker, + struct shrink_control *sc) +{ + struct drm_gem_shmem_shrinker *shmem_shrinker = + to_drm_gem_shmem_shrinker(shrinker); + unsigned long count = shmem_shrinker->lru_evictable.count; + + if (count >= SHRINK_EMPTY) + return SHRINK_EMPTY - 1; + + return count ?: SHRINK_EMPTY; +} + +void drm_gem_shmem_evict_locked(struct drm_gem_shmem_object *shmem) +{ + struct drm_gem_object *obj = &shmem->base; + + drm_WARN_ON(obj->dev, !drm_gem_shmem_is_evictable(shmem)); + drm_WARN_ON(obj->dev, shmem->evicted); + + drm_gem_shmem_unpin_pages_locked(shmem); + + shmem->evicted = true; + drm_gem_shmem_update_pages_state_locked(shmem); +} +EXPORT_SYMBOL_GPL(drm_gem_shmem_evict_locked); + +static bool drm_gem_shmem_shrinker_evict_locked(struct drm_gem_object *obj) +{ + struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); + int err; + + if (!drm_gem_shmem_is_evictable(shmem) || + get_nr_swap_pages() < obj->size >> PAGE_SHIFT) + return false; + + err = drm_gem_evict_locked(obj); + if (err) + return false; + + return true; +} + +static bool drm_gem_shmem_shrinker_purge_locked(struct drm_gem_object *obj) +{ + struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); + int err; + + if (!drm_gem_shmem_is_purgeable(shmem)) + return false; + + err = drm_gem_evict_locked(obj); + if (err) + return false; + + return true; +} + +static unsigned long +drm_gem_shmem_shrinker_scan_objects(struct shrinker *shrinker, + struct shrink_control *sc) +{ + struct drm_gem_shmem_shrinker *shmem_shrinker; + unsigned long nr_to_scan = sc->nr_to_scan; + unsigned long remaining = 0; + unsigned long freed = 0; + + shmem_shrinker = to_drm_gem_shmem_shrinker(shrinker); + + /* purge as many objects as we can */ + freed += drm_gem_lru_scan(&shmem_shrinker->lru_evictable, + nr_to_scan, &remaining, + drm_gem_shmem_shrinker_purge_locked); + + /* evict as many objects as we can */ + if (freed < nr_to_scan) + freed += drm_gem_lru_scan(&shmem_shrinker->lru_evictable, + nr_to_scan - freed, &remaining, + drm_gem_shmem_shrinker_evict_locked); + + return (freed > 0 && remaining > 0) ? freed : SHRINK_STOP; +} + +static int drm_gem_shmem_shrinker_init(struct drm_gem_shmem *shmem_mm, + const char *shrinker_name) +{ + struct drm_gem_shmem_shrinker *shmem_shrinker = &shmem_mm->shrinker; + int err; + + shmem_shrinker->base.count_objects = drm_gem_shmem_shrinker_count_objects; + shmem_shrinker->base.scan_objects = drm_gem_shmem_shrinker_scan_objects; + shmem_shrinker->base.seeks = DEFAULT_SEEKS; + + mutex_init(&shmem_shrinker->lock); + drm_gem_lru_init(&shmem_shrinker->lru_evictable, &shmem_shrinker->lock); + drm_gem_lru_init(&shmem_shrinker->lru_evicted, &shmem_shrinker->lock); + drm_gem_lru_init(&shmem_shrinker->lru_pinned, &shmem_shrinker->lock); + + err = register_shrinker(&shmem_shrinker->base, shrinker_name); + if (err) { + mutex_destroy(&shmem_shrinker->lock); + return err; + } + + return 0; +} + +static void drm_gem_shmem_shrinker_release(struct drm_device *dev, + struct drm_gem_shmem *shmem_mm) +{ + struct drm_gem_shmem_shrinker *shmem_shrinker = &shmem_mm->shrinker; + + unregister_shrinker(&shmem_shrinker->base); + drm_WARN_ON(dev, !list_empty(&shmem_shrinker->lru_evictable.list)); + drm_WARN_ON(dev, !list_empty(&shmem_shrinker->lru_evicted.list)); + drm_WARN_ON(dev, !list_empty(&shmem_shrinker->lru_pinned.list)); + mutex_destroy(&shmem_shrinker->lock); +} + +static int drm_gem_shmem_init(struct drm_device *dev) +{ + int err; + + if (drm_WARN_ON(dev, dev->shmem_mm)) + return -EBUSY; + + dev->shmem_mm = kzalloc(sizeof(*dev->shmem_mm), GFP_KERNEL); + if (!dev->shmem_mm) + return -ENOMEM; + + err = drm_gem_shmem_shrinker_init(dev->shmem_mm, dev->unique); + if (err) + goto free_gem_shmem; + + return 0; + +free_gem_shmem: + kfree(dev->shmem_mm); + dev->shmem_mm = NULL; + + return err; +} + +static void drm_gem_shmem_release(struct drm_device *dev, void *ptr) +{ + struct drm_gem_shmem *shmem_mm = dev->shmem_mm; + + drm_gem_shmem_shrinker_release(dev, shmem_mm); + dev->shmem_mm = NULL; + kfree(shmem_mm); +} + +/** + * drmm_gem_shmem_init() - Initialize drm-shmem internals + * @dev: DRM device + * + * Cleanup is automatically managed as part of DRM device releasing. + * Calling this function multiple times will result in a error. + * + * Returns: + * 0 on success or a negative error code on failure. + */ +int drmm_gem_shmem_init(struct drm_device *dev) +{ + int err; + + err = drm_gem_shmem_init(dev); + if (err) + return err; + + err = drmm_add_action_or_reset(dev, drm_gem_shmem_release, NULL); + if (err) + return err; + + return 0; +} +EXPORT_SYMBOL_GPL(drmm_gem_shmem_init); + MODULE_DESCRIPTION("DRM SHMEM memory-management helpers"); MODULE_IMPORT_NS(DMA_BUF); MODULE_LICENSE("GPL v2"); diff --git a/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c b/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c index 72193bd734e1..1aa94fff7072 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c +++ b/drivers/gpu/drm/panfrost/panfrost_gem_shrinker.c @@ -15,6 +15,13 @@ #include "panfrost_gem.h" #include "panfrost_mmu.h" +static bool panfrost_gem_shmem_is_purgeable(struct drm_gem_shmem_object *shmem) +{ + return (shmem->madv > 0) && + !refcount_read(&shmem->pages_pin_count) && shmem->sgt && + !shmem->base.dma_buf && !shmem->base.import_attach; +} + static unsigned long panfrost_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) { @@ -27,7 +34,7 @@ panfrost_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc return 0; list_for_each_entry(shmem, &pfdev->shrinker_list, madv_list) { - if (drm_gem_shmem_is_purgeable(shmem)) + if (panfrost_gem_shmem_is_purgeable(shmem)) count += shmem->base.size >> PAGE_SHIFT; } diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h index 7cf4afae2e79..a978f0cb5e84 100644 --- a/include/drm/drm_device.h +++ b/include/drm/drm_device.h @@ -16,6 +16,7 @@ struct drm_vblank_crtc; struct drm_vma_offset_manager; struct drm_vram_mm; struct drm_fb_helper; +struct drm_gem_shmem_shrinker; struct inode; @@ -290,8 +291,13 @@ struct drm_device { /** @vma_offset_manager: GEM information */ struct drm_vma_offset_manager *vma_offset_manager; - /** @vram_mm: VRAM MM memory manager */ - struct drm_vram_mm *vram_mm; + union { + /** @vram_mm: VRAM MM memory manager */ + struct drm_vram_mm *vram_mm; + + /** @shmem_mm: SHMEM GEM memory manager */ + struct drm_gem_shmem *shmem_mm; + }; /** * @switch_power_state: diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h index 63e91e8f2d5c..65c99da25048 100644 --- a/include/drm/drm_gem_shmem_helper.h +++ b/include/drm/drm_gem_shmem_helper.h @@ -6,6 +6,7 @@ #include <linux/fs.h> #include <linux/mm.h> #include <linux/mutex.h> +#include <linux/shrinker.h> #include <drm/drm_file.h> #include <drm/drm_gem.h> @@ -13,6 +14,7 @@ #include <drm/drm_prime.h> struct dma_buf_attachment; +struct drm_device; struct drm_mode_create_dumb; struct drm_printer; struct sg_table; @@ -53,8 +55,8 @@ struct drm_gem_shmem_object { * @madv: State for madvise * * 0 is active/inuse. + * 1 is not-needed/can-be-purged * A negative value is the object is purged. - * Positive values are driver specific and not used by the helpers. */ int madv; @@ -115,6 +117,12 @@ struct drm_gem_shmem_object { * @map_wc: map object write-combined (instead of using shmem defaults). */ bool map_wc : 1; + + /** + * @evicted: True if shmem pages are evicted by the memory shrinker. + * Used internally by memory shrinker. + */ + bool evicted : 1; }; #define to_drm_gem_shmem_obj(obj) \ @@ -133,14 +141,22 @@ void drm_gem_shmem_vunmap_locked(struct drm_gem_shmem_object *shmem, int drm_gem_shmem_mmap(struct drm_gem_shmem_object *shmem, struct vm_area_struct *vma); int drm_gem_shmem_madvise_locked(struct drm_gem_shmem_object *shmem, int madv); +int drm_gem_shmem_madvise(struct drm_gem_shmem_object *shmem, int madv); static inline bool drm_gem_shmem_is_purgeable(struct drm_gem_shmem_object *shmem) { - return (shmem->madv > 0) && - !refcount_read(&shmem->pages_pin_count) && shmem->sgt && - !shmem->base.dma_buf && !shmem->base.import_attach; + dma_resv_assert_held(shmem->base.resv); + + return (shmem->madv > 0) && shmem->base.funcs->evict && + refcount_read(&shmem->pages_use_count) && + !refcount_read(&shmem->pages_pin_count) && + !shmem->base.dma_buf && !shmem->base.import_attach && + (shmem->sgt || shmem->evicted); } +int drm_gem_shmem_swapin_locked(struct drm_gem_shmem_object *shmem); + +void drm_gem_shmem_evict_locked(struct drm_gem_shmem_object *shmem); void drm_gem_shmem_purge_locked(struct drm_gem_shmem_object *shmem); struct sg_table *drm_gem_shmem_get_sg_table(struct drm_gem_shmem_object *shmem); @@ -284,6 +300,53 @@ static inline int drm_gem_shmem_object_mmap(struct drm_gem_object *obj, struct v return drm_gem_shmem_mmap(shmem, vma); } +/** + * drm_gem_shmem_object_madvise - unlocked GEM object function for drm_gem_shmem_madvise_locked() + * @obj: GEM object + * @madv: Madvise value + * + * This function wraps drm_gem_shmem_madvise_locked(), providing unlocked variant. + * + * Returns: + * 0 on success or a negative error code on failure. + */ +static inline int drm_gem_shmem_object_madvise(struct drm_gem_object *obj, int madv) +{ + struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); + + return drm_gem_shmem_madvise(shmem, madv); +} + +/** + * struct drm_gem_shmem_shrinker - Memory shrinker of GEM shmem memory manager + */ +struct drm_gem_shmem_shrinker { + /** @base: Shrinker for purging shmem GEM objects */ + struct shrinker base; + + /** @lock: Protects @lru_* */ + struct mutex lock; + + /** @lru_pinned: List of pinned shmem GEM objects */ + struct drm_gem_lru lru_pinned; + + /** @lru_evictable: List of shmem GEM objects to be evicted */ + struct drm_gem_lru lru_evictable; + + /** @lru_evicted: List of evicted shmem GEM objects */ + struct drm_gem_lru lru_evicted; +}; + +/** + * struct drm_gem_shmem - GEM shmem memory manager + */ +struct drm_gem_shmem { + /** @shrinker: GEM shmem shrinker */ + struct drm_gem_shmem_shrinker shrinker; +}; + +int drmm_gem_shmem_init(struct drm_device *dev); + /* * Driver ops */