Message ID | 20221118182407.82548-5-nphamcs@gmail.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Implement writeback for zsmalloc | expand |
On Fri, Nov 18, 2022 at 10:24:05AM -0800, Nhat Pham wrote: > This helps determines the coldest zspages as candidates for writeback. > > Signed-off-by: Nhat Pham <nphamcs@gmail.com> > --- > mm/zsmalloc.c | 45 +++++++++++++++++++++++++++++++++++++++++++-- > 1 file changed, 43 insertions(+), 2 deletions(-) > > diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c > index 326faa751f0a..9e7b54324181 100644 > --- a/mm/zsmalloc.c > +++ b/mm/zsmalloc.c > @@ -239,6 +239,11 @@ struct zs_pool { > /* Compact classes */ > struct shrinker shrinker; > > +#ifdef CONFIG_ZPOOL > + /* List tracking the zspages in LRU order by most recently added object */ > + struct list_head lru; > +#endif > + > #ifdef CONFIG_ZSMALLOC_STAT > struct dentry *stat_dentry; > #endif > @@ -260,6 +265,12 @@ struct zspage { > unsigned int freeobj; > struct page *first_page; > struct list_head list; /* fullness list */ > + > +#ifdef CONFIG_ZPOOL > + /* links the zspage to the lru list in the pool */ > + struct list_head lru; > +#endif > + > struct zs_pool *pool; > #ifdef CONFIG_COMPACTION > rwlock_t lock; > @@ -352,6 +363,18 @@ static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage) > kmem_cache_free(pool->zspage_cachep, zspage); > } > > +#ifdef CONFIG_ZPOOL > +/* Moves the zspage to the front of the zspool's LRU */ > +static void move_to_front(struct zs_pool *pool, struct zspage *zspage) > +{ > + assert_spin_locked(&pool->lock); > + > + if (!list_empty(&zspage->lru)) > + list_del(&zspage->lru); > + list_add(&zspage->lru, &pool->lru); > +} > +#endif > + > /* pool->lock(which owns the handle) synchronizes races */ > static void record_obj(unsigned long handle, unsigned long obj) > { > @@ -953,6 +976,9 @@ static void free_zspage(struct zs_pool *pool, struct size_class *class, > } > > remove_zspage(class, zspage, ZS_EMPTY); > +#ifdef CONFIG_ZPOOL > + list_del(&zspage->lru); > +#endif > __free_zspage(pool, class, zspage); > } > > @@ -998,6 +1024,10 @@ static void init_zspage(struct size_class *class, 
struct zspage *zspage) > off %= PAGE_SIZE; > } > > +#ifdef CONFIG_ZPOOL > + INIT_LIST_HEAD(&zspage->lru); > +#endif > + > set_freeobj(zspage, 0); > } > > @@ -1418,9 +1448,8 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp) > fix_fullness_group(class, zspage); > record_obj(handle, obj); > class_stat_inc(class, OBJ_USED, 1); > - spin_unlock(&pool->lock); > > - return handle; > + goto out; > } > > spin_unlock(&pool->lock); > @@ -1444,6 +1473,11 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp) > > /* We completely set up zspage so mark them as movable */ > SetZsPageMovable(pool, zspage); > +out: > +#ifdef CONFIG_ZPOOL > + /* Move the zspage to front of pool's LRU */ > + move_to_front(pool, zspage); > +#endif > spin_unlock(&pool->lock); Please move the move_to_front into zs_map_object with ZS_MM_WO with comment with "why we are doing only for WO case".
On Fri, Nov 18, 2022 at 11:32:01AM -0800, Minchan Kim wrote: > On Fri, Nov 18, 2022 at 10:24:05AM -0800, Nhat Pham wrote: > > @@ -1444,6 +1473,11 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp) > > > > /* We completely set up zspage so mark them as movable */ > > SetZsPageMovable(pool, zspage); > > +out: > > +#ifdef CONFIG_ZPOOL > > + /* Move the zspage to front of pool's LRU */ > > + move_to_front(pool, zspage); > > +#endif > > spin_unlock(&pool->lock); > > Please move the move_to_front into zs_map_object with ZS_MM_WO with > comment with "why we are doing only for WO case". I replied to the other thread, but I disagree with this request. The WO exception would be as zswap-specific as is the rotate-on-alloc. It doesn't make the resulting zsmalloc code any cleaner or more generic, just weird in a slightly different way. On the other hand, it makes zsmalloc deviate from the other backends and introduces new callchains that invalidate thousands of machine hours of production testing of this code.
On Fri, Nov 18, 2022 at 03:05:04PM -0500, Johannes Weiner wrote: > On Fri, Nov 18, 2022 at 11:32:01AM -0800, Minchan Kim wrote: > > On Fri, Nov 18, 2022 at 10:24:05AM -0800, Nhat Pham wrote: > > > @@ -1444,6 +1473,11 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp) > > > > > > /* We completely set up zspage so mark them as movable */ > > > SetZsPageMovable(pool, zspage); > > > +out: > > > +#ifdef CONFIG_ZPOOL > > > + /* Move the zspage to front of pool's LRU */ > > > + move_to_front(pool, zspage); > > > +#endif > > > spin_unlock(&pool->lock); > > > > Please move the move_to_front into zs_map_object with ZS_MM_WO with > > comment with "why we are doing only for WO case". > > I replied to the other thread, but I disagree with this request. > > The WO exception would be as zswap-specific as is the > rotate-on-alloc. It doesn't make the resulting zsmalloc code any That's true but at least, zs_pool allocators have the accessor so that's fair place to have the LRU updating. I guess that's why you agreed that's better place. No? I understand that's zswap-specific that the bad design keeps pushing smelly code into allocators and then "push to take it since other were already doing" with "we will take them off with better solution in future". I am really struggling to understand this concept. Johannes, Is that really how we work over a decade? > cleaner or more generic, just weird in a slightly different way. > > On the other hand, it makes zsmalloc deviate from the other backends > and introduces new callchains that invalidate thousands of machine > hours of production testing of this code. Do you really believe the trival change makes invalidates the testing? 
ret = zpool_malloc(entry->pool->zpool, hlen + dlen, gfp, &handle); if (ret == -ENOSPC) { zswap_reject_compress_poor++; goto put_dstmem; } if (ret) { zswap_reject_alloc_fail++; goto put_dstmem; } buf = zpool_map_handle(entry->pool->zpool, handle, ZPOOL_MM_WO); memcpy(buf, &zhdr, hlen); memcpy(buf + hlen, dst, dlen); zpool_unmap_handle(entry->pool->zpool, handle);
On Fri, Nov 18, 2022 at 01:35:01PM -0800, Minchan Kim wrote: > On Fri, Nov 18, 2022 at 03:05:04PM -0500, Johannes Weiner wrote: > > On Fri, Nov 18, 2022 at 11:32:01AM -0800, Minchan Kim wrote: > > > On Fri, Nov 18, 2022 at 10:24:05AM -0800, Nhat Pham wrote: > > > > @@ -1444,6 +1473,11 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp) > > > > > > > > /* We completely set up zspage so mark them as movable */ > > > > SetZsPageMovable(pool, zspage); > > > > +out: > > > > +#ifdef CONFIG_ZPOOL > > > > + /* Move the zspage to front of pool's LRU */ > > > > + move_to_front(pool, zspage); > > > > +#endif > > > > spin_unlock(&pool->lock); > > > > > > Please move the move_to_front into zs_map_object with ZS_MM_WO with > > > comment with "why we are doing only for WO case". > > > > I replied to the other thread, but I disagree with this request. > > > > The WO exception would be as zswap-specific as is the > > rotate-on-alloc. It doesn't make the resulting zsmalloc code any > > That's true but at least, zs_pool allocators have the accessor so > that's fair place to have the LRU updating. I guess that's why > you agreed that's better place. No? > > I understand that's zswap-specific that the bad design keeps > pushing smelly code into allocators and then "push to take it > since other were already doing" with "we will take them off with > better solution in future". I am really struggling to understand > this concept. Johannes, Is that really how we work over a decade? My point was that there is no difference between having zswap code in alloc or in map. And there is a small upside to having it in alloc because of the other backends. But I won't fight you on it. The code isn't going to stay like this for long anyway.
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 326faa751f0a..9e7b54324181 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -239,6 +239,11 @@ struct zs_pool { /* Compact classes */ struct shrinker shrinker; +#ifdef CONFIG_ZPOOL + /* List tracking the zspages in LRU order by most recently added object */ + struct list_head lru; +#endif + #ifdef CONFIG_ZSMALLOC_STAT struct dentry *stat_dentry; #endif @@ -260,6 +265,12 @@ struct zspage { unsigned int freeobj; struct page *first_page; struct list_head list; /* fullness list */ + +#ifdef CONFIG_ZPOOL + /* links the zspage to the lru list in the pool */ + struct list_head lru; +#endif + struct zs_pool *pool; #ifdef CONFIG_COMPACTION rwlock_t lock; @@ -352,6 +363,18 @@ static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage) kmem_cache_free(pool->zspage_cachep, zspage); } +#ifdef CONFIG_ZPOOL +/* Moves the zspage to the front of the zspool's LRU */ +static void move_to_front(struct zs_pool *pool, struct zspage *zspage) +{ + assert_spin_locked(&pool->lock); + + if (!list_empty(&zspage->lru)) + list_del(&zspage->lru); + list_add(&zspage->lru, &pool->lru); +} +#endif + /* pool->lock(which owns the handle) synchronizes races */ static void record_obj(unsigned long handle, unsigned long obj) { @@ -953,6 +976,9 @@ static void free_zspage(struct zs_pool *pool, struct size_class *class, } remove_zspage(class, zspage, ZS_EMPTY); +#ifdef CONFIG_ZPOOL + list_del(&zspage->lru); +#endif __free_zspage(pool, class, zspage); } @@ -998,6 +1024,10 @@ static void init_zspage(struct size_class *class, struct zspage *zspage) off %= PAGE_SIZE; } +#ifdef CONFIG_ZPOOL + INIT_LIST_HEAD(&zspage->lru); +#endif + set_freeobj(zspage, 0); } @@ -1418,9 +1448,8 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp) fix_fullness_group(class, zspage); record_obj(handle, obj); class_stat_inc(class, OBJ_USED, 1); - spin_unlock(&pool->lock); - return handle; + goto out; } spin_unlock(&pool->lock); @@ -1444,6 +1473,11 @@ 
unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp) /* We completely set up zspage so mark them as movable */ SetZsPageMovable(pool, zspage); +out: +#ifdef CONFIG_ZPOOL + /* Move the zspage to front of pool's LRU */ + move_to_front(pool, zspage); +#endif spin_unlock(&pool->lock); return handle; @@ -1967,6 +2001,9 @@ static void async_free_zspage(struct work_struct *work) VM_BUG_ON(fullness != ZS_EMPTY); class = pool->size_class[class_idx]; spin_lock(&pool->lock); +#ifdef CONFIG_ZPOOL + list_del(&zspage->lru); +#endif __free_zspage(pool, class, zspage); spin_unlock(&pool->lock); } @@ -2278,6 +2315,10 @@ struct zs_pool *zs_create_pool(const char *name) */ zs_register_shrinker(pool); +#ifdef CONFIG_ZPOOL + INIT_LIST_HEAD(&pool->lru); +#endif + return pool; err:
This helps determine the coldest zspages as candidates for writeback. Signed-off-by: Nhat Pham <nphamcs@gmail.com> --- mm/zsmalloc.c | 45 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 2 deletions(-) -- 2.30.2