Message ID | 20240621-zsmalloc-lock-mm-everything-v2-2-d30e9cd2b793@linux.dev (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | mm/zsmalloc: change back to per-size_class lock | expand |
On Fri, Jun 21, 2024 at 12:15 AM Chengming Zhou <chengming.zhou@linux.dev> wrote: > > Zswap uses 32 pools to workaround the locking scalability problem in > zswap backends (mainly zsmalloc nowadays), which brings its own problems > like memory waste and more memory fragmentation. > > Testing results show that we can have near performance with only one > pool in zswap after changing zsmalloc to use per-size_class lock instead > of pool spinlock. > > Testing kernel build (make bzImage -j32) on tmpfs with memory.max=1GB, > and zswap shrinker enabled with 10GB swapfile on ext4. > > real user sys > 6.10.0-rc3 138.18 1241.38 1452.73 > 6.10.0-rc3-onepool 149.45 1240.45 1844.69 > 6.10.0-rc3-onepool-perclass 138.23 1242.37 1469.71 > > And do the same testing using zbud, which shows a little worse performance > as expected since we don't do any locking optimization for zbud. I think > it's acceptable since zsmalloc became a lot more popular than other > backends, and we may want to support only zsmalloc in the future. > > real user sys > 6.10.0-rc3-zbud 138.23 1239.58 1430.09 > 6.10.0-rc3-onepool-zbud 139.64 1241.37 1516.59 > > Reviewed-by: Nhat Pham <nphamcs@gmail.com> > Signed-off-by: Chengming Zhou <chengming.zhou@linux.dev> > --- > mm/zswap.c | 60 +++++++++++++++++++----------------------------------------- > 1 file changed, 19 insertions(+), 41 deletions(-) > > diff --git a/mm/zswap.c b/mm/zswap.c > index e25a6808c2ed..7925a3d0903e 100644 > --- a/mm/zswap.c > +++ b/mm/zswap.c > @@ -122,9 +122,6 @@ static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */ > module_param_named(accept_threshold_percent, zswap_accept_thr_percent, > uint, 0644); > > -/* Number of zpools in zswap_pool (empirically determined for scalability) */ > -#define ZSWAP_NR_ZPOOLS 32 > - > /* Enable/disable memory pressure-based shrinker. 
*/ > static bool zswap_shrinker_enabled = IS_ENABLED( > CONFIG_ZSWAP_SHRINKER_DEFAULT_ON); > @@ -160,7 +157,7 @@ struct crypto_acomp_ctx { > * needs to be verified that it's still valid in the tree. > */ > struct zswap_pool { > - struct zpool *zpools[ZSWAP_NR_ZPOOLS]; > + struct zpool *zpool; > struct crypto_acomp_ctx __percpu *acomp_ctx; > struct percpu_ref ref; > struct list_head list; > @@ -237,7 +234,7 @@ static inline struct xarray *swap_zswap_tree(swp_entry_t swp) > > #define zswap_pool_debug(msg, p) \ > pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name, \ > - zpool_get_type((p)->zpools[0])) > + zpool_get_type((p)->zpool)) > > /********************************* > * pool functions > @@ -246,7 +243,6 @@ static void __zswap_pool_empty(struct percpu_ref *ref); > > static struct zswap_pool *zswap_pool_create(char *type, char *compressor) > { > - int i; > struct zswap_pool *pool; > char name[38]; /* 'zswap' + 32 char (max) num + \0 */ > gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM; > @@ -267,18 +263,14 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor) > if (!pool) > return NULL; > > - for (i = 0; i < ZSWAP_NR_ZPOOLS; i++) { > - /* unique name for each pool specifically required by zsmalloc */ > - snprintf(name, 38, "zswap%x", > - atomic_inc_return(&zswap_pools_count)); > - > - pool->zpools[i] = zpool_create_pool(type, name, gfp); > - if (!pool->zpools[i]) { > - pr_err("%s zpool not available\n", type); > - goto error; > - } > + /* unique name for each pool specifically required by zsmalloc */ > + snprintf(name, 38, "zswap%x", atomic_inc_return(&zswap_pools_count)); > + pool->zpool = zpool_create_pool(type, name, gfp); > + if (!pool->zpool) { > + pr_err("%s zpool not available\n", type); > + return NULL; We need to goto error here to free the pool. 
> } > - pr_debug("using %s zpool\n", zpool_get_type(pool->zpools[0])); > + pr_debug("using %s zpool\n", zpool_get_type(pool->zpool)); > > strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name)); > > @@ -311,8 +303,7 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor) > error: > if (pool->acomp_ctx) > free_percpu(pool->acomp_ctx); > - while (i--) > - zpool_destroy_pool(pool->zpools[i]); > + zpool_destroy_pool(pool->zpool); .. and then we will need a NULL check here.
On 2024/6/24 20:14, Yosry Ahmed wrote: > On Fri, Jun 21, 2024 at 12:15 AM Chengming Zhou > <chengming.zhou@linux.dev> wrote: >> >> Zswap uses 32 pools to workaround the locking scalability problem in >> zswap backends (mainly zsmalloc nowadays), which brings its own problems >> like memory waste and more memory fragmentation. >> >> Testing results show that we can have near performance with only one >> pool in zswap after changing zsmalloc to use per-size_class lock instead >> of pool spinlock. >> >> Testing kernel build (make bzImage -j32) on tmpfs with memory.max=1GB, >> and zswap shrinker enabled with 10GB swapfile on ext4. >> >> real user sys >> 6.10.0-rc3 138.18 1241.38 1452.73 >> 6.10.0-rc3-onepool 149.45 1240.45 1844.69 >> 6.10.0-rc3-onepool-perclass 138.23 1242.37 1469.71 >> >> And do the same testing using zbud, which shows a little worse performance >> as expected since we don't do any locking optimization for zbud. I think >> it's acceptable since zsmalloc became a lot more popular than other >> backends, and we may want to support only zsmalloc in the future. >> >> real user sys >> 6.10.0-rc3-zbud 138.23 1239.58 1430.09 >> 6.10.0-rc3-onepool-zbud 139.64 1241.37 1516.59 >> >> Reviewed-by: Nhat Pham <nphamcs@gmail.com> >> Signed-off-by: Chengming Zhou <chengming.zhou@linux.dev> >> --- >> mm/zswap.c | 60 +++++++++++++++++++----------------------------------------- >> 1 file changed, 19 insertions(+), 41 deletions(-) >> >> diff --git a/mm/zswap.c b/mm/zswap.c >> index e25a6808c2ed..7925a3d0903e 100644 >> --- a/mm/zswap.c >> +++ b/mm/zswap.c >> @@ -122,9 +122,6 @@ static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */ >> module_param_named(accept_threshold_percent, zswap_accept_thr_percent, >> uint, 0644); >> >> -/* Number of zpools in zswap_pool (empirically determined for scalability) */ >> -#define ZSWAP_NR_ZPOOLS 32 >> - >> /* Enable/disable memory pressure-based shrinker. 
*/ >> static bool zswap_shrinker_enabled = IS_ENABLED( >> CONFIG_ZSWAP_SHRINKER_DEFAULT_ON); >> @@ -160,7 +157,7 @@ struct crypto_acomp_ctx { >> * needs to be verified that it's still valid in the tree. >> */ >> struct zswap_pool { >> - struct zpool *zpools[ZSWAP_NR_ZPOOLS]; >> + struct zpool *zpool; >> struct crypto_acomp_ctx __percpu *acomp_ctx; >> struct percpu_ref ref; >> struct list_head list; >> @@ -237,7 +234,7 @@ static inline struct xarray *swap_zswap_tree(swp_entry_t swp) >> >> #define zswap_pool_debug(msg, p) \ >> pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name, \ >> - zpool_get_type((p)->zpools[0])) >> + zpool_get_type((p)->zpool)) >> >> /********************************* >> * pool functions >> @@ -246,7 +243,6 @@ static void __zswap_pool_empty(struct percpu_ref *ref); >> >> static struct zswap_pool *zswap_pool_create(char *type, char *compressor) >> { >> - int i; >> struct zswap_pool *pool; >> char name[38]; /* 'zswap' + 32 char (max) num + \0 */ >> gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM; >> @@ -267,18 +263,14 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor) >> if (!pool) >> return NULL; >> >> - for (i = 0; i < ZSWAP_NR_ZPOOLS; i++) { >> - /* unique name for each pool specifically required by zsmalloc */ >> - snprintf(name, 38, "zswap%x", >> - atomic_inc_return(&zswap_pools_count)); >> - >> - pool->zpools[i] = zpool_create_pool(type, name, gfp); >> - if (!pool->zpools[i]) { >> - pr_err("%s zpool not available\n", type); >> - goto error; >> - } >> + /* unique name for each pool specifically required by zsmalloc */ >> + snprintf(name, 38, "zswap%x", atomic_inc_return(&zswap_pools_count)); >> + pool->zpool = zpool_create_pool(type, name, gfp); >> + if (!pool->zpool) { >> + pr_err("%s zpool not available\n", type); >> + return NULL; > > We need to goto error here to free the pool. 
> >> } >> - pr_debug("using %s zpool\n", zpool_get_type(pool->zpools[0])); >> + pr_debug("using %s zpool\n", zpool_get_type(pool->zpool)); >> >> strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name)); >> >> @@ -311,8 +303,7 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor) >> error: >> if (pool->acomp_ctx) >> free_percpu(pool->acomp_ctx); >> - while (i--) >> - zpool_destroy_pool(pool->zpools[i]); >> + zpool_destroy_pool(pool->zpool); > > .. and then we will need a NULL check needed here. Oops, my bad, will fix in the next version. Thanks!
diff --git a/mm/zswap.c b/mm/zswap.c index e25a6808c2ed..7925a3d0903e 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -122,9 +122,6 @@ static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */ module_param_named(accept_threshold_percent, zswap_accept_thr_percent, uint, 0644); -/* Number of zpools in zswap_pool (empirically determined for scalability) */ -#define ZSWAP_NR_ZPOOLS 32 - /* Enable/disable memory pressure-based shrinker. */ static bool zswap_shrinker_enabled = IS_ENABLED( CONFIG_ZSWAP_SHRINKER_DEFAULT_ON); @@ -160,7 +157,7 @@ struct crypto_acomp_ctx { * needs to be verified that it's still valid in the tree. */ struct zswap_pool { - struct zpool *zpools[ZSWAP_NR_ZPOOLS]; + struct zpool *zpool; struct crypto_acomp_ctx __percpu *acomp_ctx; struct percpu_ref ref; struct list_head list; @@ -237,7 +234,7 @@ static inline struct xarray *swap_zswap_tree(swp_entry_t swp) #define zswap_pool_debug(msg, p) \ pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name, \ - zpool_get_type((p)->zpools[0])) + zpool_get_type((p)->zpool)) /********************************* * pool functions @@ -246,7 +243,6 @@ static void __zswap_pool_empty(struct percpu_ref *ref); static struct zswap_pool *zswap_pool_create(char *type, char *compressor) { - int i; struct zswap_pool *pool; char name[38]; /* 'zswap' + 32 char (max) num + \0 */ gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM; @@ -267,18 +263,14 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor) if (!pool) return NULL; - for (i = 0; i < ZSWAP_NR_ZPOOLS; i++) { - /* unique name for each pool specifically required by zsmalloc */ - snprintf(name, 38, "zswap%x", - atomic_inc_return(&zswap_pools_count)); - - pool->zpools[i] = zpool_create_pool(type, name, gfp); - if (!pool->zpools[i]) { - pr_err("%s zpool not available\n", type); - goto error; - } + /* unique name for each pool specifically required by zsmalloc */ + snprintf(name, 38, "zswap%x", atomic_inc_return(&zswap_pools_count)); + 
pool->zpool = zpool_create_pool(type, name, gfp); + if (!pool->zpool) { + pr_err("%s zpool not available\n", type); + return NULL; } - pr_debug("using %s zpool\n", zpool_get_type(pool->zpools[0])); + pr_debug("using %s zpool\n", zpool_get_type(pool->zpool)); strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name)); @@ -311,8 +303,7 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor) error: if (pool->acomp_ctx) free_percpu(pool->acomp_ctx); - while (i--) - zpool_destroy_pool(pool->zpools[i]); + zpool_destroy_pool(pool->zpool); kfree(pool); return NULL; } @@ -361,15 +352,12 @@ static struct zswap_pool *__zswap_pool_create_fallback(void) static void zswap_pool_destroy(struct zswap_pool *pool) { - int i; - zswap_pool_debug("destroying", pool); cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node); free_percpu(pool->acomp_ctx); - for (i = 0; i < ZSWAP_NR_ZPOOLS; i++) - zpool_destroy_pool(pool->zpools[i]); + zpool_destroy_pool(pool->zpool); kfree(pool); } @@ -464,8 +452,7 @@ static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor) list_for_each_entry_rcu(pool, &zswap_pools, list) { if (strcmp(pool->tfm_name, compressor)) continue; - /* all zpools share the same type */ - if (strcmp(zpool_get_type(pool->zpools[0]), type)) + if (strcmp(zpool_get_type(pool->zpool), type)) continue; /* if we can't get it, it's about to be destroyed */ if (!zswap_pool_get(pool)) @@ -492,12 +479,8 @@ unsigned long zswap_total_pages(void) unsigned long total = 0; rcu_read_lock(); - list_for_each_entry_rcu(pool, &zswap_pools, list) { - int i; - - for (i = 0; i < ZSWAP_NR_ZPOOLS; i++) - total += zpool_get_total_pages(pool->zpools[i]); - } + list_for_each_entry_rcu(pool, &zswap_pools, list) + total += zpool_get_total_pages(pool->zpool); rcu_read_unlock(); return total; @@ -802,11 +785,6 @@ static void zswap_entry_cache_free(struct zswap_entry *entry) kmem_cache_free(zswap_entry_cache, entry); } -static struct zpool 
*zswap_find_zpool(struct zswap_entry *entry) -{ - return entry->pool->zpools[hash_ptr(entry, ilog2(ZSWAP_NR_ZPOOLS))]; -} - /* * Carries out the common pattern of freeing and entry's zpool allocation, * freeing the entry itself, and decrementing the number of stored pages. @@ -814,7 +792,7 @@ static struct zpool *zswap_find_zpool(struct zswap_entry *entry) static void zswap_entry_free(struct zswap_entry *entry) { zswap_lru_del(&zswap_list_lru, entry); - zpool_free(zswap_find_zpool(entry), entry->handle); + zpool_free(entry->pool->zpool, entry->handle); zswap_pool_put(entry->pool); if (entry->objcg) { obj_cgroup_uncharge_zswap(entry->objcg, entry->length); @@ -939,7 +917,7 @@ static bool zswap_compress(struct folio *folio, struct zswap_entry *entry) if (comp_ret) goto unlock; - zpool = zswap_find_zpool(entry); + zpool = entry->pool->zpool; gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM; if (zpool_malloc_support_movable(zpool)) gfp |= __GFP_HIGHMEM | __GFP_MOVABLE; @@ -968,7 +946,7 @@ static bool zswap_compress(struct folio *folio, struct zswap_entry *entry) static void zswap_decompress(struct zswap_entry *entry, struct folio *folio) { - struct zpool *zpool = zswap_find_zpool(entry); + struct zpool *zpool = entry->pool->zpool; struct scatterlist input, output; struct crypto_acomp_ctx *acomp_ctx; u8 *src; @@ -1467,7 +1445,7 @@ bool zswap_store(struct folio *folio) return true; store_failed: - zpool_free(zswap_find_zpool(entry), entry->handle); + zpool_free(entry->pool->zpool, entry->handle); put_pool: zswap_pool_put(entry->pool); freepage: @@ -1683,7 +1661,7 @@ static int zswap_setup(void) pool = __zswap_pool_create_fallback(); if (pool) { pr_info("loaded using pool %s/%s\n", pool->tfm_name, - zpool_get_type(pool->zpools[0])); + zpool_get_type(pool->zpool)); list_add(&pool->list, &zswap_pools); zswap_has_pool = true; static_branch_enable(&zswap_ever_enabled);