
[2/2] mm/zswap: use only one pool in zswap

Message ID: 20240617-zsmalloc-lock-mm-everything-v1-2-5e5081ea11b3@linux.dev
State: New
Series: mm/zsmalloc: change back to per-size_class lock

Commit Message

Chengming Zhou June 17, 2024, 12:57 p.m. UTC
Zswap uses 32 pools to work around the locking scalability problem in
zsmalloc, which brings its own problems, such as memory waste and more
memory fragmentation.

Testing results show that we can get nearly the same performance with
only one pool in zswap after changing zsmalloc to use a per-size_class
lock instead of the pool spinlock.
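
For context, the zsmalloc locking change in patch 1/2 looks roughly like
the sketch below. This is a simplified sketch, not the actual diff: the
structures are abbreviated from mm/zsmalloc.c and sketch_zs_malloc() is
a hypothetical stand-in for the real allocation path.

	struct size_class {
		spinlock_t lock;	/* per-class, replaces the pool-wide spinlock */
		/* ... object size, lists of zspages, etc. ... */
	};

	static unsigned long sketch_zs_malloc(struct zs_pool *pool, size_t size)
	{
		struct size_class *class = pool->size_class[get_size_class_index(size)];
		unsigned long handle = 0;

		spin_lock(&class->lock);	/* was: spin_lock(&pool->lock) */
		/* ... carve an object out of a zspage in this class ... */
		spin_unlock(&class->lock);
		return handle;
	}

Allocations and frees in different size classes can then run in parallel,
so a single zswap pool no longer serializes on one zsmalloc spinlock.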

Tested with a kernel build (make bzImage -j32) on tmpfs with
memory.max=1GB, and the zswap shrinker enabled with a 10GB swapfile on
ext4.

                                real    user    sys
6.10.0-rc3                      138.18  1241.38 1452.73
6.10.0-rc3-onepool              149.45  1240.45 1844.69
6.10.0-rc3-onepool-perclass     138.23  1242.37 1469.71
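
With a single zpool, the per-entry zpool selection also goes away: the
removed zswap_find_zpool() below spread entries across the 32 zpools by
pointer hash,

	zpool = entry->pool->zpools[hash_ptr(entry, ilog2(ZSWAP_NR_ZPOOLS))];

and every call site now uses a plain dereference instead:

	zpool = entry->pool->zpool;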

Signed-off-by: Chengming Zhou <chengming.zhou@linux.dev>
---
 mm/zswap.c | 60 +++++++++++++++++++-----------------------------------------
 1 file changed, 19 insertions(+), 41 deletions(-)

Comments

Nhat Pham June 17, 2024, 5:01 p.m. UTC | #1
On Mon, Jun 17, 2024 at 5:58 AM Chengming Zhou <chengming.zhou@linux.dev> wrote:
>
> Zswap uses 32 pools to work around the locking scalability problem in
> zsmalloc, which brings its own problems, such as memory waste and more
> memory fragmentation.
>
> Testing results show that we can get nearly the same performance with
> only one pool in zswap after changing zsmalloc to use a per-size_class
> lock instead of the pool spinlock.
>
> Tested with a kernel build (make bzImage -j32) on tmpfs with
> memory.max=1GB, and the zswap shrinker enabled with a 10GB swapfile on
> ext4.
>
>                                 real    user    sys
> 6.10.0-rc3                      138.18  1241.38 1452.73
> 6.10.0-rc3-onepool              149.45  1240.45 1844.69
> 6.10.0-rc3-onepool-perclass     138.23  1242.37 1469.71
>
> Signed-off-by: Chengming Zhou <chengming.zhou@linux.dev>

Nice! I see minimal difference between the one-pool-with-per-class-lock
version and the current one, and memory fragmentation should be reduced.

Anyway, I'll let the zsmalloc maintainers have the final say over
their side, but if this patch is picked up (and with the assumption
that the per-class lock is re-introduced), please feel free to
include:

Reviewed-by: Nhat Pham <nphamcs@gmail.com>
Yu Zhao June 17, 2024, 8:16 p.m. UTC | #2
On Mon, Jun 17, 2024 at 6:58 AM Chengming Zhou <chengming.zhou@linux.dev> wrote:
>
> Zswap uses 32 pools to work around the locking scalability problem in
> zsmalloc,

Note that zpool can have other backends (zbud, z3fold), and the
original patch was developed (even before zswap could use zsmalloc) to
make sure it works for all the backends.

This patch makes sense now only because zsmalloc became a lot
more popular than the other backends (even though some distros still
default to zbud).

> which brings its own problems, such as memory waste and more
> memory fragmentation.
>
> Testing results show that we can get nearly the same performance with
> only one pool in zswap after changing zsmalloc to use a per-size_class
> lock instead of the pool spinlock.
>
> Tested with a kernel build (make bzImage -j32) on tmpfs with
> memory.max=1GB, and the zswap shrinker enabled with a 10GB swapfile on
> ext4.
>
>                                 real    user    sys
> 6.10.0-rc3                      138.18  1241.38 1452.73
> 6.10.0-rc3-onepool              149.45  1240.45 1844.69
> 6.10.0-rc3-onepool-perclass     138.23  1242.37 1469.71
>
> Signed-off-by: Chengming Zhou <chengming.zhou@linux.dev>
Yu Zhao June 17, 2024, 8:19 p.m. UTC | #3
On Mon, Jun 17, 2024 at 2:16 PM Yu Zhao <yuzhao@google.com> wrote:
>
> On Mon, Jun 17, 2024 at 6:58 AM Chengming Zhou <chengming.zhou@linux.dev> wrote:
> >
> > Zswap uses 32 pools to work around the locking scalability problem in
> > zsmalloc,
>
> Note that zpool can have other backends (zbud, z3fold), and the
> original patch was developed (even before zswap could use zsmalloc) to
> make sure it works for all the backends.
>
> This patch makes sense now only because zsmalloc became a lot
> more popular than the other backends (even though some distros still
> default to zbud).

And this might also have answered Yosry's question about the
"historical context" here [1].

[1] https://lore.kernel.org/CAJD7tkbO+ZLdhs-9BpthztZX32i8C4=QEnoiXGS7bM399nqwzg@mail.gmail.com/
Chengming Zhou June 18, 2024, 7:59 a.m. UTC | #4
On 2024/6/17 13:16, Yu Zhao wrote:
> On Mon, Jun 17, 2024 at 6:58 AM Chengming Zhou <chengming.zhou@linux.dev> wrote:
>>
>> Zswap uses 32 pools to work around the locking scalability problem in
>> zsmalloc,
> 
> Note that zpool can have other backends (zbud, z3fold), and the
> original patch was developed (even before zswap could use zsmalloc) to
> make sure it works for all the backends.
>
> This patch makes sense now only because zsmalloc became a lot
> more popular than the other backends (even though some distros still
> default to zbud).
> 

Right, we mostly focus on zsmalloc. I just did the same testing using zbud:

                                real    user    sys
6.10.0-rc3-zbud                 138.23  1239.58 1430.09
6.10.0-rc3-onepool-zbud         139.64  1241.37 1516.59

Since we don't do any locking optimization for zbud, performance is
worse than with 32 pools, as expected.

Actually Yosry did some great testing [1] comparing zsmalloc, zbud and
z3fold, which supports using just zsmalloc in zswap, so we can drop
zpool, zbud and z3fold.

I will include this information in the changelog in the next version.

[1] https://lore.kernel.org/lkml/CAJD7tkbRF6od-2x_L8-A1QL3=2Ww13sCj4S3i4bNndqF+3+_Vg@mail.gmail.com/

Thanks.
Yosry Ahmed June 24, 2024, 11:39 a.m. UTC | #5
On Mon, Jun 17, 2024 at 1:20 PM Yu Zhao <yuzhao@google.com> wrote:
>
> On Mon, Jun 17, 2024 at 2:16 PM Yu Zhao <yuzhao@google.com> wrote:
> >
> > On Mon, Jun 17, 2024 at 6:58 AM Chengming Zhou <chengming.zhou@linux.dev> wrote:
> > >
> > > Zswap uses 32 pools to work around the locking scalability problem in
> > > zsmalloc,
> >
> > Note that zpool can have other backends (zbud, z3fold), and the
> > original patch was developed (even before zswap could use zsmalloc) to
> > make sure it works for all the backends.
> >
> > This patch makes sense now only because zsmalloc became a lot
> > more popular than the other backends (even though some distros still
> > default to zbud).
>
> And this might also have answered Yosry's question about the
> "historical context" here [1].
>
> [1] https://lore.kernel.org/CAJD7tkbO+ZLdhs-9BpthztZX32i8C4=QEnoiXGS7bM399nqwzg@mail.gmail.com/

It does, thanks.

Patch

diff --git a/mm/zswap.c b/mm/zswap.c
index e25a6808c2ed..5063c5372e51 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -122,9 +122,6 @@  static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */
 module_param_named(accept_threshold_percent, zswap_accept_thr_percent,
 		   uint, 0644);
 
-/* Number of zpools in zswap_pool (empirically determined for scalability) */
-#define ZSWAP_NR_ZPOOLS 32
-
 /* Enable/disable memory pressure-based shrinker. */
 static bool zswap_shrinker_enabled = IS_ENABLED(
 		CONFIG_ZSWAP_SHRINKER_DEFAULT_ON);
@@ -160,7 +157,7 @@  struct crypto_acomp_ctx {
  * needs to be verified that it's still valid in the tree.
  */
 struct zswap_pool {
-	struct zpool *zpools[ZSWAP_NR_ZPOOLS];
+	struct zpool *zpool;
 	struct crypto_acomp_ctx __percpu *acomp_ctx;
 	struct percpu_ref ref;
 	struct list_head list;
@@ -237,7 +234,7 @@  static inline struct xarray *swap_zswap_tree(swp_entry_t swp)
 
 #define zswap_pool_debug(msg, p)				\
 	pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name,		\
-		 zpool_get_type((p)->zpools[0]))
+		 zpool_get_type((p)->zpool))
 
 /*********************************
 * pool functions
@@ -246,7 +243,6 @@  static void __zswap_pool_empty(struct percpu_ref *ref);
 
 static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
 {
-	int i;
 	struct zswap_pool *pool;
 	char name[38]; /* 'zswap' + 32 char (max) num + \0 */
 	gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
@@ -267,18 +263,14 @@  static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
 	if (!pool)
 		return NULL;
 
-	for (i = 0; i < ZSWAP_NR_ZPOOLS; i++) {
-		/* unique name for each pool specifically required by zsmalloc */
-		snprintf(name, 38, "zswap%x",
-			 atomic_inc_return(&zswap_pools_count));
-
-		pool->zpools[i] = zpool_create_pool(type, name, gfp);
-		if (!pool->zpools[i]) {
-			pr_err("%s zpool not available\n", type);
-			goto error;
-		}
+	/* unique name for each pool specifically required by zsmalloc */
+	snprintf(name, 38, "zswap%x", atomic_inc_return(&zswap_pools_count));
+	pool->zpool = zpool_create_pool(type, name, gfp);
+	if (!pool->zpool) {
+		pr_err("%s zpool not available\n", type);
+		goto error;
 	}
-	pr_debug("using %s zpool\n", zpool_get_type(pool->zpools[0]));
+	pr_debug("using %s zpool\n", zpool_get_type(pool->zpool));
 
 	strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));
 
@@ -311,8 +303,7 @@  static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
 error:
 	if (pool->acomp_ctx)
 		free_percpu(pool->acomp_ctx);
-	while (i--)
-		zpool_destroy_pool(pool->zpools[i]);
+	zpool_destroy_pool(pool->zpool);
 	kfree(pool);
 	return NULL;
 }
@@ -361,15 +352,12 @@  static struct zswap_pool *__zswap_pool_create_fallback(void)
 
 static void zswap_pool_destroy(struct zswap_pool *pool)
 {
-	int i;
-
 	zswap_pool_debug("destroying", pool);
 
 	cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node);
 	free_percpu(pool->acomp_ctx);
 
-	for (i = 0; i < ZSWAP_NR_ZPOOLS; i++)
-		zpool_destroy_pool(pool->zpools[i]);
+	zpool_destroy_pool(pool->zpool);
 	kfree(pool);
 }
 
@@ -464,8 +452,7 @@  static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
 	list_for_each_entry_rcu(pool, &zswap_pools, list) {
 		if (strcmp(pool->tfm_name, compressor))
 			continue;
-		/* all zpools share the same type */
-		if (strcmp(zpool_get_type(pool->zpools[0]), type))
+		if (strcmp(zpool_get_type(pool->zpool), type))
 			continue;
 		/* if we can't get it, it's about to be destroyed */
 		if (!zswap_pool_get(pool))
@@ -492,12 +479,8 @@  unsigned long zswap_total_pages(void)
 	unsigned long total = 0;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(pool, &zswap_pools, list) {
-		int i;
-
-		for (i = 0; i < ZSWAP_NR_ZPOOLS; i++)
-			total += zpool_get_total_pages(pool->zpools[i]);
-	}
+	list_for_each_entry_rcu(pool, &zswap_pools, list)
+		total += zpool_get_total_pages(pool->zpool);
 	rcu_read_unlock();
 
 	return total;
@@ -802,11 +785,6 @@  static void zswap_entry_cache_free(struct zswap_entry *entry)
 	kmem_cache_free(zswap_entry_cache, entry);
 }
 
-static struct zpool *zswap_find_zpool(struct zswap_entry *entry)
-{
-	return entry->pool->zpools[hash_ptr(entry, ilog2(ZSWAP_NR_ZPOOLS))];
-}
-
 /*
  * Carries out the common pattern of freeing and entry's zpool allocation,
  * freeing the entry itself, and decrementing the number of stored pages.
@@ -814,7 +792,7 @@  static struct zpool *zswap_find_zpool(struct zswap_entry *entry)
 static void zswap_entry_free(struct zswap_entry *entry)
 {
 	zswap_lru_del(&zswap_list_lru, entry);
-	zpool_free(zswap_find_zpool(entry), entry->handle);
+	zpool_free(entry->pool->zpool, entry->handle);
 	zswap_pool_put(entry->pool);
 	if (entry->objcg) {
 		obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
@@ -939,7 +917,7 @@  static bool zswap_compress(struct folio *folio, struct zswap_entry *entry)
 	if (comp_ret)
 		goto unlock;
 
-	zpool = zswap_find_zpool(entry);
+	zpool = entry->pool->zpool;
 	gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
 	if (zpool_malloc_support_movable(zpool))
 		gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
@@ -968,7 +946,7 @@  static bool zswap_compress(struct folio *folio, struct zswap_entry *entry)
 
 static void zswap_decompress(struct zswap_entry *entry, struct folio *folio)
 {
-	struct zpool *zpool = zswap_find_zpool(entry);
+	struct zpool *zpool = entry->pool->zpool;
 	struct scatterlist input, output;
 	struct crypto_acomp_ctx *acomp_ctx;
 	u8 *src;
@@ -1467,7 +1445,7 @@  bool zswap_store(struct folio *folio)
 	return true;
 
 store_failed:
-	zpool_free(zswap_find_zpool(entry), entry->handle);
+	zpool_free(entry->pool->zpool, entry->handle);
 put_pool:
 	zswap_pool_put(entry->pool);
 freepage:
@@ -1683,7 +1661,7 @@  static int zswap_setup(void)
 	pool = __zswap_pool_create_fallback();
 	if (pool) {
 		pr_info("loaded using pool %s/%s\n", pool->tfm_name,
-			zpool_get_type(pool->zpools[0]));
+			zpool_get_type(pool->zpool));
 		list_add(&pool->list, &zswap_pools);
 		zswap_has_pool = true;
 		static_branch_enable(&zswap_ever_enabled);