Message ID: 20250129064853.2210753-2-senozhatsky@chromium.org
State: New
Series: zsmalloc: preemptible object mapping
On Wed, Jan 29, 2025 at 03:43:47PM +0900, Sergey Senozhatsky wrote:
> We currently have a mix of migrate_{read,write}_lock() helpers
> that lock zspages, but it's zs_pool that actually has a ->migrate_lock
> access to which is open-coded. Factor out pool migrate locking
> into helpers, zspage migration locking API will be renamed to
> reduce confusion.

But why did we remove "migrate" from the helpers' names if this is the
real migrate lock? It seems like the real problem is how the zspage lock
helpers are named, which is addressed later.

>
> Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
> ---
> mm/zsmalloc.c | 56 +++++++++++++++++++++++++++++++++++++--------------
> 1 file changed, 41 insertions(+), 15 deletions(-)
>
> diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
> index 817626a351f8..2f8a2b139919 100644
> --- a/mm/zsmalloc.c
> +++ b/mm/zsmalloc.c
> @@ -204,7 +204,8 @@ struct link_free {
> };
>
> struct zs_pool {
> -	const char *name;
> +	/* protect page/zspage migration */
> +	rwlock_t migrate_lock;
>
> 	struct size_class *size_class[ZS_SIZE_CLASSES];
> 	struct kmem_cache *handle_cachep;
> @@ -213,6 +214,7 @@ struct zs_pool {
> 	atomic_long_t pages_allocated;
>
> 	struct zs_pool_stats stats;
> +	atomic_t compaction_in_progress;
>
> 	/* Compact classes */
> 	struct shrinker *shrinker;
> @@ -223,11 +225,35 @@ struct zs_pool {
> #ifdef CONFIG_COMPACTION
> 	struct work_struct free_work;
> #endif
> -	/* protect page/zspage migration */
> -	rwlock_t migrate_lock;
> -	atomic_t compaction_in_progress;
> +
> +	const char *name;

Are the struct moves relevant to the patch or just to improve
readability? I am generally scared to move hot members around
unnecessarily due to cache-line sharing problems -- although I don't
know if these are really hot.

[..]
On (25/01/29 16:59), Yosry Ahmed wrote:
> On Wed, Jan 29, 2025 at 03:43:47PM +0900, Sergey Senozhatsky wrote:
> > We currently have a mix of migrate_{read,write}_lock() helpers
> > that lock zspages, but it's zs_pool that actually has a ->migrate_lock
> > access to which is open-coded. Factor out pool migrate locking
> > into helpers, zspage migration locking API will be renamed to
> > reduce confusion.
>
> But why did we remove "migrate" from the helpers' names if this is the
> real migrate lock? It seems like the real problem is how the zspage lock
> helpers are named, which is addressed later.

So this is deliberate. I guess, just like with page-faults in
zs_map_object(), this naming comes from the time when we had fewer
locks and less functionality. These days we have 3 zsmalloc locks that
can "prevent migration" at different points. Even size class spin-lock.
But it's not only about migration anymore. We also now have compaction
(defragmentation) that these locks synchronize. So I want to have a
reader-writer naming scheme.

> > struct zs_pool {
> > -	const char *name;
> > +	/* protect page/zspage migration */
> > +	rwlock_t migrate_lock;
> >
> > 	struct size_class *size_class[ZS_SIZE_CLASSES];
> > 	struct kmem_cache *handle_cachep;
> > @@ -213,6 +214,7 @@ struct zs_pool {
> > 	atomic_long_t pages_allocated;
> >
> > 	struct zs_pool_stats stats;
> > +	atomic_t compaction_in_progress;
> >
> > 	/* Compact classes */
> > 	struct shrinker *shrinker;
> > @@ -223,11 +225,35 @@ struct zs_pool {
> > #ifdef CONFIG_COMPACTION
> > 	struct work_struct free_work;
> > #endif
> > -	/* protect page/zspage migration */
> > -	rwlock_t migrate_lock;
> > -	atomic_t compaction_in_progress;
> > +
> > +	const char *name;
>
> Are the struct moves relevant to the patch or just to improve
> readability?

Relevance is relative :) That moved from the patch which also
converted the lock to rwsem. I'll move this to a separate
patch.

> I am generally scared to move hot members around unnecessarily
> due to cache-line sharing problems -- although I don't know if
> these are really hot.

Size classes are basically static - once we init the array it
becomes RO. Having migrate_lock at the bottom forces us to
jump over a big RO zs_pool region, besides we never use ->name
(unlike migrate_lock) so having it at offset 0 is unnecessary.

zs_pool_stats and compaction_in_progress are modified only
during compaction, which we do not run in parallel (only one
CPU can do compaction at a time), so we can do something like

---

struct zs_pool {
-	const char *name;
+	/* protect page/zspage migration */
+	rwlock_t migrate_lock;

	struct size_class *size_class[ZS_SIZE_CLASSES];

-	struct kmem_cache *handle_cachep;
-	struct kmem_cache *zspage_cachep;

	atomic_long_t pages_allocated;

-	struct zs_pool_stats stats;
+	struct kmem_cache *handle_cachep;
+	struct kmem_cache *zspage_cachep;

	/* Compact classes */
	struct shrinker *shrinker;
@@ -223,9 +223,9 @@ struct zs_pool {
 #ifdef CONFIG_COMPACTION
	struct work_struct free_work;
 #endif
-	/* protect page/zspage migration */
-	rwlock_t migrate_lock;
+	const char *name;
	atomic_t compaction_in_progress;
+	struct zs_pool_stats stats;
 };
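[Editorial note: for context on the "only one CPU can do compaction at a time" point above, a guard such as pool->compaction_in_progress is typically a try-once atomic flag. The sketch below is illustrative only -- the function name and the elided body are assumptions, not code taken from zsmalloc -- and just shows how an atomic_xchg()-based single-entrant guard works.]

/*
 * Illustrative sketch, not code from the patch: a single-entrant guard
 * built on an atomic_t flag, in the spirit of pool->compaction_in_progress.
 * atomic_xchg() returns the previous value, so only the caller that flips
 * the flag from 0 to 1 proceeds; concurrent callers bail out immediately.
 */
static unsigned long example_zs_compact(struct zs_pool *pool)
{
	unsigned long pages_freed = 0;

	/* Someone else is already compacting this pool -- nothing to do. */
	if (atomic_xchg(&pool->compaction_in_progress, 1))
		return 0;

	/*
	 * ... walk the size classes and compact each one under
	 * pool_write_lock(pool) and class->lock, as __zs_compact() does ...
	 */

	atomic_set(&pool->compaction_in_progress, 0);
	return pages_freed;
}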
On Thu, Jan 30, 2025 at 01:01:15PM +0900, Sergey Senozhatsky wrote:
> On (25/01/29 16:59), Yosry Ahmed wrote:
> > On Wed, Jan 29, 2025 at 03:43:47PM +0900, Sergey Senozhatsky wrote:
> > > We currently have a mix of migrate_{read,write}_lock() helpers
> > > that lock zspages, but it's zs_pool that actually has a ->migrate_lock
> > > access to which is open-coded. Factor out pool migrate locking
> > > into helpers, zspage migration locking API will be renamed to
> > > reduce confusion.
> >
> > But why did we remove "migrate" from the helpers' names if this is the
> > real migrate lock? It seems like the real problem is how the zspage lock
> > helpers are named, which is addressed later.
>
> So this is deliberate. I guess, just like with page-faults in
> zs_map_object(), this naming comes from the time when we had fewer
> locks and less functionality. These days we have 3 zsmalloc locks that
> can "prevent migration" at different points. Even size class spin-lock.
> But it's not only about migration anymore. We also now have compaction
> (defragmentation) that these locks synchronize. So I want to have a
> reader-writer naming scheme.

In this case shouldn't we also rename the lock itself?

> > > struct zs_pool {
> > > -	const char *name;
> > > +	/* protect page/zspage migration */
> > > +	rwlock_t migrate_lock;
> > >
> > > 	struct size_class *size_class[ZS_SIZE_CLASSES];
> > > 	struct kmem_cache *handle_cachep;
> > > @@ -213,6 +214,7 @@ struct zs_pool {
> > > 	atomic_long_t pages_allocated;
> > >
> > > 	struct zs_pool_stats stats;
> > > +	atomic_t compaction_in_progress;
> > >
> > > 	/* Compact classes */
> > > 	struct shrinker *shrinker;
> > > @@ -223,11 +225,35 @@ struct zs_pool {
> > > #ifdef CONFIG_COMPACTION
> > > 	struct work_struct free_work;
> > > #endif
> > > -	/* protect page/zspage migration */
> > > -	rwlock_t migrate_lock;
> > > -	atomic_t compaction_in_progress;
> > > +
> > > +	const char *name;
> >
> > Are the struct moves relevant to the patch or just to improve
> > readability?
>
> Relevance is relative :) That moved from the patch which also
> converted the lock to rwsem. I'll move this to a separate
> patch.
>
> > I am generally scared to move hot members around unnecessarily
> > due to cache-line sharing problems -- although I don't know if
> > these are really hot.
>
> Size classes are basically static - once we init the array it
> becomes RO. Having migrate_lock at the bottom forces us to
> jump over a big RO zs_pool region, besides we never use ->name
> (unlike migrate_lock) so having it at offset 0 is unnecessary.

What's special about offset 0 though? My understanding is that we want
to put RW-mostly and RO-mostly members apart to land in different
cachelines to avoid unnecessary bouncing. Also we want the elements that
are usually accessed together next to one another.

Placing the RW lock right next to the RO size classes seems like it will
result in unnecessary cacheline bouncing. For example, if a CPU holds
the lock and dirties the cacheline then all other CPUs have to drop it
and fetch it again when accessing the size classes.

> zs_pool_stats and compaction_in_progress are modified only
> during compaction, which we do not run in parallel (only one
> CPU can do compaction at a time), so we can do something like

But other CPUs can read compaction_in_progress, even if they don't
modify it.
>
> ---
>
> struct zs_pool {
> -	const char *name;
> +	/* protect page/zspage migration */
> +	rwlock_t migrate_lock;
>
> 	struct size_class *size_class[ZS_SIZE_CLASSES];
>
> -	struct kmem_cache *handle_cachep;
> -	struct kmem_cache *zspage_cachep;
>
> 	atomic_long_t pages_allocated;
>
> -	struct zs_pool_stats stats;
> +	struct kmem_cache *handle_cachep;
> +	struct kmem_cache *zspage_cachep;
>
> 	/* Compact classes */
> 	struct shrinker *shrinker;
> @@ -223,9 +223,9 @@ struct zs_pool {
> #ifdef CONFIG_COMPACTION
> 	struct work_struct free_work;
> #endif
> -	/* protect page/zspage migration */
> -	rwlock_t migrate_lock;
> +	const char *name;
> 	atomic_t compaction_in_progress;
> +	struct zs_pool_stats stats;
> };
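[Editorial note: one way to address the cacheline-sharing concern raised above, independently of member ordering, is to start the writer-heavy members on their own cacheline. The layout below is a hypothetical, abbreviated sketch -- it is not part of the posted patch, and it omits several zs_pool members -- using the kernel's ____cacheline_aligned_in_smp annotation.]

/*
 * Hypothetical, abbreviated layout sketch -- not part of the posted patch.
 * ____cacheline_aligned_in_smp starts the annotated member on a fresh
 * cacheline, so the frequently written lock cannot share a line with the
 * read-mostly size_class[] array regardless of how members are ordered.
 */
struct zs_pool {
	const char *name;

	struct size_class *size_class[ZS_SIZE_CLASSES];
	struct kmem_cache *handle_cachep;
	struct kmem_cache *zspage_cachep;

	atomic_long_t pages_allocated;
	struct zs_pool_stats stats;

	/* Writer-heavy members start on their own cacheline. */
	rwlock_t migrate_lock ____cacheline_aligned_in_smp;
	atomic_t compaction_in_progress;
};

The trade-off is the padding this inserts; manually grouping read-mostly and write-mostly members, as in the reordering proposed above, avoids that cost.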
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 817626a351f8..2f8a2b139919 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -204,7 +204,8 @@ struct link_free {
 };
 
 struct zs_pool {
-	const char *name;
+	/* protect page/zspage migration */
+	rwlock_t migrate_lock;
 
 	struct size_class *size_class[ZS_SIZE_CLASSES];
 	struct kmem_cache *handle_cachep;
@@ -213,6 +214,7 @@ struct zs_pool {
 	atomic_long_t pages_allocated;
 
 	struct zs_pool_stats stats;
+	atomic_t compaction_in_progress;
 
 	/* Compact classes */
 	struct shrinker *shrinker;
@@ -223,11 +225,35 @@ struct zs_pool {
 #ifdef CONFIG_COMPACTION
 	struct work_struct free_work;
 #endif
-	/* protect page/zspage migration */
-	rwlock_t migrate_lock;
-	atomic_t compaction_in_progress;
+
+	const char *name;
 };
 
+static void pool_write_unlock(struct zs_pool *pool)
+{
+	write_unlock(&pool->migrate_lock);
+}
+
+static void pool_write_lock(struct zs_pool *pool)
+{
+	write_lock(&pool->migrate_lock);
+}
+
+static void pool_read_unlock(struct zs_pool *pool)
+{
+	read_unlock(&pool->migrate_lock);
+}
+
+static void pool_read_lock(struct zs_pool *pool)
+{
+	read_lock(&pool->migrate_lock);
+}
+
+static bool pool_lock_is_contended(struct zs_pool *pool)
+{
+	return rwlock_is_contended(&pool->migrate_lock);
+}
+
 static inline void zpdesc_set_first(struct zpdesc *zpdesc)
 {
 	SetPagePrivate(zpdesc_page(zpdesc));
@@ -1206,7 +1232,7 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
 	BUG_ON(in_interrupt());
 
 	/* It guarantees it can get zspage from handle safely */
-	read_lock(&pool->migrate_lock);
+	pool_read_lock(pool);
 	obj = handle_to_obj(handle);
 	obj_to_location(obj, &zpdesc, &obj_idx);
 	zspage = get_zspage(zpdesc);
@@ -1218,7 +1244,7 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
 	 * which is smaller granularity.
 	 */
 	migrate_read_lock(zspage);
-	read_unlock(&pool->migrate_lock);
+	pool_read_unlock(pool);
 
 	class = zspage_class(pool, zspage);
 	off = offset_in_page(class->size * obj_idx);
@@ -1453,13 +1479,13 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
 	 * The pool->migrate_lock protects the race with zpage's migration
 	 * so it's safe to get the page from handle.
 	 */
-	read_lock(&pool->migrate_lock);
+	pool_read_lock(pool);
 	obj = handle_to_obj(handle);
 	obj_to_zpdesc(obj, &f_zpdesc);
 	zspage = get_zspage(f_zpdesc);
 	class = zspage_class(pool, zspage);
 	spin_lock(&class->lock);
-	read_unlock(&pool->migrate_lock);
+	pool_read_unlock(pool);
 
 	class_stat_sub(class, ZS_OBJS_INUSE, 1);
 	obj_free(class->size, obj);
@@ -1796,7 +1822,7 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
 	 * The pool migrate_lock protects the race between zpage migration
 	 * and zs_free.
 	 */
-	write_lock(&pool->migrate_lock);
+	pool_write_lock(pool);
 	class = zspage_class(pool, zspage);
 
 	/*
@@ -1833,7 +1859,7 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
 	 * Since we complete the data copy and set up new zspage structure,
 	 * it's okay to release migration_lock.
 	 */
-	write_unlock(&pool->migrate_lock);
+	pool_write_unlock(pool);
 	spin_unlock(&class->lock);
 	migrate_write_unlock(zspage);
 
@@ -1956,7 +1982,7 @@ static unsigned long __zs_compact(struct zs_pool *pool,
 	 * protect the race between zpage migration and zs_free
 	 * as well as zpage allocation/free
 	 */
-	write_lock(&pool->migrate_lock);
+	pool_write_lock(pool);
 	spin_lock(&class->lock);
 	while (zs_can_compact(class)) {
 		int fg;
@@ -1983,14 +2009,14 @@ static unsigned long __zs_compact(struct zs_pool *pool,
 		src_zspage = NULL;
 
 		if (get_fullness_group(class, dst_zspage) == ZS_INUSE_RATIO_100
-		    || rwlock_is_contended(&pool->migrate_lock)) {
+		    || pool_lock_is_contended(pool)) {
 			putback_zspage(class, dst_zspage);
 			dst_zspage = NULL;
 
 			spin_unlock(&class->lock);
-			write_unlock(&pool->migrate_lock);
+			pool_write_unlock(pool);
 			cond_resched();
-			write_lock(&pool->migrate_lock);
+			pool_write_lock(pool);
 			spin_lock(&class->lock);
 		}
 	}
@@ -2002,7 +2028,7 @@ static unsigned long __zs_compact(struct zs_pool *pool,
 		putback_zspage(class, dst_zspage);
 
 	spin_unlock(&class->lock);
-	write_unlock(&pool->migrate_lock);
+	pool_write_unlock(pool);
 
 	return pages_freed;
 }
We currently have a mix of migrate_{read,write}_lock() helpers that lock
zspages, but it's zs_pool that actually has a ->migrate_lock, access to
which is open-coded. Factor out pool migrate locking into helpers; the
zspage migration locking API will be renamed to reduce confusion.

Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
---
 mm/zsmalloc.c | 56 +++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 41 insertions(+), 15 deletions(-)
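[Editorial note: the commit message says the zspage migration locking API will be renamed in a follow-up to reduce confusion. Purely as a hypothetical illustration of the reader-writer naming scheme discussed in the thread -- these helper names and wrappers are assumptions, not part of this patch -- the rename could be as thin as wrappers over the existing per-zspage lock helpers.]

/*
 * Hypothetical naming sketch only -- not part of this patch. It illustrates
 * the reader-writer naming scheme discussed in the thread: the per-zspage
 * lock keeps its semantics, only the helper names stop saying "migrate".
 */
static void zspage_read_lock(struct zspage *zspage)
{
	migrate_read_lock(zspage);
}

static void zspage_read_unlock(struct zspage *zspage)
{
	migrate_read_unlock(zspage);
}

static void zspage_write_lock(struct zspage *zspage)
{
	migrate_write_lock(zspage);
}

static void zspage_write_unlock(struct zspage *zspage)
{
	migrate_write_unlock(zspage);
}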