[RFC,2/6] zsmalloc: make zspage lock preemptible

Message ID 20250127080254.1302026-3-senozhatsky@chromium.org (mailing list archive)
State New
Series zsmalloc: make zsmalloc preemptible

Commit Message

Sergey Senozhatsky Jan. 27, 2025, 7:59 a.m. UTC
Switch over from rwlock_t to an atomic_t variable that takes a
negative value when the zspage is under migration, or positive
values when the zspage is used by zsmalloc users (object map,
etc.).  Using a rwsem per zspage is a little too memory heavy;
a simple atomic_t should suffice, since we only need to mark a
zspage as either used-for-write or used-for-read.  This is
needed to make zsmalloc preemptible in the future.

Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
---
 mm/zsmalloc.c | 112 +++++++++++++++++++++++++++++---------------------
 1 file changed, 66 insertions(+), 46 deletions(-)
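
In other words, zspage->lock becomes a minimal reader-writer
spinlock.  A summary of the lock word states (a reading of the
commit message and the helpers below, not part of the patch):

	ZS_PAGE_WRLOCKED (-1)	write-locked, zspage is under migration
	ZS_PAGE_UNLOCKED  (0)	unlocked
	N > 0			read-locked by N concurrent users

Readers spin while the writer holds the lock, and the writer can
only take the lock when the counter is at ZS_PAGE_UNLOCKED, i.e.
it spins until all readers are gone.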

Comments

Uros Bizjak Jan. 27, 2025, 8:23 p.m. UTC | #1
On 27. 01. 25 08:59, Sergey Senozhatsky wrote:
> Switch over from rwlock_t to an atomic_t variable that takes a
> negative value when the zspage is under migration, or positive
> values when the zspage is used by zsmalloc users (object map,
> etc.).  Using a rwsem per zspage is a little too memory heavy;
> a simple atomic_t should suffice, since we only need to mark a
> zspage as either used-for-write or used-for-read.  This is
> needed to make zsmalloc preemptible in the future.
> 
> Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
> ---
>   mm/zsmalloc.c | 112 +++++++++++++++++++++++++++++---------------------
>   1 file changed, 66 insertions(+), 46 deletions(-)
> 
> diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
> index 817626a351f8..28a75bfbeaa6 100644
> --- a/mm/zsmalloc.c
> +++ b/mm/zsmalloc.c
> @@ -257,6 +257,9 @@ static inline void free_zpdesc(struct zpdesc *zpdesc)
>   	__free_page(page);
>   }
>   
> +#define ZS_PAGE_UNLOCKED	0
> +#define ZS_PAGE_WRLOCKED	-1
> +
>   struct zspage {
>   	struct {
>   		unsigned int huge:HUGE_BITS;
> @@ -269,7 +272,7 @@ struct zspage {
>   	struct zpdesc *first_zpdesc;
>   	struct list_head list; /* fullness list */
>   	struct zs_pool *pool;
> -	rwlock_t lock;
> +	atomic_t lock;
>   };
>   
>   struct mapping_area {
> @@ -290,11 +293,53 @@ static bool ZsHugePage(struct zspage *zspage)
>   	return zspage->huge;
>   }
>   
> -static void migrate_lock_init(struct zspage *zspage);
> -static void migrate_read_lock(struct zspage *zspage);
> -static void migrate_read_unlock(struct zspage *zspage);
> -static void migrate_write_lock(struct zspage *zspage);
> -static void migrate_write_unlock(struct zspage *zspage);
> +static void zspage_lock_init(struct zspage *zspage)
> +{
> +	atomic_set(&zspage->lock, ZS_PAGE_UNLOCKED);
> +}
> +
> +static void zspage_read_lock(struct zspage *zspage)
> +{
> +	atomic_t *lock = &zspage->lock;
> +	int old;
> +
> +	while (1) {
> +		old = atomic_read(lock);
> +		if (old == ZS_PAGE_WRLOCKED) {
> +			cpu_relax();
> +			continue;
> +		}
> +
> +		if (atomic_cmpxchg(lock, old, old + 1) == old)
> +			return;

You can use atomic_try_cmpxchg() here:

if (atomic_try_cmpxchg(lock, &old, old + 1))
         return;

> +
> +		cpu_relax();
> +	}
> +}
> +
> +static void zspage_read_unlock(struct zspage *zspage)
> +{
> +	atomic_dec(&zspage->lock);
> +}
> +
> +static void zspage_write_lock(struct zspage *zspage)
> +{
> +	atomic_t *lock = &zspage->lock;
> +	int old;
> +
> +	while (1) {
> +		old = atomic_cmpxchg(lock, ZS_PAGE_UNLOCKED, ZS_PAGE_WRLOCKED);
> +		if (old == ZS_PAGE_UNLOCKED)
> +			return;

Also, the above code can be rewritten as:

while (1) {
         old = ZS_PAGE_UNLOCKED;
         if (atomic_try_cmpxchg(lock, &old, ZS_PAGE_WRLOCKED))
                 return;
> +
> +		cpu_relax();
> +	}
> +}

The above change will result in a slightly better generated asm.
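
For reference, both helpers with atomic_try_cmpxchg() applied
could look like the sketch below (untested; note that on failure
atomic_try_cmpxchg() updates @old with the current value of the
lock, which is what lets the explicit atomic_read() calls go
away):

static void zspage_read_lock(struct zspage *zspage)
{
	atomic_t *lock = &zspage->lock;
	int old = atomic_read(lock);

	while (1) {
		if (old == ZS_PAGE_WRLOCKED) {
			cpu_relax();
			old = atomic_read(lock);
			continue;
		}

		/* on failure @old is refreshed, no explicit re-read */
		if (atomic_try_cmpxchg(lock, &old, old + 1))
			return;

		cpu_relax();
	}
}

static void zspage_write_lock(struct zspage *zspage)
{
	atomic_t *lock = &zspage->lock;
	int old = ZS_PAGE_UNLOCKED;

	while (!atomic_try_cmpxchg(lock, &old, ZS_PAGE_WRLOCKED)) {
		/* @old holds the observed value; reset it and retry */
		old = ZS_PAGE_UNLOCKED;
		cpu_relax();
	}
}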

Uros.
Sergey Senozhatsky Jan. 28, 2025, 12:29 a.m. UTC | #2
On (25/01/27 21:23), Uros Bizjak wrote:
> > +static void zspage_read_lock(struct zspage *zspage)
> > +{
> > +	atomic_t *lock = &zspage->lock;
> > +	int old;
> > +
> > +	while (1) {
> > +		old = atomic_read(lock);
> > +		if (old == ZS_PAGE_WRLOCKED) {
> > +			cpu_relax();
> > +			continue;
> > +		}
> > +
> > +		if (atomic_cmpxchg(lock, old, old + 1) == old)
> > +			return;
> 
> You can use atomic_try_cmpxchg() here:
> 
> if (atomic_try_cmpxchg(lock, &old, old + 1))
>         return;
> 
> > +
> > +		cpu_relax();
> > +	}
> > +}
> > +
> > +static void zspage_read_unlock(struct zspage *zspage)
> > +{
> > +	atomic_dec(&zspage->lock);
> > +}
> > +
> > +static void zspage_write_lock(struct zspage *zspage)
> > +{
> > +	atomic_t *lock = &zspage->lock;
> > +	int old;
> > +
> > +	while (1) {
> > +		old = atomic_cmpxchg(lock, ZS_PAGE_UNLOCKED, ZS_PAGE_WRLOCKED);
> > +		if (old == ZS_PAGE_UNLOCKED)
> > +			return;
> 
> Also, the above code can be rewritten as:
> 
> while (1) {
>         old = ZS_PAGE_UNLOCKED;
>         if (atomic_try_cmpxchg(lock, &old, ZS_PAGE_WRLOCKED))
>                 return;
> > +
> > +		cpu_relax();
> > +	}
> > +}
> 
> The above change will result in a slightly better generated asm.

Thanks, I'll take a look for the next version.

Patch

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 817626a351f8..28a75bfbeaa6 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -257,6 +257,9 @@ static inline void free_zpdesc(struct zpdesc *zpdesc)
 	__free_page(page);
 }
 
+#define ZS_PAGE_UNLOCKED	0
+#define ZS_PAGE_WRLOCKED	-1
+
 struct zspage {
 	struct {
 		unsigned int huge:HUGE_BITS;
@@ -269,7 +272,7 @@ struct zspage {
 	struct zpdesc *first_zpdesc;
 	struct list_head list; /* fullness list */
 	struct zs_pool *pool;
-	rwlock_t lock;
+	atomic_t lock;
 };
 
 struct mapping_area {
@@ -290,11 +293,53 @@ static bool ZsHugePage(struct zspage *zspage)
 	return zspage->huge;
 }
 
-static void migrate_lock_init(struct zspage *zspage);
-static void migrate_read_lock(struct zspage *zspage);
-static void migrate_read_unlock(struct zspage *zspage);
-static void migrate_write_lock(struct zspage *zspage);
-static void migrate_write_unlock(struct zspage *zspage);
+static void zspage_lock_init(struct zspage *zspage)
+{
+	atomic_set(&zspage->lock, ZS_PAGE_UNLOCKED);
+}
+
+static void zspage_read_lock(struct zspage *zspage)
+{
+	atomic_t *lock = &zspage->lock;
+	int old;
+
+	while (1) {
+		old = atomic_read(lock);
+		if (old == ZS_PAGE_WRLOCKED) {
+			cpu_relax();
+			continue;
+		}
+
+		if (atomic_cmpxchg(lock, old, old + 1) == old)
+			return;
+
+		cpu_relax();
+	}
+}
+
+static void zspage_read_unlock(struct zspage *zspage)
+{
+	atomic_dec(&zspage->lock);
+}
+
+static void zspage_write_lock(struct zspage *zspage)
+{
+	atomic_t *lock = &zspage->lock;
+	int old;
+
+	while (1) {
+		old = atomic_cmpxchg(lock, ZS_PAGE_UNLOCKED, ZS_PAGE_WRLOCKED);
+		if (old == ZS_PAGE_UNLOCKED)
+			return;
+
+		cpu_relax();
+	}
+}
+
+static void zspage_write_unlock(struct zspage *zspage)
+{
+	atomic_set(&zspage->lock, ZS_PAGE_UNLOCKED);
+}
 
 #ifdef CONFIG_COMPACTION
 static void kick_deferred_free(struct zs_pool *pool);
@@ -992,7 +1037,7 @@ static struct zspage *alloc_zspage(struct zs_pool *pool,
 		return NULL;
 
 	zspage->magic = ZSPAGE_MAGIC;
-	migrate_lock_init(zspage);
+	zspage_lock_init(zspage);
 
 	for (i = 0; i < class->pages_per_zspage; i++) {
 		struct zpdesc *zpdesc;
@@ -1217,7 +1262,7 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
 	 * zs_unmap_object API so delegate the locking from class to zspage
 	 * which is smaller granularity.
 	 */
-	migrate_read_lock(zspage);
+	zspage_read_lock(zspage);
 	read_unlock(&pool->migrate_lock);
 
 	class = zspage_class(pool, zspage);
@@ -1277,7 +1322,7 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
 	}
 	local_unlock(&zs_map_area.lock);
 
-	migrate_read_unlock(zspage);
+	zspage_read_unlock(zspage);
 }
 EXPORT_SYMBOL_GPL(zs_unmap_object);
 
@@ -1671,18 +1716,18 @@ static void lock_zspage(struct zspage *zspage)
 	/*
 	 * Pages we haven't locked yet can be migrated off the list while we're
 	 * trying to lock them, so we need to be careful and only attempt to
-	 * lock each page under migrate_read_lock(). Otherwise, the page we lock
+	 * lock each page under zspage_read_lock(). Otherwise, the page we lock
 	 * may no longer belong to the zspage. This means that we may wait for
 	 * the wrong page to unlock, so we must take a reference to the page
-	 * prior to waiting for it to unlock outside migrate_read_lock().
+	 * prior to waiting for it to unlock outside zspage_read_lock().
 	 */
 	while (1) {
-		migrate_read_lock(zspage);
+		zspage_read_lock(zspage);
 		zpdesc = get_first_zpdesc(zspage);
 		if (zpdesc_trylock(zpdesc))
 			break;
 		zpdesc_get(zpdesc);
-		migrate_read_unlock(zspage);
+		zspage_read_unlock(zspage);
 		zpdesc_wait_locked(zpdesc);
 		zpdesc_put(zpdesc);
 	}
@@ -1693,41 +1738,16 @@ static void lock_zspage(struct zspage *zspage)
 			curr_zpdesc = zpdesc;
 		} else {
 			zpdesc_get(zpdesc);
-			migrate_read_unlock(zspage);
+			zspage_read_unlock(zspage);
 			zpdesc_wait_locked(zpdesc);
 			zpdesc_put(zpdesc);
-			migrate_read_lock(zspage);
+			zspage_read_lock(zspage);
 		}
 	}
-	migrate_read_unlock(zspage);
+	zspage_read_unlock(zspage);
 }
 #endif /* CONFIG_COMPACTION */
 
-static void migrate_lock_init(struct zspage *zspage)
-{
-	rwlock_init(&zspage->lock);
-}
-
-static void migrate_read_lock(struct zspage *zspage) __acquires(&zspage->lock)
-{
-	read_lock(&zspage->lock);
-}
-
-static void migrate_read_unlock(struct zspage *zspage) __releases(&zspage->lock)
-{
-	read_unlock(&zspage->lock);
-}
-
-static void migrate_write_lock(struct zspage *zspage)
-{
-	write_lock(&zspage->lock);
-}
-
-static void migrate_write_unlock(struct zspage *zspage)
-{
-	write_unlock(&zspage->lock);
-}
-
 #ifdef CONFIG_COMPACTION
 
 static const struct movable_operations zsmalloc_mops;
@@ -1803,8 +1823,8 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
 	 * the class lock protects zpage alloc/free in the zspage.
 	 */
 	spin_lock(&class->lock);
-	/* the migrate_write_lock protects zpage access via zs_map_object */
-	migrate_write_lock(zspage);
+	/* the zspage_write_lock protects zpage access via zs_map_object */
+	zspage_write_lock(zspage);
 
 	offset = get_first_obj_offset(zpdesc);
 	s_addr = kmap_local_zpdesc(zpdesc);
@@ -1835,7 +1855,7 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
 	 */
 	write_unlock(&pool->migrate_lock);
 	spin_unlock(&class->lock);
-	migrate_write_unlock(zspage);
+	zspage_write_unlock(zspage);
 
 	zpdesc_get(newzpdesc);
 	if (zpdesc_zone(newzpdesc) != zpdesc_zone(zpdesc)) {
@@ -1971,9 +1991,9 @@ static unsigned long __zs_compact(struct zs_pool *pool,
 		if (!src_zspage)
 			break;
 
-		migrate_write_lock(src_zspage);
+		zspage_write_lock(src_zspage);
 		migrate_zspage(pool, src_zspage, dst_zspage);
-		migrate_write_unlock(src_zspage);
+		zspage_write_unlock(src_zspage);
 
 		fg = putback_zspage(class, src_zspage);
 		if (fg == ZS_INUSE_RATIO_0) {
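
As an aside, for experimenting with the locking scheme outside
the kernel, a rough user-space analogue of the four helpers can
be built on C11 <stdatomic.h> (a sketch only: the memory
orderings are choices made for this demo, while the kernel patch
relies on the ordering semantics of the kernel's atomic_t API):

#include <stdatomic.h>

#define ZS_PAGE_UNLOCKED	 0
#define ZS_PAGE_WRLOCKED	-1

/* lock word: -1 writer (migration), 0 unlocked, N > 0 readers */
static void zspage_read_lock(atomic_int *lock)
{
	int old = atomic_load_explicit(lock, memory_order_relaxed);

	for (;;) {
		if (old == ZS_PAGE_WRLOCKED) {
			/* writer active, re-read and retry */
			old = atomic_load_explicit(lock, memory_order_relaxed);
			continue;
		}
		/* bump the reader count; acquire pairs with unlock release */
		if (atomic_compare_exchange_weak_explicit(lock, &old, old + 1,
				memory_order_acquire, memory_order_relaxed))
			return;
	}
}

static void zspage_read_unlock(atomic_int *lock)
{
	atomic_fetch_sub_explicit(lock, 1, memory_order_release);
}

static void zspage_write_lock(atomic_int *lock)
{
	int old = ZS_PAGE_UNLOCKED;

	/* succeeds only when there are no readers and no writer */
	while (!atomic_compare_exchange_weak_explicit(lock, &old,
			ZS_PAGE_WRLOCKED, memory_order_acquire,
			memory_order_relaxed))
		old = ZS_PAGE_UNLOCKED;
}

static void zspage_write_unlock(atomic_int *lock)
{
	atomic_store_explicit(lock, ZS_PAGE_UNLOCKED, memory_order_release);
}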