
[v4,02/22] mm/zsmalloc: use zpdesc in trylock_zspage/lock_zspage

Message ID 20240729112534.3416707-3-alexs@kernel.org (mailing list archive)
State New
Series: mm/zsmalloc: add zpdesc memory descriptor for zswap.zpool

Commit Message

alexs@kernel.org July 29, 2024, 11:25 a.m. UTC
From: Alex Shi <alexs@kernel.org>

To use zpdesc in trylock_zspage()/lock_zspage(), we add a couple of helpers:
zpdesc_lock(), zpdesc_unlock(), zpdesc_trylock(), zpdesc_wait_locked(), and
zpdesc_get()/zpdesc_put().

These helpers use the folio variants internally, for two reasons: first,
zswap.zpool only uses single (order-0) pages, so going through the folio
helpers saves some compound_head() checking; second, folio_put() bypasses
devmap checking that we don't need. (A sketch of the compound_head() point
follows the diffstat below.)

Originally-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Signed-off-by: Alex Shi <alexs@kernel.org>
---
 mm/zpdesc.h   | 30 ++++++++++++++++++++++++
 mm/zsmalloc.c | 64 ++++++++++++++++++++++++++++++++++-----------------
 2 files changed, 73 insertions(+), 21 deletions(-)
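To illustrate the compound_head() saving mentioned above, here is a rough
sketch: lock_page() is shown approximately as in include/linux/pagemap.h of
this era, and zpdesc_folio() is assumed to reduce to a direct cast, as is
typical for memdesc conversion helpers. This is a reading aid, not verbatim
kernel source.

static inline void lock_page(struct page *page)
{
	struct folio *folio;

	might_sleep();
	folio = page_folio(page);	/* compound_head() walk to find the head page */
	if (!folio_trylock(folio))
		__folio_lock(folio);
}

static inline void zpdesc_lock(struct zpdesc *zpdesc)
{
	/* a zpdesc is always a single head page, so no head lookup is needed */
	folio_lock(zpdesc_folio(zpdesc));
}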

Comments

Vishal Moola (Oracle) Aug. 2, 2024, 7:02 p.m. UTC | #1
On Mon, Jul 29, 2024 at 07:25:14PM +0800, alexs@kernel.org wrote:
> From: Alex Shi <alexs@kernel.org>
> 
> To use zpdesc in trylock_zspage/lock_zspage funcs, we add couple of helpers:
> zpdesc_lock/zpdesc_unlock/zpdesc_trylock/zpdesc_wait_locked and
> zpdesc_get/zpdesc_put for this purpose.

You should always include the "()" following function names. It just
makes everything more readable.

> Here we use the folio series func in guts for 2 reasons, one zswap.zpool
> only get single page, and use folio could save some compound_head checking;
> two, folio_put could bypass devmap checking that we don't need.
> 
> Originally-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
> Signed-off-by: Alex Shi <alexs@kernel.org>
> ---
>  mm/zpdesc.h   | 30 ++++++++++++++++++++++++
>  mm/zsmalloc.c | 64 ++++++++++++++++++++++++++++++++++-----------------
>  2 files changed, 73 insertions(+), 21 deletions(-)
> 
> diff --git a/mm/zpdesc.h b/mm/zpdesc.h
> index 2dbef231f616..3b04197cec9d 100644
> --- a/mm/zpdesc.h
> +++ b/mm/zpdesc.h
> @@ -63,4 +63,34 @@ static_assert(sizeof(struct zpdesc) <= sizeof(struct page));
>  	const struct page *:		(const struct zpdesc *)(p),	\
>  	struct page *:			(struct zpdesc *)(p)))
>  
> +static inline void zpdesc_lock(struct zpdesc *zpdesc)
> +{
> +	folio_lock(zpdesc_folio(zpdesc));
> +}
> +
> +static inline bool zpdesc_trylock(struct zpdesc *zpdesc)
> +{
> +	return folio_trylock(zpdesc_folio(zpdesc));
> +}
> +
> +static inline void zpdesc_unlock(struct zpdesc *zpdesc)
> +{
> +	folio_unlock(zpdesc_folio(zpdesc));
> +}
> +
> +static inline void zpdesc_wait_locked(struct zpdesc *zpdesc)
> +{
> +	folio_wait_locked(zpdesc_folio(zpdesc));
> +}

The more I look at zsmalloc, the more skeptical I get about it "needing"
the folio lock. At a glance, a zspage already has its own lock, and the
migration doesn't appear to be truly physical? There's probably something
I'm missing... dropping many of the folio locks would make this code a lot
simpler.
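(For context, the per-zspage lock referred to here, roughly as it stands in
mm/zsmalloc.c around the time of this series; a sketch, not verbatim source:)

struct zspage {
	/* ... flags, inuse, freeobj, etc. elided ... */
	struct page *first_page;
	struct list_head list;
	rwlock_t lock;		/* the zspage's own lock, taken by migration */
};

static void migrate_read_lock(struct zspage *zspage) __acquires(&zspage->lock)
{
	read_lock(&zspage->lock);
}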

> +
> +static inline void zpdesc_get(struct zpdesc *zpdesc)
> +{
> +	folio_get(zpdesc_folio(zpdesc));
> +}
> +
> +static inline void zpdesc_put(struct zpdesc *zpdesc)
> +{
> +	folio_put(zpdesc_folio(zpdesc));
> +}
> +
>  #endif
> diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
> index a532851025f9..243677a9c6d2 100644
> --- a/mm/zsmalloc.c
> +++ b/mm/zsmalloc.c
> @@ -433,13 +433,17 @@ static __maybe_unused int is_first_page(struct page *page)
>  	return PagePrivate(page);
>  }
>  
> +static int is_first_zpdesc(struct zpdesc *zpdesc)
> +{
> +	return PagePrivate(zpdesc_page(zpdesc));
> +}
> +

I feel like we might not even need to use the PG_private flag for
zpages? It seems to me like it's just used for sanity checking. Can
zpage->first_page ever not point to the first zpdesc?

For the purpose of introducing the memdesc it's fine to continue using
it; just some food for thought.
Alex Shi Aug. 5, 2024, 7:55 a.m. UTC | #2
On 8/3/24 3:02 AM, Vishal Moola wrote:
> On Mon, Jul 29, 2024 at 07:25:14PM +0800, alexs@kernel.org wrote:
>> From: Alex Shi <alexs@kernel.org>
>>
>> To use zpdesc in trylock_zspage/lock_zspage funcs, we add couple of helpers:
>> zpdesc_lock/zpdesc_unlock/zpdesc_trylock/zpdesc_wait_locked and
>> zpdesc_get/zpdesc_put for this purpose.
> 
> You should always include the "()" following function names. It just
> makes everything more readable.

Thanks for the reminder; I will update the commit log.

> 
>> Here we use the folio series func in guts for 2 reasons, one zswap.zpool
>> only get single page, and use folio could save some compound_head checking;
>> two, folio_put could bypass devmap checking that we don't need.
>>
>> Originally-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
>> Signed-off-by: Alex Shi <alexs@kernel.org>
>> ---
>>  mm/zpdesc.h   | 30 ++++++++++++++++++++++++
>>  mm/zsmalloc.c | 64 ++++++++++++++++++++++++++++++++++-----------------
>>  2 files changed, 73 insertions(+), 21 deletions(-)
>>
>> diff --git a/mm/zpdesc.h b/mm/zpdesc.h
>> index 2dbef231f616..3b04197cec9d 100644
>> --- a/mm/zpdesc.h
>> +++ b/mm/zpdesc.h
>> @@ -63,4 +63,34 @@ static_assert(sizeof(struct zpdesc) <= sizeof(struct page));
>>  	const struct page *:		(const struct zpdesc *)(p),	\
>>  	struct page *:			(struct zpdesc *)(p)))
>>  
>> +static inline void zpdesc_lock(struct zpdesc *zpdesc)
>> +{
>> +	folio_lock(zpdesc_folio(zpdesc));
>> +}
>> +
>> +static inline bool zpdesc_trylock(struct zpdesc *zpdesc)
>> +{
>> +	return folio_trylock(zpdesc_folio(zpdesc));
>> +}
>> +
>> +static inline void zpdesc_unlock(struct zpdesc *zpdesc)
>> +{
>> +	folio_unlock(zpdesc_folio(zpdesc));
>> +}
>> +
>> +static inline void zpdesc_wait_locked(struct zpdesc *zpdesc)
>> +{
>> +	folio_wait_locked(zpdesc_folio(zpdesc));
>> +}
> 
> The more I look at zsmalloc, the more skeptical I get about it "needing"
> the folio_lock. At a glance it seems like a zspage already has its own lock,
> and the migration doesn't appear to be truly physical? There's probably
> something I'm missing... it would make this code a lot simpler to drop
> many of the folio locks.

Using the folio variants saves about 6.3% of object code... Anyway, I don't
insist on it. Just to double-check: could we keep the code-size saving? :)

> 
>> +
>> +static inline void zpdesc_get(struct zpdesc *zpdesc)
>> +{
>> +	folio_get(zpdesc_folio(zpdesc));
>> +}
>> +
>> +static inline void zpdesc_put(struct zpdesc *zpdesc)
>> +{
>> +	folio_put(zpdesc_folio(zpdesc));
>> +}
>> +
>>  #endif
>> diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
>> index a532851025f9..243677a9c6d2 100644
>> --- a/mm/zsmalloc.c
>> +++ b/mm/zsmalloc.c
>> @@ -433,13 +433,17 @@ static __maybe_unused int is_first_page(struct page *page)
>>  	return PagePrivate(page);
>>  }
>>  
>> +static int is_first_zpdesc(struct zpdesc *zpdesc)
>> +{
>> +	return PagePrivate(zpdesc_page(zpdesc));
>> +}
>> +
> 
> I feel like we might not even need to use the PG_private flag for
> zpages? It seems to me like its just used for sanity checking. Can
> zpage->first_page ever not point to the first zpdesc?

Yes, PG_private is only used for sanity checking now, but zspage.first_zpdesc
is still widely used and must point to the first subpage.
I believe we could safely remove this page flag, maybe in a following patchset?
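If we do drop it, a hypothetical shape of the check could validate against
the zspage itself (this assumes the zpdesc keeps the zspage back-pointer
introduced earlier in this series; illustrative only, not part of this patch):

static int is_first_zpdesc(struct zpdesc *zpdesc)
{
	/* hypothetical: check against the zspage instead of PG_private */
	return zpdesc->zspage->first_zpdesc == zpdesc;
}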

> 
> For the purpose of introducing the memdesc its fine to continue using
> it; just some food for thought.

Yes.

Thanks a lot! :)

Patch

diff --git a/mm/zpdesc.h b/mm/zpdesc.h
index 2dbef231f616..3b04197cec9d 100644
--- a/mm/zpdesc.h
+++ b/mm/zpdesc.h
@@ -63,4 +63,34 @@  static_assert(sizeof(struct zpdesc) <= sizeof(struct page));
 	const struct page *:		(const struct zpdesc *)(p),	\
 	struct page *:			(struct zpdesc *)(p)))
 
+static inline void zpdesc_lock(struct zpdesc *zpdesc)
+{
+	folio_lock(zpdesc_folio(zpdesc));
+}
+
+static inline bool zpdesc_trylock(struct zpdesc *zpdesc)
+{
+	return folio_trylock(zpdesc_folio(zpdesc));
+}
+
+static inline void zpdesc_unlock(struct zpdesc *zpdesc)
+{
+	folio_unlock(zpdesc_folio(zpdesc));
+}
+
+static inline void zpdesc_wait_locked(struct zpdesc *zpdesc)
+{
+	folio_wait_locked(zpdesc_folio(zpdesc));
+}
+
+static inline void zpdesc_get(struct zpdesc *zpdesc)
+{
+	folio_get(zpdesc_folio(zpdesc));
+}
+
+static inline void zpdesc_put(struct zpdesc *zpdesc)
+{
+	folio_put(zpdesc_folio(zpdesc));
+}
+
 #endif
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index a532851025f9..243677a9c6d2 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -433,13 +433,17 @@  static __maybe_unused int is_first_page(struct page *page)
 	return PagePrivate(page);
 }
 
+static int is_first_zpdesc(struct zpdesc *zpdesc)
+{
+	return PagePrivate(zpdesc_page(zpdesc));
+}
+
 /* Protected by class->lock */
 static inline int get_zspage_inuse(struct zspage *zspage)
 {
 	return zspage->inuse;
 }
 
-
 static inline void mod_zspage_inuse(struct zspage *zspage, int val)
 {
 	zspage->inuse += val;
@@ -453,6 +457,14 @@  static inline struct page *get_first_page(struct zspage *zspage)
 	return first_page;
 }
 
+static struct zpdesc *get_first_zpdesc(struct zspage *zspage)
+{
+	struct zpdesc *first_zpdesc = zspage->first_zpdesc;
+
+	VM_BUG_ON_PAGE(!is_first_zpdesc(first_zpdesc), zpdesc_page(first_zpdesc));
+	return first_zpdesc;
+}
+
 #define FIRST_OBJ_PAGE_TYPE_MASK	0xffff
 
 static inline void reset_first_obj_offset(struct page *page)
@@ -745,6 +757,16 @@  static struct page *get_next_page(struct page *page)
 	return (struct page *)page->index;
 }
 
+static struct zpdesc *get_next_zpdesc(struct zpdesc *zpdesc)
+{
+	struct zspage *zspage = get_zspage(zpdesc_page(zpdesc));
+
+	if (unlikely(ZsHugePage(zspage)))
+		return NULL;
+
+	return zpdesc->next;
+}
+
 /**
  * obj_to_location - get (<page>, <obj_idx>) from encoded object value
  * @obj: the encoded object value
@@ -815,11 +837,11 @@  static void reset_page(struct page *page)
 
 static int trylock_zspage(struct zspage *zspage)
 {
-	struct page *cursor, *fail;
+	struct zpdesc *cursor, *fail;
 
-	for (cursor = get_first_page(zspage); cursor != NULL; cursor =
-					get_next_page(cursor)) {
-		if (!trylock_page(cursor)) {
+	for (cursor = get_first_zpdesc(zspage); cursor != NULL; cursor =
+					get_next_zpdesc(cursor)) {
+		if (!zpdesc_trylock(cursor)) {
 			fail = cursor;
 			goto unlock;
 		}
@@ -827,9 +849,9 @@  static int trylock_zspage(struct zspage *zspage)
 
 	return 1;
 unlock:
-	for (cursor = get_first_page(zspage); cursor != fail; cursor =
-					get_next_page(cursor))
-		unlock_page(cursor);
+	for (cursor = get_first_zpdesc(zspage); cursor != fail; cursor =
+					get_next_zpdesc(cursor))
+		zpdesc_unlock(cursor);
 
 	return 0;
 }
@@ -1658,7 +1680,7 @@  static int putback_zspage(struct size_class *class, struct zspage *zspage)
  */
 static void lock_zspage(struct zspage *zspage)
 {
-	struct page *curr_page, *page;
+	struct zpdesc *curr_zpdesc, *zpdesc;
 
 	/*
 	 * Pages we haven't locked yet can be migrated off the list while we're
@@ -1670,24 +1692,24 @@  static void lock_zspage(struct zspage *zspage)
 	 */
 	while (1) {
 		migrate_read_lock(zspage);
-		page = get_first_page(zspage);
-		if (trylock_page(page))
+		zpdesc = get_first_zpdesc(zspage);
+		if (zpdesc_trylock(zpdesc))
 			break;
-		get_page(page);
+		zpdesc_get(zpdesc);
 		migrate_read_unlock(zspage);
-		wait_on_page_locked(page);
-		put_page(page);
+		zpdesc_wait_locked(zpdesc);
+		zpdesc_put(zpdesc);
 	}
 
-	curr_page = page;
-	while ((page = get_next_page(curr_page))) {
-		if (trylock_page(page)) {
-			curr_page = page;
+	curr_zpdesc = zpdesc;
+	while ((zpdesc = get_next_zpdesc(curr_zpdesc))) {
+		if (zpdesc_trylock(zpdesc)) {
+			curr_zpdesc = zpdesc;
 		} else {
-			get_page(page);
+			zpdesc_get(zpdesc);
 			migrate_read_unlock(zspage);
-			wait_on_page_locked(page);
-			put_page(page);
+			zpdesc_wait_locked(zpdesc);
+			zpdesc_put(zpdesc);
 			migrate_read_lock(zspage);
 		}
 	}
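To recap the pattern the lock_zspage() hunk above implements, here is a
distilled, comment-heavy sketch of the first-page step, using the same
helpers as the patch (simplified for reading, not a drop-in replacement):

static void lock_zspage_sketch(struct zspage *zspage)
{
	struct zpdesc *zpdesc;

	/*
	 * The migrate lock pins the zspage's page list, but a page may
	 * already be locked by a migration in flight. In that case take a
	 * reference so the page cannot be freed, drop the migrate lock so
	 * the migration can finish, sleep until the page lock is released,
	 * then retry from scratch since the first page may have changed.
	 */
	while (1) {
		migrate_read_lock(zspage);
		zpdesc = get_first_zpdesc(zspage);
		if (zpdesc_trylock(zpdesc))
			break;			/* locked; proceed to the tail pages */
		zpdesc_get(zpdesc);		/* pin across the sleep */
		migrate_read_unlock(zspage);
		zpdesc_wait_locked(zpdesc);
		zpdesc_put(zpdesc);
	}
	/* ... the remaining pages are locked the same way under the migrate lock ... */
}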