
[RFC,9/9] mm: zswap: use zswap_entry_free() for partially initialized entries

Message ID: 20240325235018.2028408-10-yosryahmed@google.com (mailing list archive)
State: New
Series: zswap: store zero-filled pages more efficiently

Commit Message

Yosry Ahmed March 25, 2024, 11:50 p.m. UTC
zswap_entry_free() performs four types of cleanups before freeing a
zswap_entry:
- Deletes the entry from the LRU.
- Frees compressed memory.
- Puts the pool reference.
- Uncharges the compressed memory and puts the objcg.

zswap_entry_free() always expects a fully initialized entry. Allow
zswap_entry_free() to handle partially initialized entries by making it
possible to identify what cleanups are needed as follows:
- Allocate entries with __GFP_ZERO and initialize zswap_entry.lru when
  the entry is allocated. Pointers are NULL and the length is zero upon
  initialization.
- Use zswap_entry.length to identify if there is compressed memory to
  free. This is possible now that zero-filled pages are handled
  separately, so a length of zero means we did not successfully compress
  the page.
- Only initialize entry->objcg after the memory is charged in
  zswap_store().

With this in place, use zswap_entry_free() in the failure path of
zswap_store() to clean up partially initialized entries. This simplifies
the cleanup code in zswap_store(). While we are at it, rename the
remaining cleanup labels to more meaningful names.
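
For reference, below is a minimal userspace sketch of the idea (not kernel
code): calloc() stands in for __GFP_ZERO, and struct entry, the list
helpers, and entry_free() are illustrative stand-ins for zswap_entry,
list_head, and zswap_entry_free().

#include <stdio.h>
#include <stdlib.h>

struct list_node { struct list_node *prev, *next; };

static void list_init(struct list_node *n) { n->prev = n->next = n; }
static int list_is_empty(const struct list_node *n) { return n->next == n; }

struct entry {
	struct list_node lru;	/* self-linked (empty) until added to an LRU */
	void *pool;		/* NULL until a pool reference is taken */
	void *objcg;		/* NULL until the memory is charged */
	size_t length;		/* 0 until compression succeeds */
};

/* calloc() mirrors __GFP_ZERO: every field starts out NULL/zero. */
static struct entry *entry_alloc(void)
{
	struct entry *e = calloc(1, sizeof(*e));

	if (e)
		list_init(&e->lru);
	return e;
}

/* One free path: each cleanup is keyed off the state left in the entry. */
static void entry_free(struct entry *e)
{
	if (!e)
		return;
	if (!list_is_empty(&e->lru))
		printf("delete from LRU\n");
	if (e->length)
		printf("free %zu bytes of compressed memory\n", e->length);
	if (e->pool)
		printf("put pool reference\n");
	if (e->objcg)
		printf("uncharge %zu bytes and put objcg\n", e->length);
	free(e);
}

int main(void)
{
	struct entry *e;

	/* Failure right after allocation: only the final free runs. */
	entry_free(entry_alloc());

	/* Failure after compression, before charging and LRU insertion. */
	e = entry_alloc();
	e->pool = (void *)0x1;
	e->length = 512;
	entry_free(e);
	return 0;
}

Running the sketch prints only the cleanups that apply to each partially
initialized entry, which is exactly what zswap_entry_free() needs to do in
the zswap_store() failure path.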

Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
---
 mm/zswap.c | 62 ++++++++++++++++++++++++++----------------------------
 1 file changed, 30 insertions(+), 32 deletions(-)

Comments

Chengming Zhou March 28, 2024, 8:31 a.m. UTC | #1
On 2024/3/26 07:50, Yosry Ahmed wrote:
> zswap_entry_free() performs four types of cleanups before freeing a
> zswap_entry:
> - Deletes the entry from the LRU.
> - Frees compressed memory.
> - Puts the pool reference.
> - Uncharges the compressed memory and puts the objcg.
> 
> zswap_entry_free() always expects a fully initialized entry. Allow
> zswap_entry_free() to handle partially initialized entries by making it
> possible to identify what cleanups are needed as follows:
> - Allocate entries with __GFP_ZERO and initialize zswap_entry.lru when
>   the entry is allocated. Pointers are NULL and the length is zero upon
>   initialization.
> - Use zswap_entry.length to identify if there is compressed memory to
>   free. This is possible now that zero-filled pages are handled
>   separately, so a length of zero means we did not successfully compress
>   the page.
> - Only initialize entry->objcg after the memory is charged in
>   zswap_store().
> 
> With this in place, use zswap_entry_free() in the failure path of
> zswap_store() to clean up partially initialized entries. This simplifies
> the cleanup code in zswap_store(). While we are at it, rename the
> remaining cleanup labels to more meaningful names.

Not sure if it's worthwhile to clean up the failure path when the normal path
gets a little verbose.

> 
> Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
> ---
>  mm/zswap.c | 62 ++++++++++++++++++++++++++----------------------------
>  1 file changed, 30 insertions(+), 32 deletions(-)
> 
> diff --git a/mm/zswap.c b/mm/zswap.c
> index 9357328d940af..c50f9df230ca3 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -774,12 +774,13 @@ void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg)
>  **********************************/
>  static struct kmem_cache *zswap_entry_cache;
>  
> -static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp, int nid)
> +static struct zswap_entry *zswap_entry_cache_alloc(int nid)
>  {
>  	struct zswap_entry *entry;
> -	entry = kmem_cache_alloc_node(zswap_entry_cache, gfp, nid);
> -	if (!entry)
> -		return NULL;
> +	entry = kmem_cache_alloc_node(zswap_entry_cache,
> +				      GFP_KERNEL | __GFP_ZERO, nid);
> +	if (entry)
> +		INIT_LIST_HEAD(&entry->lru);
>  	return entry;
>  }
>  
> @@ -795,9 +796,12 @@ static struct zpool *zswap_find_zpool(struct zswap_entry *entry)
>  
>  static void zswap_entry_free(struct zswap_entry *entry)
>  {
> -	zswap_lru_del(&zswap_list_lru, entry);
> -	zpool_free(zswap_find_zpool(entry), entry->handle);
> -	zswap_pool_put(entry->pool);
> +	if (!list_empty(&entry->lru))
> +		zswap_lru_del(&zswap_list_lru, entry);
> +	if (entry->length)
> +		zpool_free(zswap_find_zpool(entry), entry->handle);
> +	if (entry->pool)
> +		zswap_pool_put(entry->pool);
>  	if (entry->objcg) {
>  		obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
>  		obj_cgroup_put(entry->objcg);
> @@ -1447,7 +1451,7 @@ bool zswap_store(struct folio *folio)
>  		return false;
>  
>  	if (!zswap_enabled)
> -		goto check_old;
> +		goto erase_old;
>  
>  	/* Check cgroup limits */
>  	objcg = get_obj_cgroup_from_folio(folio);
> @@ -1455,54 +1459,52 @@ bool zswap_store(struct folio *folio)
>  		memcg = get_mem_cgroup_from_objcg(objcg);
>  		if (shrink_memcg(memcg)) {
>  			mem_cgroup_put(memcg);
> -			goto reject;
> +			goto put_objcg;
>  		}
>  		mem_cgroup_put(memcg);
>  	}
>  
>  	if (zswap_is_folio_zero_filled(folio)) {
>  		if (zswap_store_zero_filled(tree, offset, objcg))
> -			goto reject;
> +			goto put_objcg;
>  		goto stored;
>  	}
>  
>  	if (!zswap_non_zero_filled_pages_enabled)
> -		goto reject;
> +		goto put_objcg;
>  
>  	if (!zswap_check_limit())
> -		goto reject;
> +		goto put_objcg;
>  
> -	entry = zswap_entry_cache_alloc(GFP_KERNEL, folio_nid(folio));
> +	entry = zswap_entry_cache_alloc(folio_nid(folio));
>  	if (!entry) {
>  		zswap_reject_kmemcache_fail++;
> -		goto reject;
> +		goto put_objcg;
>  	}
>  
> -	/* if entry is successfully added, it keeps the reference */
>  	entry->pool = zswap_pool_current_get();
>  	if (!entry->pool)
> -		goto freepage;
> +		goto free_entry;
>  
>  	if (objcg) {
>  		memcg = get_mem_cgroup_from_objcg(objcg);
>  		if (memcg_list_lru_alloc(memcg, &zswap_list_lru, GFP_KERNEL)) {
>  			mem_cgroup_put(memcg);
> -			goto put_pool;
> +			goto free_entry;
>  		}
>  		mem_cgroup_put(memcg);
>  	}
>  
>  	if (!zswap_compress(folio, entry))
> -		goto put_pool;
> -
> -	entry->swpentry = swp;
> -	entry->objcg = objcg;
> +		goto free_entry;
>  
>  	if (zswap_tree_store(tree, offset, entry))
> -		goto store_failed;
> +		goto free_entry;
>  
> -	if (objcg)
> +	if (objcg) {
>  		obj_cgroup_charge_zswap(objcg, entry->length);
> +		entry->objcg = objcg;
> +	}
>  
>  	/*
>  	 * We finish initializing the entry while it's already in xarray.
> @@ -1514,7 +1516,7 @@ bool zswap_store(struct folio *folio)
>  	 *    The publishing order matters to prevent writeback from seeing
>  	 *    an incoherent entry.
>  	 */
> -	INIT_LIST_HEAD(&entry->lru);
> +	entry->swpentry = swp;
>  	zswap_lru_add(&zswap_list_lru, entry);
>  
>  stored:
> @@ -1525,17 +1527,13 @@ bool zswap_store(struct folio *folio)
>  
>  	return true;
>  
> -store_failed:
> -	zpool_free(zswap_find_zpool(entry), entry->handle);
> -put_pool:
> -	zswap_pool_put(entry->pool);
> -freepage:
> -	zswap_entry_cache_free(entry);
> -reject:
> +free_entry:
> +	zswap_entry_free(entry);
> +put_objcg:
>  	obj_cgroup_put(objcg);
>  	if (zswap_pool_reached_full)
>  		queue_work(shrink_wq, &zswap_shrink_work);
> -check_old:
> +erase_old:
>  	/*
>  	 * If the zswap store fails or zswap is disabled, we must invalidate the
>  	 * possibly stale entry which was previously stored at this offset.
Yosry Ahmed March 28, 2024, 6:49 p.m. UTC | #2
On Thu, Mar 28, 2024 at 1:31 AM Chengming Zhou <chengming.zhou@linux.dev> wrote:
>
> On 2024/3/26 07:50, Yosry Ahmed wrote:
> > zswap_entry_free() performs four types of cleanups before freeing a
> > zswap_entry:
> > - Deletes the entry from the LRU.
> > - Frees compressed memory.
> > - Puts the pool reference.
> > - Uncharges the compressed memory and puts the objcg.
> >
> > zswap_entry_free() always expects a fully initialized entry. Allow
> > zswap_entry_free() to handle partially initialized entries by making it
> > possible to identify what cleanups are needed as follows:
> > - Allocate entries with __GFP_ZERO and initialize zswap_entry.lru when
> >   the entry is allocated. Pointers are NULL and the length is zero upon
> >   initialization.
> > - Use zswap_entry.length to identify if there is compressed memory to
> >   free. This is possible now that zero-filled pages are handled
> >   separately, so a length of zero means we did not successfully compress
> >   the page.
> > - Only initialize entry->objcg after the memory is charged in
> >   zswap_store().
> >
> > With this in place, use zswap_entry_free() in the failure path of
> > zswap_store() to clean up partially initialized entries. This simplifies
> > the cleanup code in zswap_store(). While we are at it, rename the
> > remaining cleanup labels to more meaningful names.
>
> Not sure if it's worthwhile to clean up the failure path when the normal path
> gets a little verbose.

I was on the fence about this, so I thought I would just send it and
see what others think.

On one hand, it makes the initialization more robust because the
zswap_entry is always in a clean, identifiable state; on the other
hand, it adds churn to the normal path, as you noticed. Also, after
removing the handling of zero-length entries from the failure path,
it isn't that bad without this patch anyway.

So if no one else thinks this is useful, I will drop the patch in the
next version.

Thanks!

>
> >
> > Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
> > ---
> >  mm/zswap.c | 62 ++++++++++++++++++++++++++----------------------------
> >  1 file changed, 30 insertions(+), 32 deletions(-)
> >
> > diff --git a/mm/zswap.c b/mm/zswap.c
> > index 9357328d940af..c50f9df230ca3 100644
> > --- a/mm/zswap.c
> > +++ b/mm/zswap.c
> > @@ -774,12 +774,13 @@ void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg)
> >  **********************************/
> >  static struct kmem_cache *zswap_entry_cache;
> >
> > -static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp, int nid)
> > +static struct zswap_entry *zswap_entry_cache_alloc(int nid)
> >  {
> >       struct zswap_entry *entry;
> > -     entry = kmem_cache_alloc_node(zswap_entry_cache, gfp, nid);
> > -     if (!entry)
> > -             return NULL;
> > +     entry = kmem_cache_alloc_node(zswap_entry_cache,
> > +                                   GFP_KERNEL | __GFP_ZERO, nid);
> > +     if (entry)
> > +             INIT_LIST_HEAD(&entry->lru);
> >       return entry;
> >  }
> >
> > @@ -795,9 +796,12 @@ static struct zpool *zswap_find_zpool(struct zswap_entry *entry)
> >
> >  static void zswap_entry_free(struct zswap_entry *entry)
> >  {
> > -     zswap_lru_del(&zswap_list_lru, entry);
> > -     zpool_free(zswap_find_zpool(entry), entry->handle);
> > -     zswap_pool_put(entry->pool);
> > +     if (!list_empty(&entry->lru))
> > +             zswap_lru_del(&zswap_list_lru, entry);
> > +     if (entry->length)
> > +             zpool_free(zswap_find_zpool(entry), entry->handle);
> > +     if (entry->pool)
> > +             zswap_pool_put(entry->pool);
> >       if (entry->objcg) {
> >               obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
> >               obj_cgroup_put(entry->objcg);
> > @@ -1447,7 +1451,7 @@ bool zswap_store(struct folio *folio)
> >               return false;
> >
> >       if (!zswap_enabled)
> > -             goto check_old;
> > +             goto erase_old;
> >
> >       /* Check cgroup limits */
> >       objcg = get_obj_cgroup_from_folio(folio);
> > @@ -1455,54 +1459,52 @@ bool zswap_store(struct folio *folio)
> >               memcg = get_mem_cgroup_from_objcg(objcg);
> >               if (shrink_memcg(memcg)) {
> >                       mem_cgroup_put(memcg);
> > -                     goto reject;
> > +                     goto put_objcg;
> >               }
> >               mem_cgroup_put(memcg);
> >       }
> >
> >       if (zswap_is_folio_zero_filled(folio)) {
> >               if (zswap_store_zero_filled(tree, offset, objcg))
> > -                     goto reject;
> > +                     goto put_objcg;
> >               goto stored;
> >       }
> >
> >       if (!zswap_non_zero_filled_pages_enabled)
> > -             goto reject;
> > +             goto put_objcg;
> >
> >       if (!zswap_check_limit())
> > -             goto reject;
> > +             goto put_objcg;
> >
> > -     entry = zswap_entry_cache_alloc(GFP_KERNEL, folio_nid(folio));
> > +     entry = zswap_entry_cache_alloc(folio_nid(folio));
> >       if (!entry) {
> >               zswap_reject_kmemcache_fail++;
> > -             goto reject;
> > +             goto put_objcg;
> >       }
> >
> > -     /* if entry is successfully added, it keeps the reference */
> >       entry->pool = zswap_pool_current_get();
> >       if (!entry->pool)
> > -             goto freepage;
> > +             goto free_entry;
> >
> >       if (objcg) {
> >               memcg = get_mem_cgroup_from_objcg(objcg);
> >               if (memcg_list_lru_alloc(memcg, &zswap_list_lru, GFP_KERNEL)) {
> >                       mem_cgroup_put(memcg);
> > -                     goto put_pool;
> > +                     goto free_entry;
> >               }
> >               mem_cgroup_put(memcg);
> >       }
> >
> >       if (!zswap_compress(folio, entry))
> > -             goto put_pool;
> > -
> > -     entry->swpentry = swp;
> > -     entry->objcg = objcg;
> > +             goto free_entry;
> >
> >       if (zswap_tree_store(tree, offset, entry))
> > -             goto store_failed;
> > +             goto free_entry;
> >
> > -     if (objcg)
> > +     if (objcg) {
> >               obj_cgroup_charge_zswap(objcg, entry->length);
> > +             entry->objcg = objcg;
> > +     }
> >
> >       /*
> >        * We finish initializing the entry while it's already in xarray.
> > @@ -1514,7 +1516,7 @@ bool zswap_store(struct folio *folio)
> >        *    The publishing order matters to prevent writeback from seeing
> >        *    an incoherent entry.
> >        */
> > -     INIT_LIST_HEAD(&entry->lru);
> > +     entry->swpentry = swp;
> >       zswap_lru_add(&zswap_list_lru, entry);
> >
> >  stored:
> > @@ -1525,17 +1527,13 @@ bool zswap_store(struct folio *folio)
> >
> >       return true;
> >
> > -store_failed:
> > -     zpool_free(zswap_find_zpool(entry), entry->handle);
> > -put_pool:
> > -     zswap_pool_put(entry->pool);
> > -freepage:
> > -     zswap_entry_cache_free(entry);
> > -reject:
> > +free_entry:
> > +     zswap_entry_free(entry);
> > +put_objcg:
> >       obj_cgroup_put(objcg);
> >       if (zswap_pool_reached_full)
> >               queue_work(shrink_wq, &zswap_shrink_work);
> > -check_old:
> > +erase_old:
> >       /*
> >        * If the zswap store fails or zswap is disabled, we must invalidate the
> >        * possibly stale entry which was previously stored at this offset.

Patch

diff --git a/mm/zswap.c b/mm/zswap.c
index 9357328d940af..c50f9df230ca3 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -774,12 +774,13 @@  void zswap_memcg_offline_cleanup(struct mem_cgroup *memcg)
 **********************************/
 static struct kmem_cache *zswap_entry_cache;
 
-static struct zswap_entry *zswap_entry_cache_alloc(gfp_t gfp, int nid)
+static struct zswap_entry *zswap_entry_cache_alloc(int nid)
 {
 	struct zswap_entry *entry;
-	entry = kmem_cache_alloc_node(zswap_entry_cache, gfp, nid);
-	if (!entry)
-		return NULL;
+	entry = kmem_cache_alloc_node(zswap_entry_cache,
+				      GFP_KERNEL | __GFP_ZERO, nid);
+	if (entry)
+		INIT_LIST_HEAD(&entry->lru);
 	return entry;
 }
 
@@ -795,9 +796,12 @@  static struct zpool *zswap_find_zpool(struct zswap_entry *entry)
 
 static void zswap_entry_free(struct zswap_entry *entry)
 {
-	zswap_lru_del(&zswap_list_lru, entry);
-	zpool_free(zswap_find_zpool(entry), entry->handle);
-	zswap_pool_put(entry->pool);
+	if (!list_empty(&entry->lru))
+		zswap_lru_del(&zswap_list_lru, entry);
+	if (entry->length)
+		zpool_free(zswap_find_zpool(entry), entry->handle);
+	if (entry->pool)
+		zswap_pool_put(entry->pool);
 	if (entry->objcg) {
 		obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
 		obj_cgroup_put(entry->objcg);
@@ -1447,7 +1451,7 @@  bool zswap_store(struct folio *folio)
 		return false;
 
 	if (!zswap_enabled)
-		goto check_old;
+		goto erase_old;
 
 	/* Check cgroup limits */
 	objcg = get_obj_cgroup_from_folio(folio);
@@ -1455,54 +1459,52 @@  bool zswap_store(struct folio *folio)
 		memcg = get_mem_cgroup_from_objcg(objcg);
 		if (shrink_memcg(memcg)) {
 			mem_cgroup_put(memcg);
-			goto reject;
+			goto put_objcg;
 		}
 		mem_cgroup_put(memcg);
 	}
 
 	if (zswap_is_folio_zero_filled(folio)) {
 		if (zswap_store_zero_filled(tree, offset, objcg))
-			goto reject;
+			goto put_objcg;
 		goto stored;
 	}
 
 	if (!zswap_non_zero_filled_pages_enabled)
-		goto reject;
+		goto put_objcg;
 
 	if (!zswap_check_limit())
-		goto reject;
+		goto put_objcg;
 
-	entry = zswap_entry_cache_alloc(GFP_KERNEL, folio_nid(folio));
+	entry = zswap_entry_cache_alloc(folio_nid(folio));
 	if (!entry) {
 		zswap_reject_kmemcache_fail++;
-		goto reject;
+		goto put_objcg;
 	}
 
-	/* if entry is successfully added, it keeps the reference */
 	entry->pool = zswap_pool_current_get();
 	if (!entry->pool)
-		goto freepage;
+		goto free_entry;
 
 	if (objcg) {
 		memcg = get_mem_cgroup_from_objcg(objcg);
 		if (memcg_list_lru_alloc(memcg, &zswap_list_lru, GFP_KERNEL)) {
 			mem_cgroup_put(memcg);
-			goto put_pool;
+			goto free_entry;
 		}
 		mem_cgroup_put(memcg);
 	}
 
 	if (!zswap_compress(folio, entry))
-		goto put_pool;
-
-	entry->swpentry = swp;
-	entry->objcg = objcg;
+		goto free_entry;
 
 	if (zswap_tree_store(tree, offset, entry))
-		goto store_failed;
+		goto free_entry;
 
-	if (objcg)
+	if (objcg) {
 		obj_cgroup_charge_zswap(objcg, entry->length);
+		entry->objcg = objcg;
+	}
 
 	/*
 	 * We finish initializing the entry while it's already in xarray.
@@ -1514,7 +1516,7 @@  bool zswap_store(struct folio *folio)
 	 *    The publishing order matters to prevent writeback from seeing
 	 *    an incoherent entry.
 	 */
-	INIT_LIST_HEAD(&entry->lru);
+	entry->swpentry = swp;
 	zswap_lru_add(&zswap_list_lru, entry);
 
 stored:
@@ -1525,17 +1527,13 @@  bool zswap_store(struct folio *folio)
 
 	return true;
 
-store_failed:
-	zpool_free(zswap_find_zpool(entry), entry->handle);
-put_pool:
-	zswap_pool_put(entry->pool);
-freepage:
-	zswap_entry_cache_free(entry);
-reject:
+free_entry:
+	zswap_entry_free(entry);
+put_objcg:
 	obj_cgroup_put(objcg);
 	if (zswap_pool_reached_full)
 		queue_work(shrink_wq, &zswap_shrink_work);
-check_old:
+erase_old:
 	/*
 	 * If the zswap store fails or zswap is disabled, we must invalidate the
 	 * possibly stale entry which was previously stored at this offset.