
[v3,1/7] mm: zswap: add pool shrinking mechanism

Message ID 20230612093815.133504-2-cerasuolodomenico@gmail.com (mailing list archive)
State New
Series mm: zswap: move writeback LRU from zpool to zswap

Commit Message

Domenico Cerasuolo June 12, 2023, 9:38 a.m. UTC
Each zpool driver (zbud, z3fold and zsmalloc) implements its own shrink
function, which is called from zpool_shrink. However, with this commit,
a unified shrink function is added to zswap. The ultimate goal is to
eliminate the need for zpool_shrink once all zpool implementations have
dropped their shrink code.

To ensure the functionality of each commit, this change focuses solely
on adding the mechanism itself. No modifications are made to
the backends, meaning that functionally, there are no immediate changes.
The zswap mechanism will only come into effect once the backends have
removed their shrink code. The subsequent commits will address the
modifications needed in the backends.

Acked-by: Nhat Pham <nphamcs@gmail.com>
Tested-by: Yosry Ahmed <yosryahmed@google.com>
Signed-off-by: Domenico Cerasuolo <cerasuolodomenico@gmail.com>
---
 mm/zswap.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 92 insertions(+), 5 deletions(-)
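
At a glance, the mechanism added here works roughly as follows (an informal
summary using names from the diff below; same-value filled entries never go
on the LRU, and the new path is only used while the backend is not
zpool_evictable()):

	store:   the entry is added to the head of pool->lru (under pool->lru_lock)
	load:    the entry is moved back to the head of pool->lru, unless an
	         exclusive load invalidates it
	free:    the entry is removed from pool->lru
	shrink:  shrink_worker() calls zswap_reclaim_entry(), which takes the entry
	         at the tail of pool->lru, re-validates it under tree->lock, and
	         writes it back; zpool_shrink() is still called for backends that
	         report zpool_evictable()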

Comments

Johannes Weiner June 12, 2023, 1:34 p.m. UTC | #1
On Mon, Jun 12, 2023 at 11:38:09AM +0200, Domenico Cerasuolo wrote:
> Each zpool driver (zbud, z3fold and zsmalloc) implements its own shrink
> function, which is called from zpool_shrink. However, with this commit,
> a unified shrink function is added to zswap. The ultimate goal is to
> eliminate the need for zpool_shrink once all zpool implementations have
> dropped their shrink code.
> 
> To ensure the functionality of each commit, this change focuses solely
> on adding the mechanism itself. No modifications are made to
> the backends, meaning that functionally, there are no immediate changes.
> The zswap mechanism will only come into effect once the backends have
> removed their shrink code. The subsequent commits will address the
> modifications needed in the backends.
> 
> Acked-by: Nhat Pham <nphamcs@gmail.com>
> Tested-by: Yosry Ahmed <yosryahmed@google.com>
> Signed-off-by: Domenico Cerasuolo <cerasuolodomenico@gmail.com>

Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Yosry Ahmed June 12, 2023, 6:18 p.m. UTC | #2
On Mon, Jun 12, 2023 at 2:38 AM Domenico Cerasuolo
<cerasuolodomenico@gmail.com> wrote:
>
> Each zpool driver (zbud, z3fold and zsmalloc) implements its own shrink
> function, which is called from zpool_shrink. However, with this commit,
> a unified shrink function is added to zswap. The ultimate goal is to
> eliminate the need for zpool_shrink once all zpool implementations have
> dropped their shrink code.
>
> To ensure the functionality of each commit, this change focuses solely
> on adding the mechanism itself. No modifications are made to
> the backends, meaning that functionally, there are no immediate changes.
> The zswap mechanism will only come into effect once the backends have
> removed their shrink code. The subsequent commits will address the
> modifications needed in the backends.
>
> Acked-by: Nhat Pham <nphamcs@gmail.com>
> Tested-by: Yosry Ahmed <yosryahmed@google.com>

Reviewed-by: Yosry Ahmed <yosryahmed@google.com>

> Signed-off-by: Domenico Cerasuolo <cerasuolodomenico@gmail.com>
> ---
>  mm/zswap.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 92 insertions(+), 5 deletions(-)
>
> diff --git a/mm/zswap.c b/mm/zswap.c
> index 9fa86265f6dd..0024ec5ed574 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -154,6 +154,12 @@ struct crypto_acomp_ctx {
>         struct mutex *mutex;
>  };
>
> +/*
> + * The lock ordering is zswap_tree.lock -> zswap_pool.lru_lock.
> + * The only case where lru_lock is not acquired while holding tree.lock is
> + * when a zswap_entry is taken off the lru for writeback, in that case it
> + * needs to be verified that it's still valid in the tree.
> + */
>  struct zswap_pool {
>         struct zpool *zpool;
>         struct crypto_acomp_ctx __percpu *acomp_ctx;
> @@ -163,6 +169,8 @@ struct zswap_pool {
>         struct work_struct shrink_work;
>         struct hlist_node node;
>         char tfm_name[CRYPTO_MAX_ALG_NAME];
> +       struct list_head lru;
> +       spinlock_t lru_lock;
>  };
>
>  /*
> @@ -180,10 +188,12 @@ struct zswap_pool {
>   *            be held while changing the refcount.  Since the lock must
>   *            be held, there is no reason to also make refcount atomic.
>   * length - the length in bytes of the compressed page data.  Needed during
> - *          decompression. For a same value filled page length is 0.
> + *          decompression. For a same value filled page length is 0, and both
> + *          pool and lru are invalid and must be ignored.
>   * pool - the zswap_pool the entry's data is in
>   * handle - zpool allocation handle that stores the compressed page data
>   * value - value of the same-value filled pages which have same content
> + * lru - handle to the pool's lru used to evict pages.
>   */
>  struct zswap_entry {
>         struct rb_node rbnode;
> @@ -196,6 +206,7 @@ struct zswap_entry {
>                 unsigned long value;
>         };
>         struct obj_cgroup *objcg;
> +       struct list_head lru;
>  };
>
>  struct zswap_header {
> @@ -368,6 +379,12 @@ static void zswap_free_entry(struct zswap_entry *entry)
>         if (!entry->length)
>                 atomic_dec(&zswap_same_filled_pages);
>         else {
> +               /* zpool_evictable will be removed once all 3 backends have migrated */
> +               if (!zpool_evictable(entry->pool->zpool)) {
> +                       spin_lock(&entry->pool->lru_lock);
> +                       list_del(&entry->lru);
> +                       spin_unlock(&entry->pool->lru_lock);
> +               }
>                 zpool_free(entry->pool->zpool, entry->handle);
>                 zswap_pool_put(entry->pool);
>         }
> @@ -588,14 +605,72 @@ static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
>         return NULL;
>  }
>
> +static int zswap_reclaim_entry(struct zswap_pool *pool)
> +{
> +       struct zswap_header *zhdr;
> +       struct zswap_entry *entry;
> +       struct zswap_tree *tree;
> +       pgoff_t swpoffset;
> +       int ret;
> +
> +       /* Get an entry off the LRU */
> +       spin_lock(&pool->lru_lock);
> +       if (list_empty(&pool->lru)) {
> +               spin_unlock(&pool->lru_lock);
> +               return -EINVAL;
> +       }
> +       entry = list_last_entry(&pool->lru, struct zswap_entry, lru);
> +       list_del_init(&entry->lru);
> +       zhdr = zpool_map_handle(pool->zpool, entry->handle, ZPOOL_MM_RO);
> +       tree = zswap_trees[swp_type(zhdr->swpentry)];
> +       zpool_unmap_handle(pool->zpool, entry->handle);
> +       /*
> +        * Once the lru lock is dropped, the entry might get freed. The
> +        * swpoffset is copied to the stack, and entry isn't deref'd again
> +        * until the entry is verified to still be alive in the tree.
> +        */
> +       swpoffset = swp_offset(zhdr->swpentry);
> +       spin_unlock(&pool->lru_lock);
> +
> +       /* Check for invalidate() race */
> +       spin_lock(&tree->lock);
> +       if (entry != zswap_rb_search(&tree->rbroot, swpoffset)) {
> +               ret = -EAGAIN;
> +               goto unlock;
> +       }
> +       /* Hold a reference to prevent a free during writeback */
> +       zswap_entry_get(entry);
> +       spin_unlock(&tree->lock);
> +
> +       ret = zswap_writeback_entry(pool->zpool, entry->handle);
> +
> +       spin_lock(&tree->lock);
> +       if (ret) {
> +               /* Writeback failed, put entry back on LRU */
> +               spin_lock(&pool->lru_lock);
> +               list_move(&entry->lru, &pool->lru);
> +               spin_unlock(&pool->lru_lock);
> +       }
> +
> +       /* Drop local reference */
> +       zswap_entry_put(tree, entry);
> +unlock:
> +       spin_unlock(&tree->lock);
> +       return ret ? -EAGAIN : 0;
> +}
> +
>  static void shrink_worker(struct work_struct *w)
>  {
>         struct zswap_pool *pool = container_of(w, typeof(*pool),
>                                                 shrink_work);
>         int ret, failures = 0;
>
> +       /* zpool_evictable will be removed once all 3 backends have migrated */
>         do {
> -               ret = zpool_shrink(pool->zpool, 1, NULL);
> +               if (zpool_evictable(pool->zpool))
> +                       ret = zpool_shrink(pool->zpool, 1, NULL);
> +               else
> +                       ret = zswap_reclaim_entry(pool);
>                 if (ret) {
>                         zswap_reject_reclaim_fail++;
>                         if (ret != -EAGAIN)
> @@ -659,6 +734,8 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
>          */
>         kref_init(&pool->kref);
>         INIT_LIST_HEAD(&pool->list);
> +       INIT_LIST_HEAD(&pool->lru);
> +       spin_lock_init(&pool->lru_lock);
>         INIT_WORK(&pool->shrink_work, shrink_worker);
>
>         zswap_pool_debug("created", pool);
> @@ -1274,7 +1351,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
>         }
>
>         /* store */
> -       hlen = zpool_evictable(entry->pool->zpool) ? sizeof(zhdr) : 0;
> +       hlen = sizeof(zhdr);
>         gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
>         if (zpool_malloc_support_movable(entry->pool->zpool))
>                 gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
> @@ -1317,6 +1394,12 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
>                         zswap_entry_put(tree, dupentry);
>                 }
>         } while (ret == -EEXIST);
> +       /* zpool_evictable will be removed once all 3 backends have migrated */
> +       if (entry->length && !zpool_evictable(entry->pool->zpool)) {
> +               spin_lock(&entry->pool->lru_lock);
> +               list_add(&entry->lru, &entry->pool->lru);
> +               spin_unlock(&entry->pool->lru_lock);
> +       }
>         spin_unlock(&tree->lock);
>
>         /* update stats */
> @@ -1398,8 +1481,7 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
>         /* decompress */
>         dlen = PAGE_SIZE;
>         src = zpool_map_handle(entry->pool->zpool, entry->handle, ZPOOL_MM_RO);
> -       if (zpool_evictable(entry->pool->zpool))
> -               src += sizeof(struct zswap_header);
> +       src += sizeof(struct zswap_header);
>
>         if (!zpool_can_sleep_mapped(entry->pool->zpool)) {
>                 memcpy(tmp, src, entry->length);
> @@ -1432,6 +1514,11 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
>         if (!ret && zswap_exclusive_loads_enabled) {
>                 zswap_invalidate_entry(tree, entry);
>                 *exclusive = true;
> +       } else if (entry->length && !zpool_evictable(entry->pool->zpool)) {
> +               /* zpool_evictable will be removed once all 3 backends have migrated */
> +               spin_lock(&entry->pool->lru_lock);
> +               list_move(&entry->lru, &entry->pool->lru);
> +               spin_unlock(&entry->pool->lru_lock);
>         }
>         spin_unlock(&tree->lock);
>
> --
> 2.34.1
>
Sergey Senozhatsky June 18, 2023, 4:38 a.m. UTC | #3
On (23/06/12 11:38), Domenico Cerasuolo wrote:
> +static int zswap_reclaim_entry(struct zswap_pool *pool)
> +{
> +	struct zswap_header *zhdr;
> +	struct zswap_entry *entry;
> +	struct zswap_tree *tree;
> +	pgoff_t swpoffset;
> +	int ret;
> +
> +	/* Get an entry off the LRU */
> +	spin_lock(&pool->lru_lock);
> +	if (list_empty(&pool->lru)) {
> +		spin_unlock(&pool->lru_lock);
> +		return -EINVAL;
> +	}
> +	entry = list_last_entry(&pool->lru, struct zswap_entry, lru);
> +	list_del_init(&entry->lru);

A quick question: should we zswap_entry_get() here?
Yosry Ahmed June 18, 2023, 4:48 a.m. UTC | #4
On Sat, Jun 17, 2023 at 9:39 PM Sergey Senozhatsky
<senozhatsky@chromium.org> wrote:
>
> On (23/06/12 11:38), Domenico Cerasuolo wrote:
> > +static int zswap_reclaim_entry(struct zswap_pool *pool)
> > +{
> > +     struct zswap_header *zhdr;
> > +     struct zswap_entry *entry;
> > +     struct zswap_tree *tree;
> > +     pgoff_t swpoffset;
> > +     int ret;
> > +
> > +     /* Get an entry off the LRU */
> > +     spin_lock(&pool->lru_lock);
> > +     if (list_empty(&pool->lru)) {
> > +             spin_unlock(&pool->lru_lock);
> > +             return -EINVAL;
> > +     }
> > +     entry = list_last_entry(&pool->lru, struct zswap_entry, lru);
> > +     list_del_init(&entry->lru);
>
> A quick question: should we zswap_entry_get() here?

We need to hold the tree lock for that, and the lock ordering is tree
lock -> lru lock. If we try to grab the tree lock here we may
deadlock. In the next code block we release the lru_lock, hold the
tree lock, make sure the entry is still valid in the tree, then do
zswap_entry_get().
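
For reference, the ordering the patch uses in zswap_reclaim_entry() is roughly:

	lock lru_lock
	take the entry at the tail of pool->lru
	map the handle to read the swp entry (tree and offset), then unmap
	unlock lru_lock

	lock tree_lock
	re-lookup the offset in the tree; bail out if the entry is gone
	zswap_entry_get()
	unlock tree_lock

	writeback

	lock tree_lock
	on failure, move the entry back onto pool->lru (under lru_lock)
	zswap_entry_put()
	unlock tree_lock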
Sergey Senozhatsky June 18, 2023, 5:25 a.m. UTC | #5
On (23/06/17 21:48), Yosry Ahmed wrote:
> > On (23/06/12 11:38), Domenico Cerasuolo wrote:
> > > +static int zswap_reclaim_entry(struct zswap_pool *pool)
> > > +{
> > > +     struct zswap_header *zhdr;
> > > +     struct zswap_entry *entry;
> > > +     struct zswap_tree *tree;
> > > +     pgoff_t swpoffset;
> > > +     int ret;
> > > +
> > > +     /* Get an entry off the LRU */
> > > +     spin_lock(&pool->lru_lock);
> > > +     if (list_empty(&pool->lru)) {
> > > +             spin_unlock(&pool->lru_lock);
> > > +             return -EINVAL;
> > > +     }
> > > +     entry = list_last_entry(&pool->lru, struct zswap_entry, lru);
> > > +     list_del_init(&entry->lru);
> >
> > A quick question: should we zswap_entry_get() here?
> 
> We need to hold the tree lock for that, and the lock ordering is tree
> lock -> lru lock. If we try to grab the tree lock here we may
> deadlock.

We can deadlock doing this?

	lock tree_lock
	lock lru_lock
	list_del_init
	unlock lru_lock
	entry_get
	unlock tree_lock
	writeback
Yosry Ahmed June 18, 2023, 5:28 a.m. UTC | #6
On Sat, Jun 17, 2023 at 10:25 PM Sergey Senozhatsky
<senozhatsky@chromium.org> wrote:
>
> On (23/06/17 21:48), Yosry Ahmed wrote:
> > > On (23/06/12 11:38), Domenico Cerasuolo wrote:
> > > > +static int zswap_reclaim_entry(struct zswap_pool *pool)
> > > > +{
> > > > +     struct zswap_header *zhdr;
> > > > +     struct zswap_entry *entry;
> > > > +     struct zswap_tree *tree;
> > > > +     pgoff_t swpoffset;
> > > > +     int ret;
> > > > +
> > > > +     /* Get an entry off the LRU */
> > > > +     spin_lock(&pool->lru_lock);
> > > > +     if (list_empty(&pool->lru)) {
> > > > +             spin_unlock(&pool->lru_lock);
> > > > +             return -EINVAL;
> > > > +     }
> > > > +     entry = list_last_entry(&pool->lru, struct zswap_entry, lru);
> > > > +     list_del_init(&entry->lru);
> > >
> > > A quick question: should we zswap_entry_get() here?
> >
> > We need to hold the tree lock for that, and the lock ordering is tree
> > lock -> lru lock. If we try to grab the tree lock here we may
> > deadlock.
>
> We can deadlock doing this?
>
>         lock tree_lock
>         lock lru_lock
>         list_del_init
>         unlock lru_lock
>         entry_get
>         unlock tree_lock
>         writeback

We don't know which tree the zswap entry belongs to until we get the
entry from the LRU -- so we can't hold the tree lock before getting the
entry from the LRU (and to get the entry from the LRU we need the
lru_lock).
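
Concretely, in the patch the tree is only known after the entry has been
taken off the LRU and its header mapped:

	entry = list_last_entry(&pool->lru, struct zswap_entry, lru);
	zhdr = zpool_map_handle(pool->zpool, entry->handle, ZPOOL_MM_RO);
	tree = zswap_trees[swp_type(zhdr->swpentry)];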
Sergey Senozhatsky June 18, 2023, 5:30 a.m. UTC | #7
On (23/06/17 22:28), Yosry Ahmed wrote:
> > On (23/06/17 21:48), Yosry Ahmed wrote:
> > > > On (23/06/12 11:38), Domenico Cerasuolo wrote:
> > > > > +static int zswap_reclaim_entry(struct zswap_pool *pool)
> > > > > +{
> > > > > +     struct zswap_header *zhdr;
> > > > > +     struct zswap_entry *entry;
> > > > > +     struct zswap_tree *tree;
> > > > > +     pgoff_t swpoffset;
> > > > > +     int ret;
> > > > > +
> > > > > +     /* Get an entry off the LRU */
> > > > > +     spin_lock(&pool->lru_lock);
> > > > > +     if (list_empty(&pool->lru)) {
> > > > > +             spin_unlock(&pool->lru_lock);
> > > > > +             return -EINVAL;
> > > > > +     }
> > > > > +     entry = list_last_entry(&pool->lru, struct zswap_entry, lru);
> > > > > +     list_del_init(&entry->lru);
> > > >
> > > > A quick question: should we zswap_entry_get() here?
> > >
> > > We need to hold the tree lock for that, and the lock ordering is tree
> > > lock -> lru lock. If we try to grab the tree lock here we may
> > > deadlock.
> >
> > We can deadlock doing this?
> >
> >         lock tree_lock
> >         lock lru_lock
> >         list_del_init
> >         unlock lru_lock
> >         entry_get
> >         unlock tree_lock
> >         writeback
> 
> We don't know which tree the zswap entry belongs to

Oh... I must be blind. You are right. Sorry for the noise.
Yosry Ahmed June 18, 2023, 5:31 a.m. UTC | #8
On Sat, Jun 17, 2023 at 10:30 PM Sergey Senozhatsky
<senozhatsky@chromium.org> wrote:
>
> On (23/06/17 22:28), Yosry Ahmed wrote:
> > > On (23/06/17 21:48), Yosry Ahmed wrote:
> > > > > On (23/06/12 11:38), Domenico Cerasuolo wrote:
> > > > > > +static int zswap_reclaim_entry(struct zswap_pool *pool)
> > > > > > +{
> > > > > > +     struct zswap_header *zhdr;
> > > > > > +     struct zswap_entry *entry;
> > > > > > +     struct zswap_tree *tree;
> > > > > > +     pgoff_t swpoffset;
> > > > > > +     int ret;
> > > > > > +
> > > > > > +     /* Get an entry off the LRU */
> > > > > > +     spin_lock(&pool->lru_lock);
> > > > > > +     if (list_empty(&pool->lru)) {
> > > > > > +             spin_unlock(&pool->lru_lock);
> > > > > > +             return -EINVAL;
> > > > > > +     }
> > > > > > +     entry = list_last_entry(&pool->lru, struct zswap_entry, lru);
> > > > > > +     list_del_init(&entry->lru);
> > > > >
> > > > > A quick question: should we zswap_entry_get() here?
> > > >
> > > > We need to hold the tree lock for that, and the lock ordering is tree
> > > > lock -> lru lock. If we try to grab the tree lock here we may
> > > > deadlock.
> > >
> > > We can deadlock doing this?
> > >
> > >         lock tree_lock
> > >         lock lru_lock
> > >         list_del_init
> > >         unlock lru_lock
> > >         entry_get
> > >         unlock tree_lock
> > >         writeback
> >
> > We don't know which tree the zswap entry belongs to
>
> Oh... I must be blind. You are right. Sorry for the noise.

Don't blame yourself, it is fairly convoluted :)

Patch

diff --git a/mm/zswap.c b/mm/zswap.c
index 9fa86265f6dd..0024ec5ed574 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -154,6 +154,12 @@  struct crypto_acomp_ctx {
 	struct mutex *mutex;
 };
 
+/*
+ * The lock ordering is zswap_tree.lock -> zswap_pool.lru_lock.
+ * The only case where lru_lock is not acquired while holding tree.lock is
+ * when a zswap_entry is taken off the lru for writeback, in that case it
+ * needs to be verified that it's still valid in the tree.
+ */
 struct zswap_pool {
 	struct zpool *zpool;
 	struct crypto_acomp_ctx __percpu *acomp_ctx;
@@ -163,6 +169,8 @@  struct zswap_pool {
 	struct work_struct shrink_work;
 	struct hlist_node node;
 	char tfm_name[CRYPTO_MAX_ALG_NAME];
+	struct list_head lru;
+	spinlock_t lru_lock;
 };
 
 /*
@@ -180,10 +188,12 @@  struct zswap_pool {
  *            be held while changing the refcount.  Since the lock must
  *            be held, there is no reason to also make refcount atomic.
  * length - the length in bytes of the compressed page data.  Needed during
- *          decompression. For a same value filled page length is 0.
+ *          decompression. For a same value filled page length is 0, and both
+ *          pool and lru are invalid and must be ignored.
  * pool - the zswap_pool the entry's data is in
  * handle - zpool allocation handle that stores the compressed page data
  * value - value of the same-value filled pages which have same content
+ * lru - handle to the pool's lru used to evict pages.
  */
 struct zswap_entry {
 	struct rb_node rbnode;
@@ -196,6 +206,7 @@  struct zswap_entry {
 		unsigned long value;
 	};
 	struct obj_cgroup *objcg;
+	struct list_head lru;
 };
 
 struct zswap_header {
@@ -368,6 +379,12 @@  static void zswap_free_entry(struct zswap_entry *entry)
 	if (!entry->length)
 		atomic_dec(&zswap_same_filled_pages);
 	else {
+		/* zpool_evictable will be removed once all 3 backends have migrated */
+		if (!zpool_evictable(entry->pool->zpool)) {
+			spin_lock(&entry->pool->lru_lock);
+			list_del(&entry->lru);
+			spin_unlock(&entry->pool->lru_lock);
+		}
 		zpool_free(entry->pool->zpool, entry->handle);
 		zswap_pool_put(entry->pool);
 	}
@@ -588,14 +605,72 @@  static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
 	return NULL;
 }
 
+static int zswap_reclaim_entry(struct zswap_pool *pool)
+{
+	struct zswap_header *zhdr;
+	struct zswap_entry *entry;
+	struct zswap_tree *tree;
+	pgoff_t swpoffset;
+	int ret;
+
+	/* Get an entry off the LRU */
+	spin_lock(&pool->lru_lock);
+	if (list_empty(&pool->lru)) {
+		spin_unlock(&pool->lru_lock);
+		return -EINVAL;
+	}
+	entry = list_last_entry(&pool->lru, struct zswap_entry, lru);
+	list_del_init(&entry->lru);
+	zhdr = zpool_map_handle(pool->zpool, entry->handle, ZPOOL_MM_RO);
+	tree = zswap_trees[swp_type(zhdr->swpentry)];
+	zpool_unmap_handle(pool->zpool, entry->handle);
+	/*
+	 * Once the lru lock is dropped, the entry might get freed. The
+	 * swpoffset is copied to the stack, and entry isn't deref'd again
+	 * until the entry is verified to still be alive in the tree.
+	 */
+	swpoffset = swp_offset(zhdr->swpentry);
+	spin_unlock(&pool->lru_lock);
+
+	/* Check for invalidate() race */
+	spin_lock(&tree->lock);
+	if (entry != zswap_rb_search(&tree->rbroot, swpoffset)) {
+		ret = -EAGAIN;
+		goto unlock;
+	}
+	/* Hold a reference to prevent a free during writeback */
+	zswap_entry_get(entry);
+	spin_unlock(&tree->lock);
+
+	ret = zswap_writeback_entry(pool->zpool, entry->handle);
+
+	spin_lock(&tree->lock);
+	if (ret) {
+		/* Writeback failed, put entry back on LRU */
+		spin_lock(&pool->lru_lock);
+		list_move(&entry->lru, &pool->lru);
+		spin_unlock(&pool->lru_lock);
+	}
+
+	/* Drop local reference */
+	zswap_entry_put(tree, entry);
+unlock:
+	spin_unlock(&tree->lock);
+	return ret ? -EAGAIN : 0;
+}
+
 static void shrink_worker(struct work_struct *w)
 {
 	struct zswap_pool *pool = container_of(w, typeof(*pool),
 						shrink_work);
 	int ret, failures = 0;
 
+	/* zpool_evictable will be removed once all 3 backends have migrated */
 	do {
-		ret = zpool_shrink(pool->zpool, 1, NULL);
+		if (zpool_evictable(pool->zpool))
+			ret = zpool_shrink(pool->zpool, 1, NULL);
+		else
+			ret = zswap_reclaim_entry(pool);
 		if (ret) {
 			zswap_reject_reclaim_fail++;
 			if (ret != -EAGAIN)
@@ -659,6 +734,8 @@  static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
 	 */
 	kref_init(&pool->kref);
 	INIT_LIST_HEAD(&pool->list);
+	INIT_LIST_HEAD(&pool->lru);
+	spin_lock_init(&pool->lru_lock);
 	INIT_WORK(&pool->shrink_work, shrink_worker);
 
 	zswap_pool_debug("created", pool);
@@ -1274,7 +1351,7 @@  static int zswap_frontswap_store(unsigned type, pgoff_t offset,
 	}
 
 	/* store */
-	hlen = zpool_evictable(entry->pool->zpool) ? sizeof(zhdr) : 0;
+	hlen = sizeof(zhdr);
 	gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
 	if (zpool_malloc_support_movable(entry->pool->zpool))
 		gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
@@ -1317,6 +1394,12 @@  static int zswap_frontswap_store(unsigned type, pgoff_t offset,
 			zswap_entry_put(tree, dupentry);
 		}
 	} while (ret == -EEXIST);
+	/* zpool_evictable will be removed once all 3 backends have migrated */
+	if (entry->length && !zpool_evictable(entry->pool->zpool)) {
+		spin_lock(&entry->pool->lru_lock);
+		list_add(&entry->lru, &entry->pool->lru);
+		spin_unlock(&entry->pool->lru_lock);
+	}
 	spin_unlock(&tree->lock);
 
 	/* update stats */
@@ -1398,8 +1481,7 @@  static int zswap_frontswap_load(unsigned type, pgoff_t offset,
 	/* decompress */
 	dlen = PAGE_SIZE;
 	src = zpool_map_handle(entry->pool->zpool, entry->handle, ZPOOL_MM_RO);
-	if (zpool_evictable(entry->pool->zpool))
-		src += sizeof(struct zswap_header);
+	src += sizeof(struct zswap_header);
 
 	if (!zpool_can_sleep_mapped(entry->pool->zpool)) {
 		memcpy(tmp, src, entry->length);
@@ -1432,6 +1514,11 @@  static int zswap_frontswap_load(unsigned type, pgoff_t offset,
 	if (!ret && zswap_exclusive_loads_enabled) {
 		zswap_invalidate_entry(tree, entry);
 		*exclusive = true;
+	} else if (entry->length && !zpool_evictable(entry->pool->zpool)) {
+		/* zpool_evictable will be removed once all 3 backends have migrated */
+		spin_lock(&entry->pool->lru_lock);
+		list_move(&entry->lru, &entry->pool->lru);
+		spin_unlock(&entry->pool->lru_lock);
 	}
 	spin_unlock(&tree->lock);