diff mbox series

[09/14] dm-zoned: per-device reclaim

Message ID 20200529173907.40529-10-hare@suse.de (mailing list archive)
State Superseded, archived
Delegated to: Mike Snitzer
Headers show
Series dm-zoned: multiple drive support | expand

Commit Message

Hannes Reinecke May 29, 2020, 5:39 p.m. UTC
Instead of having one reclaim workqueue for the entire set we should
be allocating a reclaim workqueue per device; that will reduce
contention and should boost performance for a multi-device setup.

Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 drivers/md/dm-zoned-reclaim.c | 66 +++++++++++++++++++++++++++----------------
 drivers/md/dm-zoned-target.c  | 41 +++++++++++++++++----------
 drivers/md/dm-zoned.h         | 38 +++++++++++++------------
 3 files changed, 88 insertions(+), 57 deletions(-)

Comments

Damien Le Moal May 31, 2020, 9:19 a.m. UTC | #1
On Fri, 2020-05-29 at 19:39 +0200, Hannes Reinecke wrote:
> Instead of having one reclaim workqueue for the entire set we should
> be allocating a reclaim workqueue per device; that will reduce
> contention and should boost performance for a multi-device setup.
> 
> Signed-off-by: Hannes Reinecke <hare@suse.de>
> ---
>  drivers/md/dm-zoned-reclaim.c | 66 +++++++++++++++++++++++++++----------------
>  drivers/md/dm-zoned-target.c  | 41 +++++++++++++++++----------
>  drivers/md/dm-zoned.h         | 38 +++++++++++++------------
>  3 files changed, 88 insertions(+), 57 deletions(-)
> 
> diff --git a/drivers/md/dm-zoned-reclaim.c b/drivers/md/dm-zoned-reclaim.c
> index e9e3b730e258..09843645248a 100644
> --- a/drivers/md/dm-zoned-reclaim.c
> +++ b/drivers/md/dm-zoned-reclaim.c
> @@ -21,6 +21,8 @@ struct dmz_reclaim {
>  	struct dm_kcopyd_throttle kc_throttle;
>  	int			kc_err;
>  
> +	int			dev_idx;
> +
>  	unsigned long		flags;
>  
>  	/* Last target access time */
> @@ -198,8 +200,8 @@ static int dmz_reclaim_buf(struct dmz_reclaim *zrc, struct dm_zone *dzone)
>  	struct dmz_metadata *zmd = zrc->metadata;
>  	int ret;
>  
> -	DMDEBUG("(%s): Chunk %u, move buf zone %u (weight %u) to data zone %u (weight %u)",
> -		dmz_metadata_label(zmd),
> +	DMDEBUG("(%s/%u): Chunk %u, move buf zone %u (weight %u) to data zone %u (weight %u)",
> +		dmz_metadata_label(zmd), zrc->dev_idx,
>  		dzone->chunk, bzone->id, dmz_weight(bzone),
>  		dzone->id, dmz_weight(dzone));
>  
> @@ -237,8 +239,8 @@ static int dmz_reclaim_seq_data(struct dmz_reclaim *zrc, struct dm_zone *dzone)
>  	struct dmz_metadata *zmd = zrc->metadata;
>  	int ret = 0;
>  
> -	DMDEBUG("(%s): Chunk %u, move data zone %u (weight %u) to buf zone %u (weight %u)",
> -		dmz_metadata_label(zmd),
> +	DMDEBUG("(%s/%u): Chunk %u, move data zone %u (weight %u) to buf zone %u (weight %u)",
> +		dmz_metadata_label(zmd), zrc->dev_idx,
>  		chunk, dzone->id, dmz_weight(dzone),
>  		bzone->id, dmz_weight(bzone));
>  
> @@ -295,8 +297,8 @@ static int dmz_reclaim_rnd_data(struct dmz_reclaim *zrc, struct dm_zone *dzone)
>  	if (!szone)
>  		return -ENOSPC;
>  
> -	DMDEBUG("(%s): Chunk %u, move %s zone %u (weight %u) to %s zone %u",
> -		dmz_metadata_label(zmd), chunk,
> +	DMDEBUG("(%s/%u): Chunk %u, move %s zone %u (weight %u) to %s zone %u",
> +		dmz_metadata_label(zmd), zrc->dev_idx, chunk,
>  		dmz_is_cache(dzone) ? "cache" : "rnd",
>  		dzone->id, dmz_weight(dzone),
>  		dmz_is_rnd(szone) ? "rnd" : "seq", szone->id);
> @@ -369,8 +371,8 @@ static int dmz_do_reclaim(struct dmz_reclaim *zrc)
>  	/* Get a data zone */
>  	dzone = dmz_get_zone_for_reclaim(zmd, dmz_target_idle(zrc));
>  	if (!dzone) {
> -		DMDEBUG("(%s): No zone found to reclaim",
> -			dmz_metadata_label(zmd));
> +		DMDEBUG("(%s/%u): No zone found to reclaim",
> +			dmz_metadata_label(zmd), zrc->dev_idx);
>  		return -EBUSY;
>  	}
>  
> @@ -417,24 +419,26 @@ static int dmz_do_reclaim(struct dmz_reclaim *zrc)
>  out:
>  	if (ret) {
>  		if (ret == -EINTR)
> -			DMDEBUG("(%s): reclaim zone %u interrupted",
> -				dmz_metadata_label(zmd), rzone->id);
> +			DMDEBUG("(%s/%u): reclaim zone %u interrupted",
> +				dmz_metadata_label(zmd), zrc->dev_idx,
> +				rzone->id);
>  		else
> -			DMDEBUG("(%s): Failed to reclaim zone %u, err %d",
> -				dmz_metadata_label(zmd), rzone->id, ret);
> +			DMDEBUG("(%s/%u): Failed to reclaim zone %u, err %d",
> +				dmz_metadata_label(zmd), zrc->dev_idx,
> +				rzone->id, ret);
>  		dmz_unlock_zone_reclaim(dzone);
>  		return ret;
>  	}
>  
>  	ret = dmz_flush_metadata(zrc->metadata);
>  	if (ret) {
> -		DMDEBUG("(%s): Metadata flush for zone %u failed, err %d",
> -			dmz_metadata_label(zmd), rzone->id, ret);
> +		DMDEBUG("(%s/%u): Metadata flush for zone %u failed, err %d",
> +			dmz_metadata_label(zmd), zrc->dev_idx, rzone->id, ret);
>  		return ret;
>  	}
>  
> -	DMDEBUG("(%s): Reclaimed zone %u in %u ms",
> -		dmz_metadata_label(zmd),
> +	DMDEBUG("(%s/%u): Reclaimed zone %u in %u ms",
> +		dmz_metadata_label(zmd), zrc->dev_idx,
>  		rzone->id, jiffies_to_msecs(jiffies - start));
>  	return 0;
>  }
> @@ -461,10 +465,20 @@ static unsigned int dmz_reclaim_percentage(struct dmz_reclaim *zrc)
>   */
>  static bool dmz_should_reclaim(struct dmz_reclaim *zrc, unsigned int p_unmap)
>  {
> -	unsigned int nr_reclaim = dmz_nr_rnd_zones(zrc->metadata);
> +	unsigned int nr_reclaim;
> +
> +	nr_reclaim = dmz_nr_rnd_zones(zrc->metadata);

This change is not really necessary: same code in the end.

>  
> -	if (dmz_nr_cache_zones(zrc->metadata))
> +	if (dmz_nr_cache_zones(zrc->metadata)) {
> +		/*
> +		 * The first device in a multi-device
> +		 * setup only contains cache zones, so
> +		 * never start reclaim there.
> +		 */
> +		if (zrc->dev_idx == 0)
> +			return false;
>  		nr_reclaim += dmz_nr_cache_zones(zrc->metadata);
> +	}
>  
>  	/* Reclaim when idle */
>  	if (dmz_target_idle(zrc) && nr_reclaim)
> @@ -488,7 +502,7 @@ static void dmz_reclaim_work(struct work_struct *work)
>  {
>  	struct dmz_reclaim *zrc = container_of(work, struct dmz_reclaim, work.work);
>  	struct dmz_metadata *zmd = zrc->metadata;
> -	unsigned int p_unmap;
> +	unsigned int p_unmap, nr_unmap_rnd = 0, nr_rnd = 0;
>  	int ret;
>  
>  	if (dmz_dev_is_dying(zmd))
> @@ -514,8 +528,11 @@ static void dmz_reclaim_work(struct work_struct *work)
>  		zrc->kc_throttle.throttle = min(75U, 100U - p_unmap / 2);
>  	}
>  
> -	DMDEBUG("(%s): Reclaim (%u): %s, %u%% free zones (%u/%u cache %u/%u random)",
> -		dmz_metadata_label(zmd),
> +	nr_unmap_rnd = dmz_nr_unmap_rnd_zones(zmd);
> +	nr_rnd = dmz_nr_rnd_zones(zmd);
> +
> +	DMDEBUG("(%s/%u): Reclaim (%u): %s, %u%% free zones (%u/%u cache %u/%u random)",
> +		dmz_metadata_label(zmd), zrc->dev_idx,
>  		zrc->kc_throttle.throttle,
>  		(dmz_target_idle(zrc) ? "Idle" : "Busy"),
>  		p_unmap, dmz_nr_unmap_cache_zones(zmd),
> @@ -536,7 +553,7 @@ static void dmz_reclaim_work(struct work_struct *work)
>   * Initialize reclaim.
>   */
>  int dmz_ctr_reclaim(struct dmz_metadata *zmd,
> -		    struct dmz_reclaim **reclaim)
> +		    struct dmz_reclaim **reclaim, int idx)
>  {
>  	struct dmz_reclaim *zrc;
>  	int ret;
> @@ -547,6 +564,7 @@ int dmz_ctr_reclaim(struct dmz_metadata *zmd,
>  
>  	zrc->metadata = zmd;
>  	zrc->atime = jiffies;
> +	zrc->dev_idx = idx;
>  
>  	/* Reclaim kcopyd client */
>  	zrc->kc = dm_kcopyd_client_create(&zrc->kc_throttle);
> @@ -558,8 +576,8 @@ int dmz_ctr_reclaim(struct dmz_metadata *zmd,
>  
>  	/* Reclaim work */
>  	INIT_DELAYED_WORK(&zrc->work, dmz_reclaim_work);
> -	zrc->wq = alloc_ordered_workqueue("dmz_rwq_%s", WQ_MEM_RECLAIM,
> -					  dmz_metadata_label(zmd));
> +	zrc->wq = alloc_ordered_workqueue("dmz_rwq_%s_%d", WQ_MEM_RECLAIM,
> +					  dmz_metadata_label(zmd), idx);
>  	if (!zrc->wq) {
>  		ret = -ENOMEM;
>  		goto err;
> diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
> index 087dd4801663..97d63d8e6c19 100644
> --- a/drivers/md/dm-zoned-target.c
> +++ b/drivers/md/dm-zoned-target.c
> @@ -41,6 +41,7 @@ struct dm_chunk_work {
>   */
>  struct dmz_target {
>  	struct dm_dev		*ddev[DMZ_MAX_DEVS];
> +	unsigned int		nr_ddevs;
>  
>  	unsigned long		flags;
>  
> @@ -50,9 +51,6 @@ struct dmz_target {
>  	/* For metadata handling */
>  	struct dmz_metadata     *metadata;
>  
> -	/* For reclaim */
> -	struct dmz_reclaim	*reclaim;
> -
>  	/* For chunk work */
>  	struct radix_tree_root	chunk_rxtree;
>  	struct workqueue_struct *chunk_wq;
> @@ -404,14 +402,15 @@ static void dmz_handle_bio(struct dmz_target *dmz, struct dm_chunk_work *cw,
>  		dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
>  	struct dmz_metadata *zmd = dmz->metadata;
>  	struct dm_zone *zone;
> -	int ret;
> +	int i, ret;
>  
>  	/*
>  	 * Write may trigger a zone allocation. So make sure the
>  	 * allocation can succeed.
>  	 */
>  	if (bio_op(bio) == REQ_OP_WRITE)
> -		dmz_schedule_reclaim(dmz->reclaim);
> +		for (i = 0; i < dmz->nr_ddevs; i++)
> +			dmz_schedule_reclaim(dmz->dev[i].reclaim);
>  
>  	dmz_lock_metadata(zmd);
>  
> @@ -431,6 +430,7 @@ static void dmz_handle_bio(struct dmz_target *dmz, struct dm_chunk_work *cw,
>  	if (zone) {
>  		dmz_activate_zone(zone);
>  		bioctx->zone = zone;
> +		dmz_reclaim_bio_acc(zone->dev->reclaim);
>  	}
>  
>  	switch (bio_op(bio)) {
> @@ -577,7 +577,6 @@ static int dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio)
>  
>  	bio_list_add(&cw->bio_list, bio);
>  
> -	dmz_reclaim_bio_acc(dmz->reclaim);
>  	if (queue_work(dmz->chunk_wq, &cw->work))
>  		dmz_get_chunk_work(cw);
>  out:
> @@ -822,7 +821,7 @@ static int dmz_fixup_devices(struct dm_target *ti)
>  static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv)
>  {
>  	struct dmz_target *dmz;
> -	int ret;
> +	int ret, i;
>  
>  	/* Check arguments */
>  	if (argc < 1 || argc > 2) {
> @@ -842,6 +841,7 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv)
>  		kfree(dmz);
>  		return -ENOMEM;
>  	}
> +	dmz->nr_ddevs = argc;
>  	ti->private = dmz;
>  
>  	/* Get the target zoned block device */
> @@ -916,10 +916,12 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv)
>  	mod_delayed_work(dmz->flush_wq, &dmz->flush_work, DMZ_FLUSH_PERIOD);
>  
>  	/* Initialize reclaim */
> -	ret = dmz_ctr_reclaim(dmz->metadata, &dmz->reclaim);
> -	if (ret) {
> -		ti->error = "Zone reclaim initialization failed";
> -		goto err_fwq;
> +	for (i = 0; i < dmz->nr_ddevs; i++) {
> +		ret = dmz_ctr_reclaim(dmz->metadata, &dmz->dev[i].reclaim, i);
> +		if (ret) {
> +			ti->error = "Zone reclaim initialization failed";
> +			goto err_fwq;
> +		}
>  	}
>  
>  	DMINFO("(%s): Target device: %llu 512-byte logical sectors (%llu blocks)",
> @@ -952,11 +954,13 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv)
>  static void dmz_dtr(struct dm_target *ti)
>  {
>  	struct dmz_target *dmz = ti->private;
> +	int i;
>  
>  	flush_workqueue(dmz->chunk_wq);
>  	destroy_workqueue(dmz->chunk_wq);
>  
> -	dmz_dtr_reclaim(dmz->reclaim);
> +	for (i = 0; i < dmz->nr_ddevs; i++)
> +		dmz_dtr_reclaim(dmz->dev[i].reclaim);
>  
>  	cancel_delayed_work_sync(&dmz->flush_work);
>  	destroy_workqueue(dmz->flush_wq);
> @@ -1025,9 +1029,11 @@ static int dmz_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
>  static void dmz_suspend(struct dm_target *ti)
>  {
>  	struct dmz_target *dmz = ti->private;
> +	int i;
>  
>  	flush_workqueue(dmz->chunk_wq);
> -	dmz_suspend_reclaim(dmz->reclaim);
> +	for (i = 0; i < dmz->nr_ddevs; i++)
> +		dmz_suspend_reclaim(dmz->dev[i].reclaim);
>  	cancel_delayed_work_sync(&dmz->flush_work);
>  }
>  
> @@ -1037,9 +1043,11 @@ static void dmz_suspend(struct dm_target *ti)
>  static void dmz_resume(struct dm_target *ti)
>  {
>  	struct dmz_target *dmz = ti->private;
> +	int i;
>  
>  	queue_delayed_work(dmz->flush_wq, &dmz->flush_work, DMZ_FLUSH_PERIOD);
> -	dmz_resume_reclaim(dmz->reclaim);
> +	for (i = 0; i < dmz->nr_ddevs; i++)
> +		dmz_resume_reclaim(dmz->dev[i].reclaim);
>  }
>  
>  static int dmz_iterate_devices(struct dm_target *ti,
> @@ -1100,7 +1108,10 @@ static int dmz_message(struct dm_target *ti, unsigned int argc, char **argv,
>  	int r = -EINVAL;
>  
>  	if (!strcasecmp(argv[0], "reclaim")) {
> -		dmz_schedule_reclaim(dmz->reclaim);
> +		int i;
> +
> +		for (i = 0; i < dmz->nr_ddevs; i++)
> +			dmz_schedule_reclaim(dmz->dev[i].reclaim);
>  		r = 0;
>  	} else
>  		DMERR("unrecognized message %s", argv[0]);
> diff --git a/drivers/md/dm-zoned.h b/drivers/md/dm-zoned.h
> index 983f5b5e9fa0..0cc3459f78ce 100644
> --- a/drivers/md/dm-zoned.h
> +++ b/drivers/md/dm-zoned.h
> @@ -54,6 +54,7 @@ struct dmz_reclaim;
>  struct dmz_dev {
>  	struct block_device	*bdev;
>  	struct dmz_metadata	*metadata;
> +	struct dmz_reclaim	*reclaim;
>  
>  	char			name[BDEVNAME_SIZE];
>  	uuid_t			uuid;
> @@ -229,23 +230,6 @@ static inline void dmz_activate_zone(struct dm_zone *zone)
>  	atomic_inc(&zone->refcount);
>  }
>  
> -/*
> - * Deactivate a zone. This decrement the zone reference counter
> - * indicating that all BIOs to the zone have completed when the count is 0.
> - */
> -static inline void dmz_deactivate_zone(struct dm_zone *zone)
> -{
> -	atomic_dec(&zone->refcount);
> -}
> -
> -/*
> - * Test if a zone is active, that is, has a refcount > 0.
> - */
> -static inline bool dmz_is_active(struct dm_zone *zone)
> -{
> -	return atomic_read(&zone->refcount);
> -}
> -
>  int dmz_lock_zone_reclaim(struct dm_zone *zone);
>  void dmz_unlock_zone_reclaim(struct dm_zone *zone);
>  struct dm_zone *dmz_get_zone_for_reclaim(struct dmz_metadata *zmd, bool idle);
> @@ -272,7 +256,7 @@ int dmz_merge_valid_blocks(struct dmz_metadata *zmd, struct dm_zone *from_zone,
>  /*
>   * Functions defined in dm-zoned-reclaim.c
>   */
> -int dmz_ctr_reclaim(struct dmz_metadata *zmd, struct dmz_reclaim **zrc);
> +int dmz_ctr_reclaim(struct dmz_metadata *zmd, struct dmz_reclaim **zrc, int idx);
>  void dmz_dtr_reclaim(struct dmz_reclaim *zrc);
>  void dmz_suspend_reclaim(struct dmz_reclaim *zrc);
>  void dmz_resume_reclaim(struct dmz_reclaim *zrc);
> @@ -285,4 +269,22 @@ void dmz_schedule_reclaim(struct dmz_reclaim *zrc);
>  bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev);
>  bool dmz_check_bdev(struct dmz_dev *dmz_dev);
>  
> +/*
> + * Deactivate a zone. This decrement the zone reference counter
> + * indicating that all BIOs to the zone have completed when the count is 0.
> + */
> +static inline void dmz_deactivate_zone(struct dm_zone *zone)
> +{
> +	dmz_reclaim_bio_acc(zone->dev->reclaim);
> +	atomic_dec(&zone->refcount);
> +}
> +
> +/*
> + * Test if a zone is active, that is, has a refcount > 0.
> + */
> +static inline bool dmz_is_active(struct dm_zone *zone)
> +{
> +	return atomic_read(&zone->refcount);
> +}
> +
>  #endif /* DM_ZONED_H */

Apart from the nit above, looks good.

Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
diff mbox series

Patch

diff --git a/drivers/md/dm-zoned-reclaim.c b/drivers/md/dm-zoned-reclaim.c
index e9e3b730e258..09843645248a 100644
--- a/drivers/md/dm-zoned-reclaim.c
+++ b/drivers/md/dm-zoned-reclaim.c
@@ -21,6 +21,8 @@  struct dmz_reclaim {
 	struct dm_kcopyd_throttle kc_throttle;
 	int			kc_err;
 
+	int			dev_idx;
+
 	unsigned long		flags;
 
 	/* Last target access time */
@@ -198,8 +200,8 @@  static int dmz_reclaim_buf(struct dmz_reclaim *zrc, struct dm_zone *dzone)
 	struct dmz_metadata *zmd = zrc->metadata;
 	int ret;
 
-	DMDEBUG("(%s): Chunk %u, move buf zone %u (weight %u) to data zone %u (weight %u)",
-		dmz_metadata_label(zmd),
+	DMDEBUG("(%s/%u): Chunk %u, move buf zone %u (weight %u) to data zone %u (weight %u)",
+		dmz_metadata_label(zmd), zrc->dev_idx,
 		dzone->chunk, bzone->id, dmz_weight(bzone),
 		dzone->id, dmz_weight(dzone));
 
@@ -237,8 +239,8 @@  static int dmz_reclaim_seq_data(struct dmz_reclaim *zrc, struct dm_zone *dzone)
 	struct dmz_metadata *zmd = zrc->metadata;
 	int ret = 0;
 
-	DMDEBUG("(%s): Chunk %u, move data zone %u (weight %u) to buf zone %u (weight %u)",
-		dmz_metadata_label(zmd),
+	DMDEBUG("(%s/%u): Chunk %u, move data zone %u (weight %u) to buf zone %u (weight %u)",
+		dmz_metadata_label(zmd), zrc->dev_idx,
 		chunk, dzone->id, dmz_weight(dzone),
 		bzone->id, dmz_weight(bzone));
 
@@ -295,8 +297,8 @@  static int dmz_reclaim_rnd_data(struct dmz_reclaim *zrc, struct dm_zone *dzone)
 	if (!szone)
 		return -ENOSPC;
 
-	DMDEBUG("(%s): Chunk %u, move %s zone %u (weight %u) to %s zone %u",
-		dmz_metadata_label(zmd), chunk,
+	DMDEBUG("(%s/%u): Chunk %u, move %s zone %u (weight %u) to %s zone %u",
+		dmz_metadata_label(zmd), zrc->dev_idx, chunk,
 		dmz_is_cache(dzone) ? "cache" : "rnd",
 		dzone->id, dmz_weight(dzone),
 		dmz_is_rnd(szone) ? "rnd" : "seq", szone->id);
@@ -369,8 +371,8 @@  static int dmz_do_reclaim(struct dmz_reclaim *zrc)
 	/* Get a data zone */
 	dzone = dmz_get_zone_for_reclaim(zmd, dmz_target_idle(zrc));
 	if (!dzone) {
-		DMDEBUG("(%s): No zone found to reclaim",
-			dmz_metadata_label(zmd));
+		DMDEBUG("(%s/%u): No zone found to reclaim",
+			dmz_metadata_label(zmd), zrc->dev_idx);
 		return -EBUSY;
 	}
 
@@ -417,24 +419,26 @@  static int dmz_do_reclaim(struct dmz_reclaim *zrc)
 out:
 	if (ret) {
 		if (ret == -EINTR)
-			DMDEBUG("(%s): reclaim zone %u interrupted",
-				dmz_metadata_label(zmd), rzone->id);
+			DMDEBUG("(%s/%u): reclaim zone %u interrupted",
+				dmz_metadata_label(zmd), zrc->dev_idx,
+				rzone->id);
 		else
-			DMDEBUG("(%s): Failed to reclaim zone %u, err %d",
-				dmz_metadata_label(zmd), rzone->id, ret);
+			DMDEBUG("(%s/%u): Failed to reclaim zone %u, err %d",
+				dmz_metadata_label(zmd), zrc->dev_idx,
+				rzone->id, ret);
 		dmz_unlock_zone_reclaim(dzone);
 		return ret;
 	}
 
 	ret = dmz_flush_metadata(zrc->metadata);
 	if (ret) {
-		DMDEBUG("(%s): Metadata flush for zone %u failed, err %d",
-			dmz_metadata_label(zmd), rzone->id, ret);
+		DMDEBUG("(%s/%u): Metadata flush for zone %u failed, err %d",
+			dmz_metadata_label(zmd), zrc->dev_idx, rzone->id, ret);
 		return ret;
 	}
 
-	DMDEBUG("(%s): Reclaimed zone %u in %u ms",
-		dmz_metadata_label(zmd),
+	DMDEBUG("(%s/%u): Reclaimed zone %u in %u ms",
+		dmz_metadata_label(zmd), zrc->dev_idx,
 		rzone->id, jiffies_to_msecs(jiffies - start));
 	return 0;
 }
@@ -461,10 +465,20 @@  static unsigned int dmz_reclaim_percentage(struct dmz_reclaim *zrc)
  */
 static bool dmz_should_reclaim(struct dmz_reclaim *zrc, unsigned int p_unmap)
 {
-	unsigned int nr_reclaim = dmz_nr_rnd_zones(zrc->metadata);
+	unsigned int nr_reclaim;
+
+	nr_reclaim = dmz_nr_rnd_zones(zrc->metadata);
 
-	if (dmz_nr_cache_zones(zrc->metadata))
+	if (dmz_nr_cache_zones(zrc->metadata)) {
+		/*
+		 * The first device in a multi-device
+		 * setup only contains cache zones, so
+		 * never start reclaim there.
+		 */
+		if (zrc->dev_idx == 0)
+			return false;
 		nr_reclaim += dmz_nr_cache_zones(zrc->metadata);
+	}
 
 	/* Reclaim when idle */
 	if (dmz_target_idle(zrc) && nr_reclaim)
@@ -488,7 +502,7 @@  static void dmz_reclaim_work(struct work_struct *work)
 {
 	struct dmz_reclaim *zrc = container_of(work, struct dmz_reclaim, work.work);
 	struct dmz_metadata *zmd = zrc->metadata;
-	unsigned int p_unmap;
+	unsigned int p_unmap, nr_unmap_rnd = 0, nr_rnd = 0;
 	int ret;
 
 	if (dmz_dev_is_dying(zmd))
@@ -514,8 +528,11 @@  static void dmz_reclaim_work(struct work_struct *work)
 		zrc->kc_throttle.throttle = min(75U, 100U - p_unmap / 2);
 	}
 
-	DMDEBUG("(%s): Reclaim (%u): %s, %u%% free zones (%u/%u cache %u/%u random)",
-		dmz_metadata_label(zmd),
+	nr_unmap_rnd = dmz_nr_unmap_rnd_zones(zmd);
+	nr_rnd = dmz_nr_rnd_zones(zmd);
+
+	DMDEBUG("(%s/%u): Reclaim (%u): %s, %u%% free zones (%u/%u cache %u/%u random)",
+		dmz_metadata_label(zmd), zrc->dev_idx,
 		zrc->kc_throttle.throttle,
 		(dmz_target_idle(zrc) ? "Idle" : "Busy"),
 		p_unmap, dmz_nr_unmap_cache_zones(zmd),
@@ -536,7 +553,7 @@  static void dmz_reclaim_work(struct work_struct *work)
  * Initialize reclaim.
  */
 int dmz_ctr_reclaim(struct dmz_metadata *zmd,
-		    struct dmz_reclaim **reclaim)
+		    struct dmz_reclaim **reclaim, int idx)
 {
 	struct dmz_reclaim *zrc;
 	int ret;
@@ -547,6 +564,7 @@  int dmz_ctr_reclaim(struct dmz_metadata *zmd,
 
 	zrc->metadata = zmd;
 	zrc->atime = jiffies;
+	zrc->dev_idx = idx;
 
 	/* Reclaim kcopyd client */
 	zrc->kc = dm_kcopyd_client_create(&zrc->kc_throttle);
@@ -558,8 +576,8 @@  int dmz_ctr_reclaim(struct dmz_metadata *zmd,
 
 	/* Reclaim work */
 	INIT_DELAYED_WORK(&zrc->work, dmz_reclaim_work);
-	zrc->wq = alloc_ordered_workqueue("dmz_rwq_%s", WQ_MEM_RECLAIM,
-					  dmz_metadata_label(zmd));
+	zrc->wq = alloc_ordered_workqueue("dmz_rwq_%s_%d", WQ_MEM_RECLAIM,
+					  dmz_metadata_label(zmd), idx);
 	if (!zrc->wq) {
 		ret = -ENOMEM;
 		goto err;
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
index 087dd4801663..97d63d8e6c19 100644
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -41,6 +41,7 @@  struct dm_chunk_work {
  */
 struct dmz_target {
 	struct dm_dev		*ddev[DMZ_MAX_DEVS];
+	unsigned int		nr_ddevs;
 
 	unsigned long		flags;
 
@@ -50,9 +51,6 @@  struct dmz_target {
 	/* For metadata handling */
 	struct dmz_metadata     *metadata;
 
-	/* For reclaim */
-	struct dmz_reclaim	*reclaim;
-
 	/* For chunk work */
 	struct radix_tree_root	chunk_rxtree;
 	struct workqueue_struct *chunk_wq;
@@ -404,14 +402,15 @@  static void dmz_handle_bio(struct dmz_target *dmz, struct dm_chunk_work *cw,
 		dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
 	struct dmz_metadata *zmd = dmz->metadata;
 	struct dm_zone *zone;
-	int ret;
+	int i, ret;
 
 	/*
 	 * Write may trigger a zone allocation. So make sure the
 	 * allocation can succeed.
 	 */
 	if (bio_op(bio) == REQ_OP_WRITE)
-		dmz_schedule_reclaim(dmz->reclaim);
+		for (i = 0; i < dmz->nr_ddevs; i++)
+			dmz_schedule_reclaim(dmz->dev[i].reclaim);
 
 	dmz_lock_metadata(zmd);
 
@@ -431,6 +430,7 @@  static void dmz_handle_bio(struct dmz_target *dmz, struct dm_chunk_work *cw,
 	if (zone) {
 		dmz_activate_zone(zone);
 		bioctx->zone = zone;
+		dmz_reclaim_bio_acc(zone->dev->reclaim);
 	}
 
 	switch (bio_op(bio)) {
@@ -577,7 +577,6 @@  static int dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio)
 
 	bio_list_add(&cw->bio_list, bio);
 
-	dmz_reclaim_bio_acc(dmz->reclaim);
 	if (queue_work(dmz->chunk_wq, &cw->work))
 		dmz_get_chunk_work(cw);
 out:
@@ -822,7 +821,7 @@  static int dmz_fixup_devices(struct dm_target *ti)
 static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 {
 	struct dmz_target *dmz;
-	int ret;
+	int ret, i;
 
 	/* Check arguments */
 	if (argc < 1 || argc > 2) {
@@ -842,6 +841,7 @@  static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		kfree(dmz);
 		return -ENOMEM;
 	}
+	dmz->nr_ddevs = argc;
 	ti->private = dmz;
 
 	/* Get the target zoned block device */
@@ -916,10 +916,12 @@  static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	mod_delayed_work(dmz->flush_wq, &dmz->flush_work, DMZ_FLUSH_PERIOD);
 
 	/* Initialize reclaim */
-	ret = dmz_ctr_reclaim(dmz->metadata, &dmz->reclaim);
-	if (ret) {
-		ti->error = "Zone reclaim initialization failed";
-		goto err_fwq;
+	for (i = 0; i < dmz->nr_ddevs; i++) {
+		ret = dmz_ctr_reclaim(dmz->metadata, &dmz->dev[i].reclaim, i);
+		if (ret) {
+			ti->error = "Zone reclaim initialization failed";
+			goto err_fwq;
+		}
 	}
 
 	DMINFO("(%s): Target device: %llu 512-byte logical sectors (%llu blocks)",
@@ -952,11 +954,13 @@  static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 static void dmz_dtr(struct dm_target *ti)
 {
 	struct dmz_target *dmz = ti->private;
+	int i;
 
 	flush_workqueue(dmz->chunk_wq);
 	destroy_workqueue(dmz->chunk_wq);
 
-	dmz_dtr_reclaim(dmz->reclaim);
+	for (i = 0; i < dmz->nr_ddevs; i++)
+		dmz_dtr_reclaim(dmz->dev[i].reclaim);
 
 	cancel_delayed_work_sync(&dmz->flush_work);
 	destroy_workqueue(dmz->flush_wq);
@@ -1025,9 +1029,11 @@  static int dmz_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
 static void dmz_suspend(struct dm_target *ti)
 {
 	struct dmz_target *dmz = ti->private;
+	int i;
 
 	flush_workqueue(dmz->chunk_wq);
-	dmz_suspend_reclaim(dmz->reclaim);
+	for (i = 0; i < dmz->nr_ddevs; i++)
+		dmz_suspend_reclaim(dmz->dev[i].reclaim);
 	cancel_delayed_work_sync(&dmz->flush_work);
 }
 
@@ -1037,9 +1043,11 @@  static void dmz_suspend(struct dm_target *ti)
 static void dmz_resume(struct dm_target *ti)
 {
 	struct dmz_target *dmz = ti->private;
+	int i;
 
 	queue_delayed_work(dmz->flush_wq, &dmz->flush_work, DMZ_FLUSH_PERIOD);
-	dmz_resume_reclaim(dmz->reclaim);
+	for (i = 0; i < dmz->nr_ddevs; i++)
+		dmz_resume_reclaim(dmz->dev[i].reclaim);
 }
 
 static int dmz_iterate_devices(struct dm_target *ti,
@@ -1100,7 +1108,10 @@  static int dmz_message(struct dm_target *ti, unsigned int argc, char **argv,
 	int r = -EINVAL;
 
 	if (!strcasecmp(argv[0], "reclaim")) {
-		dmz_schedule_reclaim(dmz->reclaim);
+		int i;
+
+		for (i = 0; i < dmz->nr_ddevs; i++)
+			dmz_schedule_reclaim(dmz->dev[i].reclaim);
 		r = 0;
 	} else
 		DMERR("unrecognized message %s", argv[0]);
diff --git a/drivers/md/dm-zoned.h b/drivers/md/dm-zoned.h
index 983f5b5e9fa0..0cc3459f78ce 100644
--- a/drivers/md/dm-zoned.h
+++ b/drivers/md/dm-zoned.h
@@ -54,6 +54,7 @@  struct dmz_reclaim;
 struct dmz_dev {
 	struct block_device	*bdev;
 	struct dmz_metadata	*metadata;
+	struct dmz_reclaim	*reclaim;
 
 	char			name[BDEVNAME_SIZE];
 	uuid_t			uuid;
@@ -229,23 +230,6 @@  static inline void dmz_activate_zone(struct dm_zone *zone)
 	atomic_inc(&zone->refcount);
 }
 
-/*
- * Deactivate a zone. This decrement the zone reference counter
- * indicating that all BIOs to the zone have completed when the count is 0.
- */
-static inline void dmz_deactivate_zone(struct dm_zone *zone)
-{
-	atomic_dec(&zone->refcount);
-}
-
-/*
- * Test if a zone is active, that is, has a refcount > 0.
- */
-static inline bool dmz_is_active(struct dm_zone *zone)
-{
-	return atomic_read(&zone->refcount);
-}
-
 int dmz_lock_zone_reclaim(struct dm_zone *zone);
 void dmz_unlock_zone_reclaim(struct dm_zone *zone);
 struct dm_zone *dmz_get_zone_for_reclaim(struct dmz_metadata *zmd, bool idle);
@@ -272,7 +256,7 @@  int dmz_merge_valid_blocks(struct dmz_metadata *zmd, struct dm_zone *from_zone,
 /*
  * Functions defined in dm-zoned-reclaim.c
  */
-int dmz_ctr_reclaim(struct dmz_metadata *zmd, struct dmz_reclaim **zrc);
+int dmz_ctr_reclaim(struct dmz_metadata *zmd, struct dmz_reclaim **zrc, int idx);
 void dmz_dtr_reclaim(struct dmz_reclaim *zrc);
 void dmz_suspend_reclaim(struct dmz_reclaim *zrc);
 void dmz_resume_reclaim(struct dmz_reclaim *zrc);
@@ -285,4 +269,22 @@  void dmz_schedule_reclaim(struct dmz_reclaim *zrc);
 bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev);
 bool dmz_check_bdev(struct dmz_dev *dmz_dev);
 
+/*
+ * Deactivate a zone. This decrement the zone reference counter
+ * indicating that all BIOs to the zone have completed when the count is 0.
+ */
+static inline void dmz_deactivate_zone(struct dm_zone *zone)
+{
+	dmz_reclaim_bio_acc(zone->dev->reclaim);
+	atomic_dec(&zone->refcount);
+}
+
+/*
+ * Test if a zone is active, that is, has a refcount > 0.
+ */
+static inline bool dmz_is_active(struct dm_zone *zone)
+{
+	return atomic_read(&zone->refcount);
+}
+
 #endif /* DM_ZONED_H */