diff mbox series

[RESEND,v6,8/9] md: Implement dax_holder_operations

Message ID 20210730100158.3117319-9-ruansy.fnst@fujitsu.com (mailing list archive)
State New
Headers show
Series fsdax: introduce fs query to support reflink | expand

Commit Message

Shiyang Ruan July 30, 2021, 10:01 a.m. UTC
This is the case where the holder represents a mapped device or, more
exactly, a list of mapped devices (because it is possible to create more
than one mapped device on one pmem device).

Find out which mapped device the offset belongs to, and translate the
offset from the target device to the mapped device.  Once that is done,
call dax_holder_notify_failure() for the holder of this mapped device.

Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>
---
 drivers/md/dm.c | 126 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 125 insertions(+), 1 deletion(-)

Comments

Jane Chu Aug. 6, 2021, 12:48 a.m. UTC | #1
On 7/30/2021 3:01 AM, Shiyang Ruan wrote:
> This is the case where the holder represents a mapped device, or a list
> of mapped devices more exactly(because it is possible to create more
> than one mapped device on one pmem device).

Could you share how you test this scenario?

thanks,
-jane

> 
> Find out which mapped device the offset belongs to, and translate the
> offset from target device to mapped device.  When it is done, call
> dax_corrupted_range() for the holder of this mapped device.
> 
> Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>
> ---
>   drivers/md/dm.c | 126 +++++++++++++++++++++++++++++++++++++++++++++++-
>   1 file changed, 125 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/md/dm.c b/drivers/md/dm.c
> index 2c5f9e585211..a35b9a97a73f 100644
> --- a/drivers/md/dm.c
> +++ b/drivers/md/dm.c
> @@ -626,7 +626,11 @@ static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU)
>   }
>   
>   static char *_dm_claim_ptr = "I belong to device-mapper";
> -
> +static const struct dax_holder_operations dm_dax_holder_ops;
> +struct dm_holder {
> +	struct list_head list;
> +	struct mapped_device *md;
> +};
>   /*
>    * Open a table device so we can use it as a map destination.
>    */
> @@ -634,6 +638,8 @@ static int open_table_device(struct table_device *td, dev_t dev,
>   			     struct mapped_device *md)
>   {
>   	struct block_device *bdev;
> +	struct list_head *holders;
> +	struct dm_holder *holder;
>   
>   	int r;
>   
> @@ -651,6 +657,19 @@ static int open_table_device(struct table_device *td, dev_t dev,
>   
>   	td->dm_dev.bdev = bdev;
>   	td->dm_dev.dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
> +	if (!td->dm_dev.dax_dev)
> +		return 0;
> +
> +	holders = dax_get_holder(td->dm_dev.dax_dev);
> +	if (!holders) {
> +		holders = kmalloc(sizeof(*holders), GFP_KERNEL);
> +		INIT_LIST_HEAD(holders);
> +		dax_set_holder(td->dm_dev.dax_dev, holders, &dm_dax_holder_ops);
> +	}
> +	holder = kmalloc(sizeof(*holder), GFP_KERNEL);
> +	holder->md = md;
> +	list_add_tail(&holder->list, holders);
> +
>   	return 0;
>   }
>   
> @@ -659,9 +678,27 @@ static int open_table_device(struct table_device *td, dev_t dev,
>    */
>   static void close_table_device(struct table_device *td, struct mapped_device *md)
>   {
> +	struct list_head *holders;
> +	struct dm_holder *holder, *n;
> +
>   	if (!td->dm_dev.bdev)
>   		return;
>   
> +	holders = dax_get_holder(td->dm_dev.dax_dev);
> +	if (holders) {
> +		list_for_each_entry_safe(holder, n, holders, list) {
> +			if (holder->md == md) {
> +				list_del(&holder->list);
> +				kfree(holder);
> +			}
> +		}
> +		if (list_empty(holders)) {
> +			kfree(holders);
> +			/* unset dax_device's holder_data */
> +			dax_set_holder(td->dm_dev.dax_dev, NULL, NULL);
> +		}
> +	}
> +
>   	bd_unlink_disk_holder(td->dm_dev.bdev, dm_disk(md));
>   	blkdev_put(td->dm_dev.bdev, td->dm_dev.mode | FMODE_EXCL);
>   	put_dax(td->dm_dev.dax_dev);
> @@ -1115,6 +1152,89 @@ static int dm_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
>   	return ret;
>   }
>   
> +#if IS_ENABLED(CONFIG_DAX_DRIVER)
> +struct corrupted_hit_info {
> +	struct dax_device *dax_dev;
> +	sector_t offset;
> +};
> +
> +static int dm_blk_corrupted_hit(struct dm_target *ti, struct dm_dev *dev,
> +				sector_t start, sector_t count, void *data)
> +{
> +	struct corrupted_hit_info *bc = data;
> +
> +	return bc->dax_dev == (void *)dev->dax_dev &&
> +			(start <= bc->offset && bc->offset < start + count);
> +}
> +
> +struct corrupted_do_info {
> +	size_t length;
> +	void *data;
> +};
> +
> +static int dm_blk_corrupted_do(struct dm_target *ti, struct block_device *bdev,
> +			       sector_t sector, void *data)
> +{
> +	struct mapped_device *md = ti->table->md;
> +	struct corrupted_do_info *bc = data;
> +
> +	return dax_holder_notify_failure(md->dax_dev, to_bytes(sector),
> +					 bc->length, bc->data);
> +}
> +
> +static int dm_dax_notify_failure_one(struct mapped_device *md,
> +				     struct dax_device *dax_dev,
> +				     loff_t offset, size_t length, void *data)
> +{
> +	struct dm_table *map;
> +	struct dm_target *ti;
> +	sector_t sect = to_sector(offset);
> +	struct corrupted_hit_info hi = {dax_dev, sect};
> +	struct corrupted_do_info di = {length, data};
> +	int srcu_idx, i, rc = -ENODEV;
> +
> +	map = dm_get_live_table(md, &srcu_idx);
> +	if (!map)
> +		return rc;
> +
> +	/*
> +	 * find the target device, and then translate the offset of this target
> +	 * to the whole mapped device.
> +	 */
> +	for (i = 0; i < dm_table_get_num_targets(map); i++) {
> +		ti = dm_table_get_target(map, i);
> +		if (!(ti->type->iterate_devices && ti->type->rmap))
> +			continue;
> +		if (!ti->type->iterate_devices(ti, dm_blk_corrupted_hit, &hi))
> +			continue;
> +
> +		rc = ti->type->rmap(ti, sect, dm_blk_corrupted_do, &di);
> +		break;
> +	}
> +
> +	dm_put_live_table(md, srcu_idx);
> +	return rc;
> +}
> +
> +static int dm_dax_notify_failure(struct dax_device *dax_dev,
> +				 loff_t offset, size_t length, void *data)
> +{
> +	struct dm_holder *holder;
> +	struct list_head *holders = dax_get_holder(dax_dev);
> +	int rc = -ENODEV;
> +
> +	list_for_each_entry(holder, holders, list) {
> +		rc = dm_dax_notify_failure_one(holder->md, dax_dev, offset,
> +					       length, data);
> +		if (rc != -ENODEV)
> +			break;
> +	}
> +	return rc;
> +}
> +#else
> +#define dm_dax_notify_failure NULL
> +#endif
> +
>   /*
>    * A target may call dm_accept_partial_bio only from the map routine.  It is
>    * allowed for all bio types except REQ_PREFLUSH, REQ_OP_ZONE_* zone management
> @@ -3057,6 +3177,10 @@ static const struct dax_operations dm_dax_ops = {
>   	.zero_page_range = dm_dax_zero_page_range,
>   };
>   
> +static const struct dax_holder_operations dm_dax_holder_ops = {
> +	.notify_failure = dm_dax_notify_failure,
> +};
> +
>   /*
>    * module hooks
>    */
>
Shiyang Ruan Aug. 17, 2021, 1:59 a.m. UTC | #2
> -----Original Message-----
> From: Jane Chu <jane.chu@oracle.com>
> Subject: Re: [PATCH RESEND v6 8/9] md: Implement dax_holder_operations
> 
> On 7/30/2021 3:01 AM, Shiyang Ruan wrote:
> > This is the case where the holder represents a mapped device, or a
> > list of mapped devices more exactly(because it is possible to create
> > more than one mapped device on one pmem device).
> 
> Could you share how do you test this scenario?

Do you mean "more than one mapped device on one pmem device"?

1. Create 2 partitions on a pmem device (fsdax mode).
2. Create LVM (one LV) on each partition.
3. Create an xfs filesystem on each LVM.
4. Trigger a memory failure on this pmem device.

In this case, there are 2 LVMs on one pmem device, so we should register these 2 LVMs in dax_holder and iterate over them when notifying the failure.

--
Thanks,
Ruan.

> 
> thanks,
> -jane
> 
> >
> > Find out which mapped device the offset belongs to, and translate the
> > offset from target device to mapped device.  When it is done, call
> > dax_corrupted_range() for the holder of this mapped device.
> >
> > Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>
> > ---
> >   drivers/md/dm.c | 126
> +++++++++++++++++++++++++++++++++++++++++++++++-
> >   1 file changed, 125 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/md/dm.c b/drivers/md/dm.c index
> > 2c5f9e585211..a35b9a97a73f 100644
> > --- a/drivers/md/dm.c
> > +++ b/drivers/md/dm.c
> > @@ -626,7 +626,11 @@ static void dm_put_live_table_fast(struct
> mapped_device *md) __releases(RCU)
> >   }
> >
> >   static char *_dm_claim_ptr = "I belong to device-mapper";
> > -
> > +static const struct dax_holder_operations dm_dax_holder_ops; struct
> > +dm_holder {
> > +	struct list_head list;
> > +	struct mapped_device *md;
> > +};
> >   /*
> >    * Open a table device so we can use it as a map destination.
> >    */
> > @@ -634,6 +638,8 @@ static int open_table_device(struct table_device *td,
> dev_t dev,
> >   			     struct mapped_device *md)
> >   {
> >   	struct block_device *bdev;
> > +	struct list_head *holders;
> > +	struct dm_holder *holder;
> >
> >   	int r;
> >
> > @@ -651,6 +657,19 @@ static int open_table_device(struct table_device
> > *td, dev_t dev,
> >
> >   	td->dm_dev.bdev = bdev;
> >   	td->dm_dev.dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
> > +	if (!td->dm_dev.dax_dev)
> > +		return 0;
> > +
> > +	holders = dax_get_holder(td->dm_dev.dax_dev);
> > +	if (!holders) {
> > +		holders = kmalloc(sizeof(*holders), GFP_KERNEL);
> > +		INIT_LIST_HEAD(holders);
> > +		dax_set_holder(td->dm_dev.dax_dev, holders, &dm_dax_holder_ops);
> > +	}
> > +	holder = kmalloc(sizeof(*holder), GFP_KERNEL);
> > +	holder->md = md;
> > +	list_add_tail(&holder->list, holders);
> > +
> >   	return 0;
> >   }
> >
> > @@ -659,9 +678,27 @@ static int open_table_device(struct table_device *td,
> dev_t dev,
> >    */
> >   static void close_table_device(struct table_device *td, struct
> mapped_device *md)
> >   {
> > +	struct list_head *holders;
> > +	struct dm_holder *holder, *n;
> > +
> >   	if (!td->dm_dev.bdev)
> >   		return;
> >
> > +	holders = dax_get_holder(td->dm_dev.dax_dev);
> > +	if (holders) {
> > +		list_for_each_entry_safe(holder, n, holders, list) {
> > +			if (holder->md == md) {
> > +				list_del(&holder->list);
> > +				kfree(holder);
> > +			}
> > +		}
> > +		if (list_empty(holders)) {
> > +			kfree(holders);
> > +			/* unset dax_device's holder_data */
> > +			dax_set_holder(td->dm_dev.dax_dev, NULL, NULL);
> > +		}
> > +	}
> > +
> >   	bd_unlink_disk_holder(td->dm_dev.bdev, dm_disk(md));
> >   	blkdev_put(td->dm_dev.bdev, td->dm_dev.mode | FMODE_EXCL);
> >   	put_dax(td->dm_dev.dax_dev);
> > @@ -1115,6 +1152,89 @@ static int dm_dax_zero_page_range(struct
> dax_device *dax_dev, pgoff_t pgoff,
> >   	return ret;
> >   }
> >
> > +#if IS_ENABLED(CONFIG_DAX_DRIVER)
> > +struct corrupted_hit_info {
> > +	struct dax_device *dax_dev;
> > +	sector_t offset;
> > +};
> > +
> > +static int dm_blk_corrupted_hit(struct dm_target *ti, struct dm_dev *dev,
> > +				sector_t start, sector_t count, void *data) {
> > +	struct corrupted_hit_info *bc = data;
> > +
> > +	return bc->dax_dev == (void *)dev->dax_dev &&
> > +			(start <= bc->offset && bc->offset < start + count); }
> > +
> > +struct corrupted_do_info {
> > +	size_t length;
> > +	void *data;
> > +};
> > +
> > +static int dm_blk_corrupted_do(struct dm_target *ti, struct block_device
> *bdev,
> > +			       sector_t sector, void *data) {
> > +	struct mapped_device *md = ti->table->md;
> > +	struct corrupted_do_info *bc = data;
> > +
> > +	return dax_holder_notify_failure(md->dax_dev, to_bytes(sector),
> > +					 bc->length, bc->data);
> > +}
> > +
> > +static int dm_dax_notify_failure_one(struct mapped_device *md,
> > +				     struct dax_device *dax_dev,
> > +				     loff_t offset, size_t length, void *data) {
> > +	struct dm_table *map;
> > +	struct dm_target *ti;
> > +	sector_t sect = to_sector(offset);
> > +	struct corrupted_hit_info hi = {dax_dev, sect};
> > +	struct corrupted_do_info di = {length, data};
> > +	int srcu_idx, i, rc = -ENODEV;
> > +
> > +	map = dm_get_live_table(md, &srcu_idx);
> > +	if (!map)
> > +		return rc;
> > +
> > +	/*
> > +	 * find the target device, and then translate the offset of this target
> > +	 * to the whole mapped device.
> > +	 */
> > +	for (i = 0; i < dm_table_get_num_targets(map); i++) {
> > +		ti = dm_table_get_target(map, i);
> > +		if (!(ti->type->iterate_devices && ti->type->rmap))
> > +			continue;
> > +		if (!ti->type->iterate_devices(ti, dm_blk_corrupted_hit, &hi))
> > +			continue;
> > +
> > +		rc = ti->type->rmap(ti, sect, dm_blk_corrupted_do, &di);
> > +		break;
> > +	}
> > +
> > +	dm_put_live_table(md, srcu_idx);
> > +	return rc;
> > +}
> > +
> > +static int dm_dax_notify_failure(struct dax_device *dax_dev,
> > +				 loff_t offset, size_t length, void *data) {
> > +	struct dm_holder *holder;
> > +	struct list_head *holders = dax_get_holder(dax_dev);
> > +	int rc = -ENODEV;
> > +
> > +	list_for_each_entry(holder, holders, list) {
> > +		rc = dm_dax_notify_failure_one(holder->md, dax_dev, offset,
> > +					       length, data);
> > +		if (rc != -ENODEV)
> > +			break;
> > +	}
> > +	return rc;
> > +}
> > +#else
> > +#define dm_dax_notify_failure NULL
> > +#endif
> > +
> >   /*
> >    * A target may call dm_accept_partial_bio only from the map routine.
> It is
> >    * allowed for all bio types except REQ_PREFLUSH, REQ_OP_ZONE_* zone
> > management @@ -3057,6 +3177,10 @@ static const struct dax_operations
> dm_dax_ops = {
> >   	.zero_page_range = dm_dax_zero_page_range,
> >   };
> >
> > +static const struct dax_holder_operations dm_dax_holder_ops = {
> > +	.notify_failure = dm_dax_notify_failure, };
> > +
> >   /*
> >    * module hooks
> >    */
> >
diff mbox series

Patch

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 2c5f9e585211..a35b9a97a73f 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -626,7 +626,11 @@  static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU)
 }
 
 static char *_dm_claim_ptr = "I belong to device-mapper";
-
+static const struct dax_holder_operations dm_dax_holder_ops;
+struct dm_holder {
+	struct list_head list;
+	struct mapped_device *md;
+};
 /*
  * Open a table device so we can use it as a map destination.
  */
@@ -634,6 +638,8 @@  static int open_table_device(struct table_device *td, dev_t dev,
 			     struct mapped_device *md)
 {
 	struct block_device *bdev;
+	struct list_head *holders;
+	struct dm_holder *holder;
 
 	int r;
 
@@ -651,6 +657,19 @@  static int open_table_device(struct table_device *td, dev_t dev,
 
 	td->dm_dev.bdev = bdev;
 	td->dm_dev.dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
+	if (!td->dm_dev.dax_dev)
+		return 0;
+
+	holders = dax_get_holder(td->dm_dev.dax_dev);
+	if (!holders) {
+		holders = kmalloc(sizeof(*holders), GFP_KERNEL);
+		INIT_LIST_HEAD(holders);
+		dax_set_holder(td->dm_dev.dax_dev, holders, &dm_dax_holder_ops);
+	}
+	holder = kmalloc(sizeof(*holder), GFP_KERNEL);
+	holder->md = md;
+	list_add_tail(&holder->list, holders);
+
 	return 0;
 }
 
@@ -659,9 +678,27 @@  static int open_table_device(struct table_device *td, dev_t dev,
  */
 static void close_table_device(struct table_device *td, struct mapped_device *md)
 {
+	struct list_head *holders;
+	struct dm_holder *holder, *n;
+
 	if (!td->dm_dev.bdev)
 		return;
 
+	holders = dax_get_holder(td->dm_dev.dax_dev);
+	if (holders) {
+		list_for_each_entry_safe(holder, n, holders, list) {
+			if (holder->md == md) {
+				list_del(&holder->list);
+				kfree(holder);
+			}
+		}
+		if (list_empty(holders)) {
+			kfree(holders);
+			/* unset dax_device's holder_data */
+			dax_set_holder(td->dm_dev.dax_dev, NULL, NULL);
+		}
+	}
+
 	bd_unlink_disk_holder(td->dm_dev.bdev, dm_disk(md));
 	blkdev_put(td->dm_dev.bdev, td->dm_dev.mode | FMODE_EXCL);
 	put_dax(td->dm_dev.dax_dev);
@@ -1115,6 +1152,89 @@  static int dm_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
 	return ret;
 }
 
+#if IS_ENABLED(CONFIG_DAX_DRIVER)
+struct corrupted_hit_info {
+	struct dax_device *dax_dev;
+	sector_t offset;
+};
+
+static int dm_blk_corrupted_hit(struct dm_target *ti, struct dm_dev *dev,
+				sector_t start, sector_t count, void *data)
+{
+	struct corrupted_hit_info *bc = data;
+
+	return bc->dax_dev == (void *)dev->dax_dev &&
+			(start <= bc->offset && bc->offset < start + count);
+}
+
+struct corrupted_do_info {
+	size_t length;
+	void *data;
+};
+
+static int dm_blk_corrupted_do(struct dm_target *ti, struct block_device *bdev,
+			       sector_t sector, void *data)
+{
+	struct mapped_device *md = ti->table->md;
+	struct corrupted_do_info *bc = data;
+
+	return dax_holder_notify_failure(md->dax_dev, to_bytes(sector),
+					 bc->length, bc->data);
+}
+
+static int dm_dax_notify_failure_one(struct mapped_device *md,
+				     struct dax_device *dax_dev,
+				     loff_t offset, size_t length, void *data)
+{
+	struct dm_table *map;
+	struct dm_target *ti;
+	sector_t sect = to_sector(offset);
+	struct corrupted_hit_info hi = {dax_dev, sect};
+	struct corrupted_do_info di = {length, data};
+	int srcu_idx, i, rc = -ENODEV;
+
+	map = dm_get_live_table(md, &srcu_idx);
+	if (!map)
+		return rc;
+
+	/*
+	 * find the target device, and then translate the offset of this target
+	 * to the whole mapped device.
+	 */
+	for (i = 0; i < dm_table_get_num_targets(map); i++) {
+		ti = dm_table_get_target(map, i);
+		if (!(ti->type->iterate_devices && ti->type->rmap))
+			continue;
+		if (!ti->type->iterate_devices(ti, dm_blk_corrupted_hit, &hi))
+			continue;
+
+		rc = ti->type->rmap(ti, sect, dm_blk_corrupted_do, &di);
+		break;
+	}
+
+	dm_put_live_table(md, srcu_idx);
+	return rc;
+}
+
+static int dm_dax_notify_failure(struct dax_device *dax_dev,
+				 loff_t offset, size_t length, void *data)
+{
+	struct dm_holder *holder;
+	struct list_head *holders = dax_get_holder(dax_dev);
+	int rc = -ENODEV;
+
+	list_for_each_entry(holder, holders, list) {
+		rc = dm_dax_notify_failure_one(holder->md, dax_dev, offset,
+					       length, data);
+		if (rc != -ENODEV)
+			break;
+	}
+	return rc;
+}
+#else
+#define dm_dax_notify_failure NULL
+#endif
+
 /*
  * A target may call dm_accept_partial_bio only from the map routine.  It is
  * allowed for all bio types except REQ_PREFLUSH, REQ_OP_ZONE_* zone management
@@ -3057,6 +3177,10 @@  static const struct dax_operations dm_dax_ops = {
 	.zero_page_range = dm_dax_zero_page_range,
 };
 
+static const struct dax_holder_operations dm_dax_holder_ops = {
+	.notify_failure = dm_dax_notify_failure,
+};
+
 /*
  * module hooks
  */