diff mbox series

dax: Arrange for dax_supported check to span multiple devices

Message ID 155789172402.748145.11853718580748830476.stgit@dwillia2-desk3.amr.corp.intel.com (mailing list archive)
State New, archived
Headers show
Series dax: Arrange for dax_supported check to span multiple devices | expand

Commit Message

Dan Williams May 15, 2019, 3:48 a.m. UTC
Pankaj reports that starting with commit ad428cdb525a "dax: Check the
end of the block-device capacity with dax_direct_access()" device-mapper
no longer allows dax operation. This results from the stricter checks in
__bdev_dax_supported() that validate that the start and end of a
block-device map to the same 'pagemap' instance.

Teach the dax-core and device-mapper to validate the 'pagemap' on a
per-target basis. This is accomplished by refactoring the
bdev_dax_supported() internals into generic_fsdax_supported() which
takes a sector range to validate. Consequently generic_fsdax_supported()
is suitable to be used in a device-mapper ->iterate_devices() callback.
A new ->dax_supported() operation is added to allow composite devices to
split and route upper-level bdev_dax_supported() requests.

Fixes: ad428cdb525a ("dax: Check the end of the block-device...")
Cc: <stable@vger.kernel.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Mike Snitzer <snitzer@redhat.com>
Cc: Keith Busch <keith.busch@intel.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Reported-by: Pankaj Gupta <pagupta@redhat.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
Hi Mike,

Another day another new dax operation to allow device-mapper to better
scope dax operations.

Let me know if the device-mapper changes look sane. This passes a new
unit test that indeed fails on current mainline.

https://github.com/pmem/ndctl/blob/device-mapper-pending/test/dm.sh

 drivers/dax/super.c          |   88 +++++++++++++++++++++++++++---------------
 drivers/md/dm-table.c        |   17 +++++---
 drivers/md/dm.c              |   20 ++++++++++
 drivers/md/dm.h              |    1 
 drivers/nvdimm/pmem.c        |    1 
 drivers/s390/block/dcssblk.c |    1 
 include/linux/dax.h          |   19 +++++++++
 7 files changed, 110 insertions(+), 37 deletions(-)

Comments

Jan Kara May 15, 2019, 7:30 a.m. UTC | #1
On Tue 14-05-19 20:48:49, Dan Williams wrote:
> Pankaj reports that starting with commit ad428cdb525a "dax: Check the
> end of the block-device capacity with dax_direct_access()" device-mapper
> no longer allows dax operation. This results from the stricter checks in
> __bdev_dax_supported() that validate that the start and end of a
> block-device map to the same 'pagemap' instance.
> 
> Teach the dax-core and device-mapper to validate the 'pagemap' on a
> per-target basis. This is accomplished by refactoring the
> bdev_dax_supported() internals into generic_fsdax_supported() which
> takes a sector range to validate. Consequently generic_fsdax_supported()
> is suitable to be used in a device-mapper ->iterate_devices() callback.
> A new ->dax_supported() operation is added to allow composite devices to
> split and route upper-level bdev_dax_supported() requests.
> 
> Fixes: ad428cdb525a ("dax: Check the end of the block-device...")
> Cc: <stable@vger.kernel.org>
> Cc: Jan Kara <jack@suse.cz>
> Cc: Ira Weiny <ira.weiny@intel.com>
> Cc: Dave Jiang <dave.jiang@intel.com>
> Cc: Mike Snitzer <snitzer@redhat.com>
> Cc: Keith Busch <keith.busch@intel.com>
> Cc: Matthew Wilcox <willy@infradead.org>
> Cc: Vishal Verma <vishal.l.verma@intel.com>
> Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
> Reported-by: Pankaj Gupta <pagupta@redhat.com>
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>

Thanks for the fix. The patch looks good to me so feel free to add:

Reviewed-by: Jan Kara <jack@suse.cz>

								Honza

> ---
> Hi Mike,
> 
> Another day another new dax operation to allow device-mapper to better
> scope dax operations.
> 
> Let me know if the device-mapper changes look sane. This passes a new
> unit test that indeed fails on current mainline.
> 
> https://github.com/pmem/ndctl/blob/device-mapper-pending/test/dm.sh
> 
>  drivers/dax/super.c          |   88 +++++++++++++++++++++++++++---------------
>  drivers/md/dm-table.c        |   17 +++++---
>  drivers/md/dm.c              |   20 ++++++++++
>  drivers/md/dm.h              |    1 
>  drivers/nvdimm/pmem.c        |    1 
>  drivers/s390/block/dcssblk.c |    1 
>  include/linux/dax.h          |   19 +++++++++
>  7 files changed, 110 insertions(+), 37 deletions(-)
> 
> diff --git a/drivers/dax/super.c b/drivers/dax/super.c
> index 0a339b85133e..ec2f2262e3a9 100644
> --- a/drivers/dax/super.c
> +++ b/drivers/dax/super.c
> @@ -73,22 +73,12 @@ struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev)
>  EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev);
>  #endif
>  
> -/**
> - * __bdev_dax_supported() - Check if the device supports dax for filesystem
> - * @bdev: block device to check
> - * @blocksize: The block size of the device
> - *
> - * This is a library function for filesystems to check if the block device
> - * can be mounted with dax option.
> - *
> - * Return: true if supported, false if unsupported
> - */
> -bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
> +bool generic_fsdax_supported(struct dax_device *dax_dev,
> +		struct block_device *bdev, int blocksize, sector_t start,
> +		sector_t sectors)
>  {
> -	struct dax_device *dax_dev;
>  	bool dax_enabled = false;
>  	pgoff_t pgoff, pgoff_end;
> -	struct request_queue *q;
>  	char buf[BDEVNAME_SIZE];
>  	void *kaddr, *end_kaddr;
>  	pfn_t pfn, end_pfn;
> @@ -102,21 +92,14 @@ bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
>  		return false;
>  	}
>  
> -	q = bdev_get_queue(bdev);
> -	if (!q || !blk_queue_dax(q)) {
> -		pr_debug("%s: error: request queue doesn't support dax\n",
> -				bdevname(bdev, buf));
> -		return false;
> -	}
> -
> -	err = bdev_dax_pgoff(bdev, 0, PAGE_SIZE, &pgoff);
> +	err = bdev_dax_pgoff(bdev, start, PAGE_SIZE, &pgoff);
>  	if (err) {
>  		pr_debug("%s: error: unaligned partition for dax\n",
>  				bdevname(bdev, buf));
>  		return false;
>  	}
>  
> -	last_page = PFN_DOWN(i_size_read(bdev->bd_inode) - 1) * 8;
> +	last_page = PFN_DOWN((start + sectors - 1) * 512) * PAGE_SIZE / 512;
>  	err = bdev_dax_pgoff(bdev, last_page, PAGE_SIZE, &pgoff_end);
>  	if (err) {
>  		pr_debug("%s: error: unaligned partition for dax\n",
> @@ -124,20 +107,11 @@ bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
>  		return false;
>  	}
>  
> -	dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
> -	if (!dax_dev) {
> -		pr_debug("%s: error: device does not support dax\n",
> -				bdevname(bdev, buf));
> -		return false;
> -	}
> -
>  	id = dax_read_lock();
>  	len = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn);
>  	len2 = dax_direct_access(dax_dev, pgoff_end, 1, &end_kaddr, &end_pfn);
>  	dax_read_unlock(id);
>  
> -	put_dax(dax_dev);
> -
>  	if (len < 1 || len2 < 1) {
>  		pr_debug("%s: error: dax access failed (%ld)\n",
>  				bdevname(bdev, buf), len < 1 ? len : len2);
> @@ -178,6 +152,49 @@ bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
>  	}
>  	return true;
>  }
> +EXPORT_SYMBOL_GPL(generic_fsdax_supported);
> +
> +/**
> + * __bdev_dax_supported() - Check if the device supports dax for filesystem
> + * @bdev: block device to check
> + * @blocksize: The block size of the device
> + *
> + * This is a library function for filesystems to check if the block device
> + * can be mounted with dax option.
> + *
> + * Return: true if supported, false if unsupported
> + */
> +bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
> +{
> +	struct dax_device *dax_dev;
> +	struct request_queue *q;
> +	char buf[BDEVNAME_SIZE];
> +	bool ret;
> +	int id;
> +
> +	q = bdev_get_queue(bdev);
> +	if (!q || !blk_queue_dax(q)) {
> +		pr_debug("%s: error: request queue doesn't support dax\n",
> +				bdevname(bdev, buf));
> +		return false;
> +	}
> +
> +	dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
> +	if (!dax_dev) {
> +		pr_debug("%s: error: device does not support dax\n",
> +				bdevname(bdev, buf));
> +		return false;
> +	}
> +
> +	id = dax_read_lock();
> +	ret = dax_supported(dax_dev, bdev, blocksize, 0,
> +			i_size_read(bdev->bd_inode) / 512);
> +	dax_read_unlock(id);
> +
> +	put_dax(dax_dev);
> +
> +	return ret;
> +}
>  EXPORT_SYMBOL_GPL(__bdev_dax_supported);
>  #endif
>  
> @@ -303,6 +320,15 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
>  }
>  EXPORT_SYMBOL_GPL(dax_direct_access);
>  
> +bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
> +		int blocksize, sector_t start, sector_t len)
> +{
> +	if (!dax_alive(dax_dev))
> +		return false;
> +
> +	return dax_dev->ops->dax_supported(dax_dev, bdev, blocksize, start, len);
> +}
> +
>  size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
>  		size_t bytes, struct iov_iter *i)
>  {
> diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
> index cde3b49b2a91..350cf0451456 100644
> --- a/drivers/md/dm-table.c
> +++ b/drivers/md/dm-table.c
> @@ -880,13 +880,17 @@ void dm_table_set_type(struct dm_table *t, enum dm_queue_mode type)
>  }
>  EXPORT_SYMBOL_GPL(dm_table_set_type);
>  
> +/* validate the dax capability of the target device span */
>  static int device_supports_dax(struct dm_target *ti, struct dm_dev *dev,
> -			       sector_t start, sector_t len, void *data)
> +				       sector_t start, sector_t len, void *data)
>  {
> -	return bdev_dax_supported(dev->bdev, PAGE_SIZE);
> +	int blocksize = *(int *) data;
> +
> +	return generic_fsdax_supported(dev->dax_dev, dev->bdev, blocksize,
> +			start, len);
>  }
>  
> -static bool dm_table_supports_dax(struct dm_table *t)
> +bool dm_table_supports_dax(struct dm_table *t, int blocksize)
>  {
>  	struct dm_target *ti;
>  	unsigned i;
> @@ -899,7 +903,8 @@ static bool dm_table_supports_dax(struct dm_table *t)
>  			return false;
>  
>  		if (!ti->type->iterate_devices ||
> -		    !ti->type->iterate_devices(ti, device_supports_dax, NULL))
> +		    !ti->type->iterate_devices(ti, device_supports_dax,
> +			    &blocksize))
>  			return false;
>  	}
>  
> @@ -979,7 +984,7 @@ static int dm_table_determine_type(struct dm_table *t)
>  verify_bio_based:
>  		/* We must use this table as bio-based */
>  		t->type = DM_TYPE_BIO_BASED;
> -		if (dm_table_supports_dax(t) ||
> +		if (dm_table_supports_dax(t, PAGE_SIZE) ||
>  		    (list_empty(devices) && live_md_type == DM_TYPE_DAX_BIO_BASED)) {
>  			t->type = DM_TYPE_DAX_BIO_BASED;
>  		} else {
> @@ -1905,7 +1910,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
>  	}
>  	blk_queue_write_cache(q, wc, fua);
>  
> -	if (dm_table_supports_dax(t))
> +	if (dm_table_supports_dax(t, PAGE_SIZE))
>  		blk_queue_flag_set(QUEUE_FLAG_DAX, q);
>  	else
>  		blk_queue_flag_clear(QUEUE_FLAG_DAX, q);
> diff --git a/drivers/md/dm.c b/drivers/md/dm.c
> index 043f0761e4a0..c28787f5357b 100644
> --- a/drivers/md/dm.c
> +++ b/drivers/md/dm.c
> @@ -1105,6 +1105,25 @@ static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
>  	return ret;
>  }
>  
> +static bool dm_dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
> +		int blocksize, sector_t start, sector_t len)
> +{
> +	struct mapped_device *md = dax_get_private(dax_dev);
> +	struct dm_table *map;
> +	int srcu_idx;
> +	bool ret;
> +
> +	map = dm_get_live_table(md, &srcu_idx);
> +	if (!map)
> +		return false;
> +
> +	ret = dm_table_supports_dax(map, blocksize);
> +
> +	dm_put_live_table(md, srcu_idx);
> +
> +	return ret;
> +}
> +
>  static size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
>  				    void *addr, size_t bytes, struct iov_iter *i)
>  {
> @@ -3192,6 +3211,7 @@ static const struct block_device_operations dm_blk_dops = {
>  
>  static const struct dax_operations dm_dax_ops = {
>  	.direct_access = dm_dax_direct_access,
> +	.dax_supported = dm_dax_supported,
>  	.copy_from_iter = dm_dax_copy_from_iter,
>  	.copy_to_iter = dm_dax_copy_to_iter,
>  };
> diff --git a/drivers/md/dm.h b/drivers/md/dm.h
> index 2d539b82ec08..e5e240bfa2d0 100644
> --- a/drivers/md/dm.h
> +++ b/drivers/md/dm.h
> @@ -78,6 +78,7 @@ void dm_unlock_md_type(struct mapped_device *md);
>  void dm_set_md_type(struct mapped_device *md, enum dm_queue_mode type);
>  enum dm_queue_mode dm_get_md_type(struct mapped_device *md);
>  struct target_type *dm_get_immutable_target_type(struct mapped_device *md);
> +bool dm_table_supports_dax(struct dm_table *t, int blocksize);
>  
>  int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t);
>  
> diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
> index 0279eb1da3ef..845c5b430cdd 100644
> --- a/drivers/nvdimm/pmem.c
> +++ b/drivers/nvdimm/pmem.c
> @@ -295,6 +295,7 @@ static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff,
>  
>  static const struct dax_operations pmem_dax_ops = {
>  	.direct_access = pmem_dax_direct_access,
> +	.dax_supported = generic_fsdax_supported,
>  	.copy_from_iter = pmem_copy_from_iter,
>  	.copy_to_iter = pmem_copy_to_iter,
>  };
> diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
> index 4e8aedd50cb0..d04d4378ca50 100644
> --- a/drivers/s390/block/dcssblk.c
> +++ b/drivers/s390/block/dcssblk.c
> @@ -59,6 +59,7 @@ static size_t dcssblk_dax_copy_to_iter(struct dax_device *dax_dev,
>  
>  static const struct dax_operations dcssblk_dax_ops = {
>  	.direct_access = dcssblk_dax_direct_access,
> +	.dax_supported = generic_fsdax_supported,
>  	.copy_from_iter = dcssblk_dax_copy_from_iter,
>  	.copy_to_iter = dcssblk_dax_copy_to_iter,
>  };
> diff --git a/include/linux/dax.h b/include/linux/dax.h
> index 0dd316a74a29..f5544fc62319 100644
> --- a/include/linux/dax.h
> +++ b/include/linux/dax.h
> @@ -19,6 +19,12 @@ struct dax_operations {
>  	 */
>  	long (*direct_access)(struct dax_device *, pgoff_t, long,
>  			void **, pfn_t *);
> +	/*
> +	 * Validate whether this device is usable as an fsdax backing
> +	 * device.
> +	 */
> +	bool (*dax_supported)(struct dax_device *, struct block_device *, int,
> +			sector_t, sector_t);
>  	/* copy_from_iter: required operation for fs-dax direct-i/o */
>  	size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t,
>  			struct iov_iter *);
> @@ -75,6 +81,10 @@ static inline bool bdev_dax_supported(struct block_device *bdev, int blocksize)
>  	return __bdev_dax_supported(bdev, blocksize);
>  }
>  
> +bool generic_fsdax_supported(struct dax_device *dax_dev,
> +		struct block_device *bdev, int blocksize, sector_t start,
> +		sector_t sectors);
> +
>  static inline struct dax_device *fs_dax_get_by_host(const char *host)
>  {
>  	return dax_get_by_host(host);
> @@ -99,6 +109,13 @@ static inline bool bdev_dax_supported(struct block_device *bdev,
>  	return false;
>  }
>  
> +static inline bool generic_fsdax_supported(struct dax_device *dax_dev,
> +		struct block_device *bdev, int blocksize, sector_t start,
> +		sector_t sectors)
> +{
> +	return false;
> +}
> +
>  static inline struct dax_device *fs_dax_get_by_host(const char *host)
>  {
>  	return NULL;
> @@ -142,6 +159,8 @@ bool dax_alive(struct dax_device *dax_dev);
>  void *dax_get_private(struct dax_device *dax_dev);
>  long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
>  		void **kaddr, pfn_t *pfn);
> +bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
> +		int blocksize, sector_t start, sector_t len);
>  size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
>  		size_t bytes, struct iov_iter *i);
>  size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
>
Pankaj Gupta May 15, 2019, 8:38 a.m. UTC | #2
> 
> Pankaj reports that starting with commit ad428cdb525a "dax: Check the
> end of the block-device capacity with dax_direct_access()" device-mapper
> no longer allows dax operation. This results from the stricter checks in
> __bdev_dax_supported() that validate that the start and end of a
> block-device map to the same 'pagemap' instance.
> 
> Teach the dax-core and device-mapper to validate the 'pagemap' on a
> per-target basis. This is accomplished by refactoring the
> bdev_dax_supported() internals into generic_fsdax_supported() which
> takes a sector range to validate. Consequently generic_fsdax_supported()
> is suitable to be used in a device-mapper ->iterate_devices() callback.
> A new ->dax_supported() operation is added to allow composite devices to
> split and route upper-level bdev_dax_supported() requests.
> 
> Fixes: ad428cdb525a ("dax: Check the end of the block-device...")
> Cc: <stable@vger.kernel.org>
> Cc: Jan Kara <jack@suse.cz>
> Cc: Ira Weiny <ira.weiny@intel.com>
> Cc: Dave Jiang <dave.jiang@intel.com>
> Cc: Mike Snitzer <snitzer@redhat.com>
> Cc: Keith Busch <keith.busch@intel.com>
> Cc: Matthew Wilcox <willy@infradead.org>
> Cc: Vishal Verma <vishal.l.verma@intel.com>
> Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
> Reported-by: Pankaj Gupta <pagupta@redhat.com>
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>

Thank you for the patch. Looks good to me. 
I also tested the patch and it works well.

Reviewed-and-Tested-by: Pankaj Gupta <pagupta@redhat.com> 

Best regards,
Pankaj
Mike Snitzer May 16, 2019, 6:57 p.m. UTC | #3
On Tue, May 14 2019 at 11:48pm -0400,
Dan Williams <dan.j.williams@intel.com> wrote:

> Pankaj reports that starting with commit ad428cdb525a "dax: Check the
> end of the block-device capacity with dax_direct_access()" device-mapper
> no longer allows dax operation. This results from the stricter checks in
> __bdev_dax_supported() that validate that the start and end of a
> block-device map to the same 'pagemap' instance.
> 
> Teach the dax-core and device-mapper to validate the 'pagemap' on a
> per-target basis. This is accomplished by refactoring the
> bdev_dax_supported() internals into generic_fsdax_supported() which
> takes a sector range to validate. Consequently generic_fsdax_supported()
> is suitable to be used in a device-mapper ->iterate_devices() callback.
> A new ->dax_supported() operation is added to allow composite devices to
> split and route upper-level bdev_dax_supported() requests.
> 
> Fixes: ad428cdb525a ("dax: Check the end of the block-device...")
> Cc: <stable@vger.kernel.org>
> Cc: Jan Kara <jack@suse.cz>
> Cc: Ira Weiny <ira.weiny@intel.com>
> Cc: Dave Jiang <dave.jiang@intel.com>
> Cc: Mike Snitzer <snitzer@redhat.com>
> Cc: Keith Busch <keith.busch@intel.com>
> Cc: Matthew Wilcox <willy@infradead.org>
> Cc: Vishal Verma <vishal.l.verma@intel.com>
> Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
> Reported-by: Pankaj Gupta <pagupta@redhat.com>
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> ---
> Hi Mike,
> 
> Another day another new dax operation to allow device-mapper to better
> scope dax operations.
> 
> Let me know if the device-mapper changes look sane. This passes a new
> unit test that indeed fails on current mainline.
> 
> https://github.com/pmem/ndctl/blob/device-mapper-pending/test/dm.sh
> 
>  drivers/dax/super.c          |   88 +++++++++++++++++++++++++++---------------
>  drivers/md/dm-table.c        |   17 +++++---
>  drivers/md/dm.c              |   20 ++++++++++
>  drivers/md/dm.h              |    1 
>  drivers/nvdimm/pmem.c        |    1 
>  drivers/s390/block/dcssblk.c |    1 
>  include/linux/dax.h          |   19 +++++++++
>  7 files changed, 110 insertions(+), 37 deletions(-)
> 

...

> diff --git a/drivers/md/dm.h b/drivers/md/dm.h
> index 2d539b82ec08..e5e240bfa2d0 100644
> --- a/drivers/md/dm.h
> +++ b/drivers/md/dm.h
> @@ -78,6 +78,7 @@ void dm_unlock_md_type(struct mapped_device *md);
>  void dm_set_md_type(struct mapped_device *md, enum dm_queue_mode type);
>  enum dm_queue_mode dm_get_md_type(struct mapped_device *md);
>  struct target_type *dm_get_immutable_target_type(struct mapped_device *md);
> +bool dm_table_supports_dax(struct dm_table *t, int blocksize);
>  
>  int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t);
>  

I'd prefer to have dm_table_supports_dax come just after
dm_table_get_md_mempools in the preceding dm_table section of dm.h (just
above this mapped_device section you extended).

But other than that nit, patch looks great on a DM level:

Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Dan Williams May 16, 2019, 9:10 p.m. UTC | #4
On Thu, May 16, 2019 at 11:58 AM Mike Snitzer <snitzer@redhat.com> wrote:
>
> On Tue, May 14 2019 at 11:48pm -0400,
> Dan Williams <dan.j.williams@intel.com> wrote:
>
> > Pankaj reports that starting with commit ad428cdb525a "dax: Check the
> > end of the block-device capacity with dax_direct_access()" device-mapper
> > no longer allows dax operation. This results from the stricter checks in
> > __bdev_dax_supported() that validate that the start and end of a
> > block-device map to the same 'pagemap' instance.
> >
> > Teach the dax-core and device-mapper to validate the 'pagemap' on a
> > per-target basis. This is accomplished by refactoring the
> > bdev_dax_supported() internals into generic_fsdax_supported() which
> > takes a sector range to validate. Consequently generic_fsdax_supported()
> > is suitable to be used in a device-mapper ->iterate_devices() callback.
> > A new ->dax_supported() operation is added to allow composite devices to
> > split and route upper-level bdev_dax_supported() requests.
> >
> > Fixes: ad428cdb525a ("dax: Check the end of the block-device...")
> > Cc: <stable@vger.kernel.org>
> > Cc: Jan Kara <jack@suse.cz>
> > Cc: Ira Weiny <ira.weiny@intel.com>
> > Cc: Dave Jiang <dave.jiang@intel.com>
> > Cc: Mike Snitzer <snitzer@redhat.com>
> > Cc: Keith Busch <keith.busch@intel.com>
> > Cc: Matthew Wilcox <willy@infradead.org>
> > Cc: Vishal Verma <vishal.l.verma@intel.com>
> > Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
> > Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
> > Reported-by: Pankaj Gupta <pagupta@redhat.com>
> > Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> > ---
> > Hi Mike,
> >
> > Another day another new dax operation to allow device-mapper to better
> > scope dax operations.
> >
> > Let me know if the device-mapper changes look sane. This passes a new
> > unit test that indeed fails on current mainline.
> >
> > https://github.com/pmem/ndctl/blob/device-mapper-pending/test/dm.sh
> >
> >  drivers/dax/super.c          |   88 +++++++++++++++++++++++++++---------------
> >  drivers/md/dm-table.c        |   17 +++++---
> >  drivers/md/dm.c              |   20 ++++++++++
> >  drivers/md/dm.h              |    1
> >  drivers/nvdimm/pmem.c        |    1
> >  drivers/s390/block/dcssblk.c |    1
> >  include/linux/dax.h          |   19 +++++++++
> >  7 files changed, 110 insertions(+), 37 deletions(-)
> >
>
> ...
>
> > diff --git a/drivers/md/dm.h b/drivers/md/dm.h
> > index 2d539b82ec08..e5e240bfa2d0 100644
> > --- a/drivers/md/dm.h
> > +++ b/drivers/md/dm.h
> > @@ -78,6 +78,7 @@ void dm_unlock_md_type(struct mapped_device *md);
> >  void dm_set_md_type(struct mapped_device *md, enum dm_queue_mode type);
> >  enum dm_queue_mode dm_get_md_type(struct mapped_device *md);
> >  struct target_type *dm_get_immutable_target_type(struct mapped_device *md);
> > +bool dm_table_supports_dax(struct dm_table *t, int blocksize);
> >
> >  int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t);
> >
>
> I'd prefer to have dm_table_supports_dax come just after
> dm_table_get_md_mempools in the preceding dm_table section of dm.h (just
> above this mapped_device section you extended).
>
> But other than that nit, patch looks great on a DM level:
>
> Reviewed-by: Mike Snitzer <snitzer@redhat.com>

Thanks Mike, I folded in this change:

@@ -72,13 +72,13 @@ bool dm_table_bio_based(struct dm_table *t);
 bool dm_table_request_based(struct dm_table *t);
 void dm_table_free_md_mempools(struct dm_table *t);
 struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
+bool dm_table_supports_dax(struct dm_table *t, int blocksize);

 void dm_lock_md_type(struct mapped_device *md);
 void dm_unlock_md_type(struct mapped_device *md);
 void dm_set_md_type(struct mapped_device *md, enum dm_queue_mode type);
 enum dm_queue_mode dm_get_md_type(struct mapped_device *md);
 struct target_type *dm_get_immutable_target_type(struct mapped_device *md);
-bool dm_table_supports_dax(struct dm_table *t, int blocksize);

 int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t);
diff mbox series

Patch

diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 0a339b85133e..ec2f2262e3a9 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -73,22 +73,12 @@  struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev)
 EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev);
 #endif
 
-/**
- * __bdev_dax_supported() - Check if the device supports dax for filesystem
- * @bdev: block device to check
- * @blocksize: The block size of the device
- *
- * This is a library function for filesystems to check if the block device
- * can be mounted with dax option.
- *
- * Return: true if supported, false if unsupported
- */
-bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
+bool generic_fsdax_supported(struct dax_device *dax_dev,
+		struct block_device *bdev, int blocksize, sector_t start,
+		sector_t sectors)
 {
-	struct dax_device *dax_dev;
 	bool dax_enabled = false;
 	pgoff_t pgoff, pgoff_end;
-	struct request_queue *q;
 	char buf[BDEVNAME_SIZE];
 	void *kaddr, *end_kaddr;
 	pfn_t pfn, end_pfn;
@@ -102,21 +92,14 @@  bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
 		return false;
 	}
 
-	q = bdev_get_queue(bdev);
-	if (!q || !blk_queue_dax(q)) {
-		pr_debug("%s: error: request queue doesn't support dax\n",
-				bdevname(bdev, buf));
-		return false;
-	}
-
-	err = bdev_dax_pgoff(bdev, 0, PAGE_SIZE, &pgoff);
+	err = bdev_dax_pgoff(bdev, start, PAGE_SIZE, &pgoff);
 	if (err) {
 		pr_debug("%s: error: unaligned partition for dax\n",
 				bdevname(bdev, buf));
 		return false;
 	}
 
-	last_page = PFN_DOWN(i_size_read(bdev->bd_inode) - 1) * 8;
+	last_page = PFN_DOWN((start + sectors - 1) * 512) * PAGE_SIZE / 512;
 	err = bdev_dax_pgoff(bdev, last_page, PAGE_SIZE, &pgoff_end);
 	if (err) {
 		pr_debug("%s: error: unaligned partition for dax\n",
@@ -124,20 +107,11 @@  bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
 		return false;
 	}
 
-	dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
-	if (!dax_dev) {
-		pr_debug("%s: error: device does not support dax\n",
-				bdevname(bdev, buf));
-		return false;
-	}
-
 	id = dax_read_lock();
 	len = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn);
 	len2 = dax_direct_access(dax_dev, pgoff_end, 1, &end_kaddr, &end_pfn);
 	dax_read_unlock(id);
 
-	put_dax(dax_dev);
-
 	if (len < 1 || len2 < 1) {
 		pr_debug("%s: error: dax access failed (%ld)\n",
 				bdevname(bdev, buf), len < 1 ? len : len2);
@@ -178,6 +152,49 @@  bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
 	}
 	return true;
 }
+EXPORT_SYMBOL_GPL(generic_fsdax_supported);
+
+/**
+ * __bdev_dax_supported() - Check if the device supports dax for filesystem
+ * @bdev: block device to check
+ * @blocksize: The block size of the device
+ *
+ * This is a library function for filesystems to check if the block device
+ * can be mounted with dax option.
+ *
+ * Return: true if supported, false if unsupported
+ */
+bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
+{
+	struct dax_device *dax_dev;
+	struct request_queue *q;
+	char buf[BDEVNAME_SIZE];
+	bool ret;
+	int id;
+
+	q = bdev_get_queue(bdev);
+	if (!q || !blk_queue_dax(q)) {
+		pr_debug("%s: error: request queue doesn't support dax\n",
+				bdevname(bdev, buf));
+		return false;
+	}
+
+	dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
+	if (!dax_dev) {
+		pr_debug("%s: error: device does not support dax\n",
+				bdevname(bdev, buf));
+		return false;
+	}
+
+	id = dax_read_lock();
+	ret = dax_supported(dax_dev, bdev, blocksize, 0,
+			i_size_read(bdev->bd_inode) / 512);
+	dax_read_unlock(id);
+
+	put_dax(dax_dev);
+
+	return ret;
+}
 EXPORT_SYMBOL_GPL(__bdev_dax_supported);
 #endif
 
@@ -303,6 +320,15 @@  long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
 }
 EXPORT_SYMBOL_GPL(dax_direct_access);
 
+bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
+		int blocksize, sector_t start, sector_t len)
+{
+	if (!dax_alive(dax_dev))
+		return false;
+
+	return dax_dev->ops->dax_supported(dax_dev, bdev, blocksize, start, len);
+}
+
 size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
 		size_t bytes, struct iov_iter *i)
 {
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index cde3b49b2a91..350cf0451456 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -880,13 +880,17 @@  void dm_table_set_type(struct dm_table *t, enum dm_queue_mode type)
 }
 EXPORT_SYMBOL_GPL(dm_table_set_type);
 
+/* validate the dax capability of the target device span */
 static int device_supports_dax(struct dm_target *ti, struct dm_dev *dev,
-			       sector_t start, sector_t len, void *data)
+				       sector_t start, sector_t len, void *data)
 {
-	return bdev_dax_supported(dev->bdev, PAGE_SIZE);
+	int blocksize = *(int *) data;
+
+	return generic_fsdax_supported(dev->dax_dev, dev->bdev, blocksize,
+			start, len);
 }
 
-static bool dm_table_supports_dax(struct dm_table *t)
+bool dm_table_supports_dax(struct dm_table *t, int blocksize)
 {
 	struct dm_target *ti;
 	unsigned i;
@@ -899,7 +903,8 @@  static bool dm_table_supports_dax(struct dm_table *t)
 			return false;
 
 		if (!ti->type->iterate_devices ||
-		    !ti->type->iterate_devices(ti, device_supports_dax, NULL))
+		    !ti->type->iterate_devices(ti, device_supports_dax,
+			    &blocksize))
 			return false;
 	}
 
@@ -979,7 +984,7 @@  static int dm_table_determine_type(struct dm_table *t)
 verify_bio_based:
 		/* We must use this table as bio-based */
 		t->type = DM_TYPE_BIO_BASED;
-		if (dm_table_supports_dax(t) ||
+		if (dm_table_supports_dax(t, PAGE_SIZE) ||
 		    (list_empty(devices) && live_md_type == DM_TYPE_DAX_BIO_BASED)) {
 			t->type = DM_TYPE_DAX_BIO_BASED;
 		} else {
@@ -1905,7 +1910,7 @@  void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 	}
 	blk_queue_write_cache(q, wc, fua);
 
-	if (dm_table_supports_dax(t))
+	if (dm_table_supports_dax(t, PAGE_SIZE))
 		blk_queue_flag_set(QUEUE_FLAG_DAX, q);
 	else
 		blk_queue_flag_clear(QUEUE_FLAG_DAX, q);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 043f0761e4a0..c28787f5357b 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1105,6 +1105,25 @@  static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
 	return ret;
 }
 
+static bool dm_dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
+		int blocksize, sector_t start, sector_t len)
+{
+	struct mapped_device *md = dax_get_private(dax_dev);
+	struct dm_table *map;
+	int srcu_idx;
+	bool ret;
+
+	map = dm_get_live_table(md, &srcu_idx);
+	if (!map)
+		return false;
+
+	ret = dm_table_supports_dax(map, blocksize);
+
+	dm_put_live_table(md, srcu_idx);
+
+	return ret;
+}
+
 static size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
 				    void *addr, size_t bytes, struct iov_iter *i)
 {
@@ -3192,6 +3211,7 @@  static const struct block_device_operations dm_blk_dops = {
 
 static const struct dax_operations dm_dax_ops = {
 	.direct_access = dm_dax_direct_access,
+	.dax_supported = dm_dax_supported,
 	.copy_from_iter = dm_dax_copy_from_iter,
 	.copy_to_iter = dm_dax_copy_to_iter,
 };
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 2d539b82ec08..e5e240bfa2d0 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -78,6 +78,7 @@  void dm_unlock_md_type(struct mapped_device *md);
 void dm_set_md_type(struct mapped_device *md, enum dm_queue_mode type);
 enum dm_queue_mode dm_get_md_type(struct mapped_device *md);
 struct target_type *dm_get_immutable_target_type(struct mapped_device *md);
+bool dm_table_supports_dax(struct dm_table *t, int blocksize);
 
 int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t);
 
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 0279eb1da3ef..845c5b430cdd 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -295,6 +295,7 @@  static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff,
 
 static const struct dax_operations pmem_dax_ops = {
 	.direct_access = pmem_dax_direct_access,
+	.dax_supported = generic_fsdax_supported,
 	.copy_from_iter = pmem_copy_from_iter,
 	.copy_to_iter = pmem_copy_to_iter,
 };
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 4e8aedd50cb0..d04d4378ca50 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -59,6 +59,7 @@  static size_t dcssblk_dax_copy_to_iter(struct dax_device *dax_dev,
 
 static const struct dax_operations dcssblk_dax_ops = {
 	.direct_access = dcssblk_dax_direct_access,
+	.dax_supported = generic_fsdax_supported,
 	.copy_from_iter = dcssblk_dax_copy_from_iter,
 	.copy_to_iter = dcssblk_dax_copy_to_iter,
 };
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 0dd316a74a29..f5544fc62319 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -19,6 +19,12 @@  struct dax_operations {
 	 */
 	long (*direct_access)(struct dax_device *, pgoff_t, long,
 			void **, pfn_t *);
+	/*
+	 * Validate whether this device is usable as an fsdax backing
+	 * device.
+	 */
+	bool (*dax_supported)(struct dax_device *, struct block_device *, int,
+			sector_t, sector_t);
 	/* copy_from_iter: required operation for fs-dax direct-i/o */
 	size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t,
 			struct iov_iter *);
@@ -75,6 +81,10 @@  static inline bool bdev_dax_supported(struct block_device *bdev, int blocksize)
 	return __bdev_dax_supported(bdev, blocksize);
 }
 
+bool generic_fsdax_supported(struct dax_device *dax_dev,
+		struct block_device *bdev, int blocksize, sector_t start,
+		sector_t sectors);
+
 static inline struct dax_device *fs_dax_get_by_host(const char *host)
 {
 	return dax_get_by_host(host);
@@ -99,6 +109,13 @@  static inline bool bdev_dax_supported(struct block_device *bdev,
 	return false;
 }
 
+static inline bool generic_fsdax_supported(struct dax_device *dax_dev,
+		struct block_device *bdev, int blocksize, sector_t start,
+		sector_t sectors)
+{
+	return false;
+}
+
 static inline struct dax_device *fs_dax_get_by_host(const char *host)
 {
 	return NULL;
@@ -142,6 +159,8 @@  bool dax_alive(struct dax_device *dax_dev);
 void *dax_get_private(struct dax_device *dax_dev);
 long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
 		void **kaddr, pfn_t *pfn);
+bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
+		int blocksize, sector_t start, sector_t len);
 size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
 		size_t bytes, struct iov_iter *i);
 size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,