[resend,v2,11/33] dm: add dax_device and dax_operations support
diff mbox

Message ID 149245618859.10206.13182319600260215993.stgit@dwillia2-desk3.amr.corp.intel.com
State New
Headers show

Commit Message

Dan Williams April 17, 2017, 7:09 p.m. UTC
Allocate a dax_device to represent the capacity of a device-mapper
instance. Provide a ->direct_access() method via the new dax_operations
indirection that mirrors the functionality of the current direct_access
support via block_device_operations.  Once fs/dax.c has been converted
to use dax_operations the old dm_blk_direct_access() will be removed.

A new helper dm_dax_get_live_target() is introduced to separate some of
the dm-specifics from the direct_access implementation.

This enabling is only for the top-level dm representation to upper
layers. Converting target direct_access implementations is deferred to a
separate patch.

Cc: Toshi Kani <toshi.kani@hpe.com>
Cc: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/md/Kconfig            |    1 
 drivers/md/dm-core.h          |    1 
 drivers/md/dm.c               |   84 ++++++++++++++++++++++++++++++++++-------
 include/linux/device-mapper.h |    1 
 4 files changed, 73 insertions(+), 14 deletions(-)

Comments

Dan Williams April 20, 2017, 4:30 p.m. UTC | #1
On Mon, Apr 17, 2017 at 12:09 PM, Dan Williams <dan.j.williams@intel.com> wrote:
> Allocate a dax_device to represent the capacity of a device-mapper
> instance. Provide a ->direct_access() method via the new dax_operations
> indirection that mirrors the functionality of the current direct_access
> support via block_device_operations.  Once fs/dax.c has been converted
> to use dax_operations the old dm_blk_direct_access() will be removed.
>
> A new helper dm_dax_get_live_target() is introduced to separate some of
> the dm-specifics from the direct_access implementation.
>
> This enabling is only for the top-level dm representation to upper
> layers. Converting target direct_access implementations is deferred to a
> separate patch.
>
> Cc: Toshi Kani <toshi.kani@hpe.com>
> Cc: Mike Snitzer <snitzer@redhat.com>

Hi Mike,

Any concerns with these dax_device and dax_operations changes to
device-mapper for the upcoming merge window?


> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> ---
>  drivers/md/Kconfig            |    1
>  drivers/md/dm-core.h          |    1
>  drivers/md/dm.c               |   84 ++++++++++++++++++++++++++++++++++-------
>  include/linux/device-mapper.h |    1
>  4 files changed, 73 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
> index b7767da50c26..1de8372d9459 100644
> --- a/drivers/md/Kconfig
> +++ b/drivers/md/Kconfig
> @@ -200,6 +200,7 @@ config BLK_DEV_DM_BUILTIN
>  config BLK_DEV_DM
>         tristate "Device mapper support"
>         select BLK_DEV_DM_BUILTIN
> +       select DAX
>         ---help---
>           Device-mapper is a low level volume manager.  It works by allowing
>           people to specify mappings for ranges of logical sectors.  Various
> diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
> index 136fda3ff9e5..538630190f66 100644
> --- a/drivers/md/dm-core.h
> +++ b/drivers/md/dm-core.h
> @@ -58,6 +58,7 @@ struct mapped_device {
>         struct target_type *immutable_target_type;
>
>         struct gendisk *disk;
> +       struct dax_device *dax_dev;
>         char name[16];
>
>         void *interface_ptr;
> diff --git a/drivers/md/dm.c b/drivers/md/dm.c
> index dfb75979e455..bd56dfe43a99 100644
> --- a/drivers/md/dm.c
> +++ b/drivers/md/dm.c
> @@ -16,6 +16,7 @@
>  #include <linux/blkpg.h>
>  #include <linux/bio.h>
>  #include <linux/mempool.h>
> +#include <linux/dax.h>
>  #include <linux/slab.h>
>  #include <linux/idr.h>
>  #include <linux/hdreg.h>
> @@ -908,31 +909,68 @@ int dm_set_target_max_io_len(struct dm_target *ti, sector_t len)
>  }
>  EXPORT_SYMBOL_GPL(dm_set_target_max_io_len);
>
> -static long dm_blk_direct_access(struct block_device *bdev, sector_t sector,
> -                                void **kaddr, pfn_t *pfn, long size)
> +static struct dm_target *dm_dax_get_live_target(struct mapped_device *md,
> +               sector_t sector, int *srcu_idx)
>  {
> -       struct mapped_device *md = bdev->bd_disk->private_data;
>         struct dm_table *map;
>         struct dm_target *ti;
> -       int srcu_idx;
> -       long len, ret = -EIO;
>
> -       map = dm_get_live_table(md, &srcu_idx);
> +       map = dm_get_live_table(md, srcu_idx);
>         if (!map)
> -               goto out;
> +               return NULL;
>
>         ti = dm_table_find_target(map, sector);
>         if (!dm_target_is_valid(ti))
> -               goto out;
> +               return NULL;
>
> -       len = max_io_len(sector, ti) << SECTOR_SHIFT;
> -       size = min(len, size);
> +       return ti;
> +}
>
> -       if (ti->type->direct_access)
> -               ret = ti->type->direct_access(ti, sector, kaddr, pfn, size);
> -out:
> +static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
> +               long nr_pages, void **kaddr, pfn_t *pfn)
> +{
> +       struct mapped_device *md = dax_get_private(dax_dev);
> +       sector_t sector = pgoff * PAGE_SECTORS;
> +       struct dm_target *ti;
> +       long len, ret = -EIO;
> +       int srcu_idx;
> +
> +       ti = dm_dax_get_live_target(md, sector, &srcu_idx);
> +
> +       if (!ti)
> +               goto out;
> +       if (!ti->type->direct_access)
> +               goto out;
> +       len = max_io_len(sector, ti) / PAGE_SECTORS;
> +       if (len < 1)
> +               goto out;
> +       nr_pages = min(len, nr_pages);
> +       if (ti->type->direct_access) {
> +               ret = ti->type->direct_access(ti, sector, kaddr, pfn,
> +                               nr_pages * PAGE_SIZE);
> +               /*
> +                * FIXME: convert ti->type->direct_access to return
> +                * nr_pages directly.
> +                */
> +               if (ret >= 0)
> +                       ret /= PAGE_SIZE;
> +       }
> + out:
>         dm_put_live_table(md, srcu_idx);
> -       return min(ret, size);
> +
> +       return ret;
> +}
> +
> +static long dm_blk_direct_access(struct block_device *bdev, sector_t sector,
> +               void **kaddr, pfn_t *pfn, long size)
> +{
> +       struct mapped_device *md = bdev->bd_disk->private_data;
> +       struct dax_device *dax_dev = md->dax_dev;
> +       long nr_pages = size / PAGE_SIZE;
> +
> +       nr_pages = dm_dax_direct_access(dax_dev, sector / PAGE_SECTORS,
> +                       nr_pages, kaddr, pfn);
> +       return nr_pages < 0 ? nr_pages : nr_pages * PAGE_SIZE;
>  }
>
>  /*
> @@ -1437,6 +1475,7 @@ static int next_free_minor(int *minor)
>  }
>
>  static const struct block_device_operations dm_blk_dops;
> +static const struct dax_operations dm_dax_ops;
>
>  static void dm_wq_work(struct work_struct *work);
>
> @@ -1483,6 +1522,12 @@ static void cleanup_mapped_device(struct mapped_device *md)
>         if (md->bs)
>                 bioset_free(md->bs);
>
> +       if (md->dax_dev) {
> +               kill_dax(md->dax_dev);
> +               put_dax(md->dax_dev);
> +               md->dax_dev = NULL;
> +       }
> +
>         if (md->disk) {
>                 spin_lock(&_minor_lock);
>                 md->disk->private_data = NULL;
> @@ -1510,6 +1555,7 @@ static void cleanup_mapped_device(struct mapped_device *md)
>  static struct mapped_device *alloc_dev(int minor)
>  {
>         int r, numa_node_id = dm_get_numa_node();
> +       struct dax_device *dax_dev;
>         struct mapped_device *md;
>         void *old_md;
>
> @@ -1574,6 +1620,12 @@ static struct mapped_device *alloc_dev(int minor)
>         md->disk->queue = md->queue;
>         md->disk->private_data = md;
>         sprintf(md->disk->disk_name, "dm-%d", minor);
> +
> +       dax_dev = alloc_dax(md, md->disk->disk_name, &dm_dax_ops);
> +       if (!dax_dev)
> +               goto bad;
> +       md->dax_dev = dax_dev;
> +
>         add_disk(md->disk);
>         format_dev_t(md->name, MKDEV(_major, minor));
>
> @@ -2781,6 +2833,10 @@ static const struct block_device_operations dm_blk_dops = {
>         .owner = THIS_MODULE
>  };
>
> +static const struct dax_operations dm_dax_ops = {
> +       .direct_access = dm_dax_direct_access,
> +};
> +
>  /*
>   * module hooks
>   */
> diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
> index a7e6903866fd..bcba4d89089c 100644
> --- a/include/linux/device-mapper.h
> +++ b/include/linux/device-mapper.h
> @@ -130,6 +130,7 @@ typedef int (*dm_busy_fn) (struct dm_target *ti);
>   */
>  typedef long (*dm_direct_access_fn) (struct dm_target *ti, sector_t sector,
>                                      void **kaddr, pfn_t *pfn, long size);
> +#define PAGE_SECTORS (PAGE_SIZE / 512)
>
>  void dm_error(const char *message);
>
>
Mike Snitzer April 22, 2017, 3:25 p.m. UTC | #2
On Thu, Apr 20 2017 at 12:30pm -0400,
Dan Williams <dan.j.williams@intel.com> wrote:

> On Mon, Apr 17, 2017 at 12:09 PM, Dan Williams <dan.j.williams@intel.com> wrote:
> > Allocate a dax_device to represent the capacity of a device-mapper
> > instance. Provide a ->direct_access() method via the new dax_operations
> > indirection that mirrors the functionality of the current direct_access
> > support via block_device_operations.  Once fs/dax.c has been converted
> > to use dax_operations the old dm_blk_direct_access() will be removed.
> >
> > A new helper dm_dax_get_live_target() is introduced to separate some of
> > the dm-specifics from the direct_access implementation.
> >
> > This enabling is only for the top-level dm representation to upper
> > layers. Converting target direct_access implementations is deferred to a
> > separate patch.
> >
> > Cc: Toshi Kani <toshi.kani@hpe.com>
> > Cc: Mike Snitzer <snitzer@redhat.com>
> 
> Hi Mike,
> 
> Any concerns with these dax_device and dax_operations changes to
> device-mapper for the upcoming merge window?

Sorry for the delay.

I just reviewed them, overall looks good.  The enabling functions in the
DAX code, that are mixed in with the DM changes, could maybe be factored
out into separate earlier patches but I don't feel that strongly about
that.

Feel free to add this tag to the handful of relevant DM patches:

Reviewed-by: Mike Snitzer <snitzer@redhat.com>

I haven't done a merge with the linux-dm.git 'dm-4.12' branch but it'd
be good to verify there aren't any merge conflicts.  If there are then
it'd be nice to know going in to the merge so that we can forecast as
much to Linus.

I really appreciate you doing this work!

Thanks,
Mike
Bart Van Assche July 28, 2017, 4:17 p.m. UTC | #3
On Mon, 2017-04-17 at 12:09 -0700, Dan Williams wrote:
> diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
> index b7767da50c26..1de8372d9459 100644
> --- a/drivers/md/Kconfig
> +++ b/drivers/md/Kconfig
> @@ -200,6 +200,7 @@ config BLK_DEV_DM_BUILTIN
>  config BLK_DEV_DM
>  	tristate "Device mapper support"
>  	select BLK_DEV_DM_BUILTIN
> +	select DAX
>  	---help---
>  	  Device-mapper is a low level volume manager.  It works by allowing
>  	  people to specify mappings for ranges of logical sectors.  Various

(replying to an e-mail of three months ago)

Hello Dan,

While building a v4.12 kernel I noticed that enabling device mapper support
now unconditionally enables DAX. I think there are plenty of systems that use
dm but do not need DAX. Have you considered to rework this such that instead
of dm selecting DAX that DAX support is only enabled in dm if CONFIG_DAX is
enabled?

Thanks,

Bart.
Mike Snitzer July 28, 2017, 5:48 p.m. UTC | #4
On Fri, Jul 28 2017 at 12:17pm -0400,
Bart Van Assche <Bart.VanAssche@wdc.com> wrote:

> On Mon, 2017-04-17 at 12:09 -0700, Dan Williams wrote:
> > diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
> > index b7767da50c26..1de8372d9459 100644
> > --- a/drivers/md/Kconfig
> > +++ b/drivers/md/Kconfig
> > @@ -200,6 +200,7 @@ config BLK_DEV_DM_BUILTIN
> >  config BLK_DEV_DM
> >  	tristate "Device mapper support"
> >  	select BLK_DEV_DM_BUILTIN
> > +	select DAX
> >  	---help---
> >  	  Device-mapper is a low level volume manager.  It works by allowing
> >  	  people to specify mappings for ranges of logical sectors.  Various
> 
> (replying to an e-mail of three months ago)
> 
> Hello Dan,
> 
> While building a v4.12 kernel I noticed that enabling device mapper support
> now unconditionally enables DAX. I think there are plenty of systems that use
> dm but do not need DAX. Have you considered to rework this such that instead
> of dm selecting DAX that DAX support is only enabled in dm if CONFIG_DAX is
> enabled?

I haven't but patches to do so would be welcomed.

Mike
Dan Williams July 29, 2017, 7:57 p.m. UTC | #5
On Fri, Jul 28, 2017 at 9:17 AM, Bart Van Assche <Bart.VanAssche@wdc.com> wrote:
> On Mon, 2017-04-17 at 12:09 -0700, Dan Williams wrote:
>> diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
>> index b7767da50c26..1de8372d9459 100644
>> --- a/drivers/md/Kconfig
>> +++ b/drivers/md/Kconfig
>> @@ -200,6 +200,7 @@ config BLK_DEV_DM_BUILTIN
>>  config BLK_DEV_DM
>>       tristate "Device mapper support"
>>       select BLK_DEV_DM_BUILTIN
>> +     select DAX
>>       ---help---
>>         Device-mapper is a low level volume manager.  It works by allowing
>>         people to specify mappings for ranges of logical sectors.  Various
>
> (replying to an e-mail of three months ago)
>
> Hello Dan,
>
> While building a v4.12 kernel I noticed that enabling device mapper support
> now unconditionally enables DAX. I think there are plenty of systems that use
> dm but do not need DAX. Have you considered to rework this such that instead
> of dm selecting DAX that DAX support is only enabled in dm if CONFIG_DAX is
> enabled?
>

I'd rather flip this around and add a CONFIG_DM_DAX that gates whether
DM enables / links to the DAX core. I'll take a look at a patch.
Bart Van Assche July 29, 2017, 9:24 p.m. UTC | #6
On Sat, 2017-07-29 at 12:57 -0700, Dan Williams wrote:
> On Fri, Jul 28, 2017 at 9:17 AM, Bart Van Assche <Bart.VanAssche@wdc.com> wrote:

> > On Mon, 2017-04-17 at 12:09 -0700, Dan Williams wrote:

> > > diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig

> > > index b7767da50c26..1de8372d9459 100644

> > > --- a/drivers/md/Kconfig

> > > +++ b/drivers/md/Kconfig

> > > @@ -200,6 +200,7 @@ config BLK_DEV_DM_BUILTIN

> > >  config BLK_DEV_DM

> > >       tristate "Device mapper support"

> > >       select BLK_DEV_DM_BUILTIN

> > > +     select DAX

> > >       ---help---

> > >         Device-mapper is a low level volume manager.  It works by allowing

> > >         people to specify mappings for ranges of logical sectors.  Various

> > 

> > (replying to an e-mail of three months ago)

> > 

> > Hello Dan,

> > 

> > While building a v4.12 kernel I noticed that enabling device mapper support

> > now unconditionally enables DAX. I think there are plenty of systems that use

> > dm but do not need DAX. Have you considered to rework this such that instead

> > of dm selecting DAX that DAX support is only enabled in dm if CONFIG_DAX is

> > enabled?

> 

> I'd rather flip this around and add a CONFIG_DM_DAX that gates whether

> DM enables / links to the DAX core. I'll take a look at a patch.


Thanks! Please also consider to move all DAX-related dm code into a separate
source file such that the number of #ifdef CONFIG_DM_DAX statements can be
kept to an absolute minimum.

Bart.

Patch
diff mbox

diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index b7767da50c26..1de8372d9459 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -200,6 +200,7 @@  config BLK_DEV_DM_BUILTIN
 config BLK_DEV_DM
 	tristate "Device mapper support"
 	select BLK_DEV_DM_BUILTIN
+	select DAX
 	---help---
 	  Device-mapper is a low level volume manager.  It works by allowing
 	  people to specify mappings for ranges of logical sectors.  Various
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index 136fda3ff9e5..538630190f66 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -58,6 +58,7 @@  struct mapped_device {
 	struct target_type *immutable_target_type;
 
 	struct gendisk *disk;
+	struct dax_device *dax_dev;
 	char name[16];
 
 	void *interface_ptr;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index dfb75979e455..bd56dfe43a99 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -16,6 +16,7 @@ 
 #include <linux/blkpg.h>
 #include <linux/bio.h>
 #include <linux/mempool.h>
+#include <linux/dax.h>
 #include <linux/slab.h>
 #include <linux/idr.h>
 #include <linux/hdreg.h>
@@ -908,31 +909,68 @@  int dm_set_target_max_io_len(struct dm_target *ti, sector_t len)
 }
 EXPORT_SYMBOL_GPL(dm_set_target_max_io_len);
 
-static long dm_blk_direct_access(struct block_device *bdev, sector_t sector,
-				 void **kaddr, pfn_t *pfn, long size)
+static struct dm_target *dm_dax_get_live_target(struct mapped_device *md,
+		sector_t sector, int *srcu_idx)
 {
-	struct mapped_device *md = bdev->bd_disk->private_data;
 	struct dm_table *map;
 	struct dm_target *ti;
-	int srcu_idx;
-	long len, ret = -EIO;
 
-	map = dm_get_live_table(md, &srcu_idx);
+	map = dm_get_live_table(md, srcu_idx);
 	if (!map)
-		goto out;
+		return NULL;
 
 	ti = dm_table_find_target(map, sector);
 	if (!dm_target_is_valid(ti))
-		goto out;
+		return NULL;
 
-	len = max_io_len(sector, ti) << SECTOR_SHIFT;
-	size = min(len, size);
+	return ti;
+}
 
-	if (ti->type->direct_access)
-		ret = ti->type->direct_access(ti, sector, kaddr, pfn, size);
-out:
+static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
+		long nr_pages, void **kaddr, pfn_t *pfn)
+{
+	struct mapped_device *md = dax_get_private(dax_dev);
+	sector_t sector = pgoff * PAGE_SECTORS;
+	struct dm_target *ti;
+	long len, ret = -EIO;
+	int srcu_idx;
+
+	ti = dm_dax_get_live_target(md, sector, &srcu_idx);
+
+	if (!ti)
+		goto out;
+	if (!ti->type->direct_access)
+		goto out;
+	len = max_io_len(sector, ti) / PAGE_SECTORS;
+	if (len < 1)
+		goto out;
+	nr_pages = min(len, nr_pages);
+	if (ti->type->direct_access) {
+		ret = ti->type->direct_access(ti, sector, kaddr, pfn,
+				nr_pages * PAGE_SIZE);
+		/*
+		 * FIXME: convert ti->type->direct_access to return
+		 * nr_pages directly.
+		 */
+		if (ret >= 0)
+			ret /= PAGE_SIZE;
+	}
+ out:
 	dm_put_live_table(md, srcu_idx);
-	return min(ret, size);
+
+	return ret;
+}
+
+static long dm_blk_direct_access(struct block_device *bdev, sector_t sector,
+		void **kaddr, pfn_t *pfn, long size)
+{
+	struct mapped_device *md = bdev->bd_disk->private_data;
+	struct dax_device *dax_dev = md->dax_dev;
+	long nr_pages = size / PAGE_SIZE;
+
+	nr_pages = dm_dax_direct_access(dax_dev, sector / PAGE_SECTORS,
+			nr_pages, kaddr, pfn);
+	return nr_pages < 0 ? nr_pages : nr_pages * PAGE_SIZE;
 }
 
 /*
@@ -1437,6 +1475,7 @@  static int next_free_minor(int *minor)
 }
 
 static const struct block_device_operations dm_blk_dops;
+static const struct dax_operations dm_dax_ops;
 
 static void dm_wq_work(struct work_struct *work);
 
@@ -1483,6 +1522,12 @@  static void cleanup_mapped_device(struct mapped_device *md)
 	if (md->bs)
 		bioset_free(md->bs);
 
+	if (md->dax_dev) {
+		kill_dax(md->dax_dev);
+		put_dax(md->dax_dev);
+		md->dax_dev = NULL;
+	}
+
 	if (md->disk) {
 		spin_lock(&_minor_lock);
 		md->disk->private_data = NULL;
@@ -1510,6 +1555,7 @@  static void cleanup_mapped_device(struct mapped_device *md)
 static struct mapped_device *alloc_dev(int minor)
 {
 	int r, numa_node_id = dm_get_numa_node();
+	struct dax_device *dax_dev;
 	struct mapped_device *md;
 	void *old_md;
 
@@ -1574,6 +1620,12 @@  static struct mapped_device *alloc_dev(int minor)
 	md->disk->queue = md->queue;
 	md->disk->private_data = md;
 	sprintf(md->disk->disk_name, "dm-%d", minor);
+
+	dax_dev = alloc_dax(md, md->disk->disk_name, &dm_dax_ops);
+	if (!dax_dev)
+		goto bad;
+	md->dax_dev = dax_dev;
+
 	add_disk(md->disk);
 	format_dev_t(md->name, MKDEV(_major, minor));
 
@@ -2781,6 +2833,10 @@  static const struct block_device_operations dm_blk_dops = {
 	.owner = THIS_MODULE
 };
 
+static const struct dax_operations dm_dax_ops = {
+	.direct_access = dm_dax_direct_access,
+};
+
 /*
  * module hooks
  */
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index a7e6903866fd..bcba4d89089c 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -130,6 +130,7 @@  typedef int (*dm_busy_fn) (struct dm_target *ti);
  */
 typedef long (*dm_direct_access_fn) (struct dm_target *ti, sector_t sector,
 				     void **kaddr, pfn_t *pfn, long size);
+#define PAGE_SECTORS (PAGE_SIZE / 512)
 
 void dm_error(const char *message);