Message ID | 149245618859.10206.13182319600260215993.stgit@dwillia2-desk3.amr.corp.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Mon, Apr 17, 2017 at 12:09 PM, Dan Williams <dan.j.williams@intel.com> wrote: > Allocate a dax_device to represent the capacity of a device-mapper > instance. Provide a ->direct_access() method via the new dax_operations > indirection that mirrors the functionality of the current direct_access > support via block_device_operations. Once fs/dax.c has been converted > to use dax_operations the old dm_blk_direct_access() will be removed. > > A new helper dm_dax_get_live_target() is introduced to separate some of > the dm-specifics from the direct_access implementation. > > This enabling is only for the top-level dm representation to upper > layers. Converting target direct_access implementations is deferred to a > separate patch. > > Cc: Toshi Kani <toshi.kani@hpe.com> > Cc: Mike Snitzer <snitzer@redhat.com> Hi Mike, Any concerns with these dax_device and dax_operations changes to device-mapper for the upcoming merge window? > Signed-off-by: Dan Williams <dan.j.williams@intel.com> > --- > drivers/md/Kconfig | 1 > drivers/md/dm-core.h | 1 > drivers/md/dm.c | 84 ++++++++++++++++++++++++++++++++++------- > include/linux/device-mapper.h | 1 > 4 files changed, 73 insertions(+), 14 deletions(-) > > diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig > index b7767da50c26..1de8372d9459 100644 > --- a/drivers/md/Kconfig > +++ b/drivers/md/Kconfig > @@ -200,6 +200,7 @@ config BLK_DEV_DM_BUILTIN > config BLK_DEV_DM > tristate "Device mapper support" > select BLK_DEV_DM_BUILTIN > + select DAX > ---help--- > Device-mapper is a low level volume manager. It works by allowing > people to specify mappings for ranges of logical sectors. Various > diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h > index 136fda3ff9e5..538630190f66 100644 > --- a/drivers/md/dm-core.h > +++ b/drivers/md/dm-core.h > @@ -58,6 +58,7 @@ struct mapped_device { > struct target_type *immutable_target_type; > > struct gendisk *disk; > + struct dax_device *dax_dev; > char name[16]; > > void *interface_ptr; > diff --git a/drivers/md/dm.c b/drivers/md/dm.c > index dfb75979e455..bd56dfe43a99 100644 > --- a/drivers/md/dm.c > +++ b/drivers/md/dm.c > @@ -16,6 +16,7 @@ > #include <linux/blkpg.h> > #include <linux/bio.h> > #include <linux/mempool.h> > +#include <linux/dax.h> > #include <linux/slab.h> > #include <linux/idr.h> > #include <linux/hdreg.h> > @@ -908,31 +909,68 @@ int dm_set_target_max_io_len(struct dm_target *ti, sector_t len) > } > EXPORT_SYMBOL_GPL(dm_set_target_max_io_len); > > -static long dm_blk_direct_access(struct block_device *bdev, sector_t sector, > - void **kaddr, pfn_t *pfn, long size) > +static struct dm_target *dm_dax_get_live_target(struct mapped_device *md, > + sector_t sector, int *srcu_idx) > { > - struct mapped_device *md = bdev->bd_disk->private_data; > struct dm_table *map; > struct dm_target *ti; > - int srcu_idx; > - long len, ret = -EIO; > > - map = dm_get_live_table(md, &srcu_idx); > + map = dm_get_live_table(md, srcu_idx); > if (!map) > - goto out; > + return NULL; > > ti = dm_table_find_target(map, sector); > if (!dm_target_is_valid(ti)) > - goto out; > + return NULL; > > - len = max_io_len(sector, ti) << SECTOR_SHIFT; > - size = min(len, size); > + return ti; > +} > > - if (ti->type->direct_access) > - ret = ti->type->direct_access(ti, sector, kaddr, pfn, size); > -out: > +static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, > + long nr_pages, void **kaddr, pfn_t *pfn) > +{ > + struct mapped_device *md = dax_get_private(dax_dev); > + sector_t sector = pgoff * PAGE_SECTORS; > + struct dm_target *ti; > + long len, ret = -EIO; > + int srcu_idx; > + > + ti = dm_dax_get_live_target(md, sector, &srcu_idx); > + > + if (!ti) > + goto out; > + if (!ti->type->direct_access) > + goto out; > + len = max_io_len(sector, ti) / PAGE_SECTORS; > + if (len < 1) > + goto out; > + nr_pages = min(len, nr_pages); > + if (ti->type->direct_access) { > + ret = ti->type->direct_access(ti, sector, kaddr, pfn, > + nr_pages * PAGE_SIZE); > + /* > + * FIXME: convert ti->type->direct_access to return > + * nr_pages directly. > + */ > + if (ret >= 0) > + ret /= PAGE_SIZE; > + } > + out: > dm_put_live_table(md, srcu_idx); > - return min(ret, size); > + > + return ret; > +} > + > +static long dm_blk_direct_access(struct block_device *bdev, sector_t sector, > + void **kaddr, pfn_t *pfn, long size) > +{ > + struct mapped_device *md = bdev->bd_disk->private_data; > + struct dax_device *dax_dev = md->dax_dev; > + long nr_pages = size / PAGE_SIZE; > + > + nr_pages = dm_dax_direct_access(dax_dev, sector / PAGE_SECTORS, > + nr_pages, kaddr, pfn); > + return nr_pages < 0 ? nr_pages : nr_pages * PAGE_SIZE; > } > > /* > @@ -1437,6 +1475,7 @@ static int next_free_minor(int *minor) > } > > static const struct block_device_operations dm_blk_dops; > +static const struct dax_operations dm_dax_ops; > > static void dm_wq_work(struct work_struct *work); > > @@ -1483,6 +1522,12 @@ static void cleanup_mapped_device(struct mapped_device *md) > if (md->bs) > bioset_free(md->bs); > > + if (md->dax_dev) { > + kill_dax(md->dax_dev); > + put_dax(md->dax_dev); > + md->dax_dev = NULL; > + } > + > if (md->disk) { > spin_lock(&_minor_lock); > md->disk->private_data = NULL; > @@ -1510,6 +1555,7 @@ static void cleanup_mapped_device(struct mapped_device *md) > static struct mapped_device *alloc_dev(int minor) > { > int r, numa_node_id = dm_get_numa_node(); > + struct dax_device *dax_dev; > struct mapped_device *md; > void *old_md; > > @@ -1574,6 +1620,12 @@ static struct mapped_device *alloc_dev(int minor) > md->disk->queue = md->queue; > md->disk->private_data = md; > sprintf(md->disk->disk_name, "dm-%d", minor); > + > + dax_dev = alloc_dax(md, md->disk->disk_name, &dm_dax_ops); > + if (!dax_dev) > + goto bad; > + md->dax_dev = dax_dev; > + > add_disk(md->disk); > format_dev_t(md->name, MKDEV(_major, minor)); > > @@ -2781,6 +2833,10 @@ static const struct block_device_operations dm_blk_dops = { > .owner = THIS_MODULE > }; > > +static const struct dax_operations dm_dax_ops = { > + .direct_access = dm_dax_direct_access, > +}; > + > /* > * module hooks > */ > diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h > index a7e6903866fd..bcba4d89089c 100644 > --- a/include/linux/device-mapper.h > +++ b/include/linux/device-mapper.h > @@ -130,6 +130,7 @@ typedef int (*dm_busy_fn) (struct dm_target *ti); > */ > typedef long (*dm_direct_access_fn) (struct dm_target *ti, sector_t sector, > void **kaddr, pfn_t *pfn, long size); > +#define PAGE_SECTORS (PAGE_SIZE / 512) > > void dm_error(const char *message); > >
On Thu, Apr 20 2017 at 12:30pm -0400, Dan Williams <dan.j.williams@intel.com> wrote: > On Mon, Apr 17, 2017 at 12:09 PM, Dan Williams <dan.j.williams@intel.com> wrote: > > Allocate a dax_device to represent the capacity of a device-mapper > > instance. Provide a ->direct_access() method via the new dax_operations > > indirection that mirrors the functionality of the current direct_access > > support via block_device_operations. Once fs/dax.c has been converted > > to use dax_operations the old dm_blk_direct_access() will be removed. > > > > A new helper dm_dax_get_live_target() is introduced to separate some of > > the dm-specifics from the direct_access implementation. > > > > This enabling is only for the top-level dm representation to upper > > layers. Converting target direct_access implementations is deferred to a > > separate patch. > > > > Cc: Toshi Kani <toshi.kani@hpe.com> > > Cc: Mike Snitzer <snitzer@redhat.com> > > Hi Mike, > > Any concerns with these dax_device and dax_operations changes to > device-mapper for the upcoming merge window? Sorry for the delay. I just reviewed them, overall looks good. The enabling functions in the DAX code, that are mixed in with the DM changes, could maybe be factored out into separate earlier patches but I don't feel that strongly about that. Feel free to add this tag to the handful of relevant DM patches: Reviewed-by: Mike Snitzer <snitzer@redhat.com> I haven't done a merge with the linux-dm.git 'dm-4.12' branch but it'd be good to verify there aren't any merge conflicts. If there are then it'd be nice to know going in to the merge so that we can forecast as much to Linus. I really appreciate you doing this work! Thanks, Mike
On Mon, 2017-04-17 at 12:09 -0700, Dan Williams wrote: > diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig > index b7767da50c26..1de8372d9459 100644 > --- a/drivers/md/Kconfig > +++ b/drivers/md/Kconfig > @@ -200,6 +200,7 @@ config BLK_DEV_DM_BUILTIN > config BLK_DEV_DM > tristate "Device mapper support" > select BLK_DEV_DM_BUILTIN > + select DAX > ---help--- > Device-mapper is a low level volume manager. It works by allowing > people to specify mappings for ranges of logical sectors. Various (replying to an e-mail of three months ago) Hello Dan, While building a v4.12 kernel I noticed that enabling device mapper support now unconditionally enables DAX. I think there are plenty of systems that use dm but do not need DAX. Have you considered to rework this such that instead of dm selecting DAX that DAX support is only enabled in dm if CONFIG_DAX is enabled? Thanks, Bart.
On Fri, Jul 28 2017 at 12:17pm -0400, Bart Van Assche <Bart.VanAssche@wdc.com> wrote: > On Mon, 2017-04-17 at 12:09 -0700, Dan Williams wrote: > > diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig > > index b7767da50c26..1de8372d9459 100644 > > --- a/drivers/md/Kconfig > > +++ b/drivers/md/Kconfig > > @@ -200,6 +200,7 @@ config BLK_DEV_DM_BUILTIN > > config BLK_DEV_DM > > tristate "Device mapper support" > > select BLK_DEV_DM_BUILTIN > > + select DAX > > ---help--- > > Device-mapper is a low level volume manager. It works by allowing > > people to specify mappings for ranges of logical sectors. Various > > (replying to an e-mail of three months ago) > > Hello Dan, > > While building a v4.12 kernel I noticed that enabling device mapper support > now unconditionally enables DAX. I think there are plenty of systems that use > dm but do not need DAX. Have you considered to rework this such that instead > of dm selecting DAX that DAX support is only enabled in dm if CONFIG_DAX is > enabled? I haven't but patches to do so would be welcomed. Mike
On Fri, Jul 28, 2017 at 9:17 AM, Bart Van Assche <Bart.VanAssche@wdc.com> wrote: > On Mon, 2017-04-17 at 12:09 -0700, Dan Williams wrote: >> diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig >> index b7767da50c26..1de8372d9459 100644 >> --- a/drivers/md/Kconfig >> +++ b/drivers/md/Kconfig >> @@ -200,6 +200,7 @@ config BLK_DEV_DM_BUILTIN >> config BLK_DEV_DM >> tristate "Device mapper support" >> select BLK_DEV_DM_BUILTIN >> + select DAX >> ---help--- >> Device-mapper is a low level volume manager. It works by allowing >> people to specify mappings for ranges of logical sectors. Various > > (replying to an e-mail of three months ago) > > Hello Dan, > > While building a v4.12 kernel I noticed that enabling device mapper support > now unconditionally enables DAX. I think there are plenty of systems that use > dm but do not need DAX. Have you considered to rework this such that instead > of dm selecting DAX that DAX support is only enabled in dm if CONFIG_DAX is > enabled? > I'd rather flip this around and add a CONFIG_DM_DAX that gates whether DM enables / links to the DAX core. I'll take a look at a patch.
On Sat, 2017-07-29 at 12:57 -0700, Dan Williams wrote: > On Fri, Jul 28, 2017 at 9:17 AM, Bart Van Assche <Bart.VanAssche@wdc.com> wrote: > > On Mon, 2017-04-17 at 12:09 -0700, Dan Williams wrote: > > > diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig > > > index b7767da50c26..1de8372d9459 100644 > > > --- a/drivers/md/Kconfig > > > +++ b/drivers/md/Kconfig > > > @@ -200,6 +200,7 @@ config BLK_DEV_DM_BUILTIN > > > config BLK_DEV_DM > > > tristate "Device mapper support" > > > select BLK_DEV_DM_BUILTIN > > > + select DAX > > > ---help--- > > > Device-mapper is a low level volume manager. It works by allowing > > > people to specify mappings for ranges of logical sectors. Various > > > > (replying to an e-mail of three months ago) > > > > Hello Dan, > > > > While building a v4.12 kernel I noticed that enabling device mapper support > > now unconditionally enables DAX. I think there are plenty of systems that use > > dm but do not need DAX. Have you considered to rework this such that instead > > of dm selecting DAX that DAX support is only enabled in dm if CONFIG_DAX is > > enabled? > > I'd rather flip this around and add a CONFIG_DM_DAX that gates whether > DM enables / links to the DAX core. I'll take a look at a patch. Thanks! Please also consider to move all DAX-related dm code into a separate source file such that the number of #ifdef CONFIG_DM_DAX statements can be kept to an absolute minimum. Bart.
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index b7767da50c26..1de8372d9459 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -200,6 +200,7 @@ config BLK_DEV_DM_BUILTIN config BLK_DEV_DM tristate "Device mapper support" select BLK_DEV_DM_BUILTIN + select DAX ---help--- Device-mapper is a low level volume manager. It works by allowing people to specify mappings for ranges of logical sectors. Various diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 136fda3ff9e5..538630190f66 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -58,6 +58,7 @@ struct mapped_device { struct target_type *immutable_target_type; struct gendisk *disk; + struct dax_device *dax_dev; char name[16]; void *interface_ptr; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index dfb75979e455..bd56dfe43a99 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -16,6 +16,7 @@ #include <linux/blkpg.h> #include <linux/bio.h> #include <linux/mempool.h> +#include <linux/dax.h> #include <linux/slab.h> #include <linux/idr.h> #include <linux/hdreg.h> @@ -908,31 +909,68 @@ int dm_set_target_max_io_len(struct dm_target *ti, sector_t len) } EXPORT_SYMBOL_GPL(dm_set_target_max_io_len); -static long dm_blk_direct_access(struct block_device *bdev, sector_t sector, - void **kaddr, pfn_t *pfn, long size) +static struct dm_target *dm_dax_get_live_target(struct mapped_device *md, + sector_t sector, int *srcu_idx) { - struct mapped_device *md = bdev->bd_disk->private_data; struct dm_table *map; struct dm_target *ti; - int srcu_idx; - long len, ret = -EIO; - map = dm_get_live_table(md, &srcu_idx); + map = dm_get_live_table(md, srcu_idx); if (!map) - goto out; + return NULL; ti = dm_table_find_target(map, sector); if (!dm_target_is_valid(ti)) - goto out; + return NULL; - len = max_io_len(sector, ti) << SECTOR_SHIFT; - size = min(len, size); + return ti; +} - if (ti->type->direct_access) - ret = ti->type->direct_access(ti, sector, kaddr, pfn, size); -out: +static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, + long nr_pages, void **kaddr, pfn_t *pfn) +{ + struct mapped_device *md = dax_get_private(dax_dev); + sector_t sector = pgoff * PAGE_SECTORS; + struct dm_target *ti; + long len, ret = -EIO; + int srcu_idx; + + ti = dm_dax_get_live_target(md, sector, &srcu_idx); + + if (!ti) + goto out; + if (!ti->type->direct_access) + goto out; + len = max_io_len(sector, ti) / PAGE_SECTORS; + if (len < 1) + goto out; + nr_pages = min(len, nr_pages); + if (ti->type->direct_access) { + ret = ti->type->direct_access(ti, sector, kaddr, pfn, + nr_pages * PAGE_SIZE); + /* + * FIXME: convert ti->type->direct_access to return + * nr_pages directly. + */ + if (ret >= 0) + ret /= PAGE_SIZE; + } + out: dm_put_live_table(md, srcu_idx); - return min(ret, size); + + return ret; +} + +static long dm_blk_direct_access(struct block_device *bdev, sector_t sector, + void **kaddr, pfn_t *pfn, long size) +{ + struct mapped_device *md = bdev->bd_disk->private_data; + struct dax_device *dax_dev = md->dax_dev; + long nr_pages = size / PAGE_SIZE; + + nr_pages = dm_dax_direct_access(dax_dev, sector / PAGE_SECTORS, + nr_pages, kaddr, pfn); + return nr_pages < 0 ? nr_pages : nr_pages * PAGE_SIZE; } /* @@ -1437,6 +1475,7 @@ static int next_free_minor(int *minor) } static const struct block_device_operations dm_blk_dops; +static const struct dax_operations dm_dax_ops; static void dm_wq_work(struct work_struct *work); @@ -1483,6 +1522,12 @@ static void cleanup_mapped_device(struct mapped_device *md) if (md->bs) bioset_free(md->bs); + if (md->dax_dev) { + kill_dax(md->dax_dev); + put_dax(md->dax_dev); + md->dax_dev = NULL; + } + if (md->disk) { spin_lock(&_minor_lock); md->disk->private_data = NULL; @@ -1510,6 +1555,7 @@ static void cleanup_mapped_device(struct mapped_device *md) static struct mapped_device *alloc_dev(int minor) { int r, numa_node_id = dm_get_numa_node(); + struct dax_device *dax_dev; struct mapped_device *md; void *old_md; @@ -1574,6 +1620,12 @@ static struct mapped_device *alloc_dev(int minor) md->disk->queue = md->queue; md->disk->private_data = md; sprintf(md->disk->disk_name, "dm-%d", minor); + + dax_dev = alloc_dax(md, md->disk->disk_name, &dm_dax_ops); + if (!dax_dev) + goto bad; + md->dax_dev = dax_dev; + add_disk(md->disk); format_dev_t(md->name, MKDEV(_major, minor)); @@ -2781,6 +2833,10 @@ static const struct block_device_operations dm_blk_dops = { .owner = THIS_MODULE }; +static const struct dax_operations dm_dax_ops = { + .direct_access = dm_dax_direct_access, +}; + /* * module hooks */ diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index a7e6903866fd..bcba4d89089c 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -130,6 +130,7 @@ typedef int (*dm_busy_fn) (struct dm_target *ti); */ typedef long (*dm_direct_access_fn) (struct dm_target *ti, sector_t sector, void **kaddr, pfn_t *pfn, long size); +#define PAGE_SECTORS (PAGE_SIZE / 512) void dm_error(const char *message);
Allocate a dax_device to represent the capacity of a device-mapper instance. Provide a ->direct_access() method via the new dax_operations indirection that mirrors the functionality of the current direct_access support via block_device_operations. Once fs/dax.c has been converted to use dax_operations the old dm_blk_direct_access() will be removed. A new helper dm_dax_get_live_target() is introduced to separate some of the dm-specifics from the direct_access implementation. This enabling is only for the top-level dm representation to upper layers. Converting target direct_access implementations is deferred to a separate patch. Cc: Toshi Kani <toshi.kani@hpe.com> Cc: Mike Snitzer <snitzer@redhat.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com> --- drivers/md/Kconfig | 1 drivers/md/dm-core.h | 1 drivers/md/dm.c | 84 ++++++++++++++++++++++++++++++++++------- include/linux/device-mapper.h | 1 4 files changed, 73 insertions(+), 14 deletions(-)