diff mbox

[v4,6/6] block: Update blkdev_dax_capable() for consistency

Message ID 1462897437-16626-7-git-send-email-toshi.kani@hpe.com (mailing list archive)
State New, archived
Headers show

Commit Message

Kani, Toshi May 10, 2016, 4:23 p.m. UTC
blkdev_dax_capable() is similar to bdev_dax_supported(), but needs
to remain as a separate interface for checking dax capability of
a raw block device.

Rename and relocate blkdev_dax_capable() to keep them maintained
consistently, and call bdev_direct_access() for the dax capability
check.

There is no change in the behavior.

Link: https://lkml.org/lkml/2016/5/9/950
Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Jens Axboe <axboe@fb.com>
Cc: Andreas Dilger <adilger.kernel@dilger.ca>
Cc: Jan Kara <jack@suse.cz>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Boaz Harrosh <boaz@plexistor.com>
---
 block/ioctl.c          |   30 ------------------------------
 fs/block_dev.c         |   39 +++++++++++++++++++++++++++++++++++++--
 include/linux/blkdev.h |    1 +
 include/linux/fs.h     |    8 --------
 4 files changed, 38 insertions(+), 40 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe linux-block" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Dan Williams May 10, 2016, 7:49 p.m. UTC | #1
On Tue, May 10, 2016 at 9:23 AM, Toshi Kani <toshi.kani@hpe.com> wrote:
> blkdev_dax_capable() is similar to bdev_dax_supported(), but needs
> to remain as a separate interface for checking dax capability of
> a raw block device.
>
> Rename and relocate blkdev_dax_capable() to keep them maintained
> consistently, and call bdev_direct_access() for the dax capability
> check.
>
> There is no change in the behavior.
>
> Link: https://lkml.org/lkml/2016/5/9/950
> Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
> Cc: Alexander Viro <viro@zeniv.linux.org.uk>
> Cc: Jens Axboe <axboe@fb.com>
> Cc: Andreas Dilger <adilger.kernel@dilger.ca>
> Cc: Jan Kara <jack@suse.cz>
> Cc: Dave Chinner <david@fromorbit.com>
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
> Cc: Christoph Hellwig <hch@infradead.org>
> Cc: Boaz Harrosh <boaz@plexistor.com>
> ---
>  block/ioctl.c          |   30 ------------------------------
>  fs/block_dev.c         |   39 +++++++++++++++++++++++++++++++++++++--
>  include/linux/blkdev.h |    1 +
>  include/linux/fs.h     |    8 --------
>  4 files changed, 38 insertions(+), 40 deletions(-)
>
> diff --git a/block/ioctl.c b/block/ioctl.c
> index 4ff1f92..7eeda07 100644
> --- a/block/ioctl.c
> +++ b/block/ioctl.c
> @@ -4,7 +4,6 @@
>  #include <linux/gfp.h>
>  #include <linux/blkpg.h>
>  #include <linux/hdreg.h>
> -#include <linux/badblocks.h>
>  #include <linux/backing-dev.h>
>  #include <linux/fs.h>
>  #include <linux/blktrace_api.h>
> @@ -407,35 +406,6 @@ static inline int is_unrecognized_ioctl(int ret)
>                 ret == -ENOIOCTLCMD;
>  }
>
> -#ifdef CONFIG_FS_DAX
> -bool blkdev_dax_capable(struct block_device *bdev)
> -{
> -       struct gendisk *disk = bdev->bd_disk;
> -
> -       if (!disk->fops->direct_access)
> -               return false;
> -
> -       /*
> -        * If the partition is not aligned on a page boundary, we can't
> -        * do dax I/O to it.
> -        */
> -       if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512))
> -                       || (bdev->bd_part->nr_sects % (PAGE_SIZE / 512)))
> -               return false;
> -
> -       /*
> -        * If the device has known bad blocks, force all I/O through the
> -        * driver / page cache.
> -        *
> -        * TODO: support finer grained dax error handling
> -        */
> -       if (disk->bb && disk->bb->count)
> -               return false;
> -
> -       return true;
> -}
> -#endif

This will collide with my pending change to revert raw block device
dax support, and also with Vishal's DAX error handling changes.  For
coordination purposes I'm thining this should all go on top of the
branch that Vishal is putting together with the dax zeroing changes
from Jan and Christoph as well.
--
To unsubscribe from this list: send the line "unsubscribe linux-block" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Kani, Toshi May 10, 2016, 9:36 p.m. UTC | #2
On Tue, 2016-05-10 at 12:49 -0700, Dan Williams wrote:
> On Tue, May 10, 2016 at 9:23 AM, Toshi Kani <toshi.kani@hpe.com> wrote:
> > 
> > blkdev_dax_capable() is similar to bdev_dax_supported(), but needs
> > to remain as a separate interface for checking dax capability of
> > a raw block device.
> > 
> > Rename and relocate blkdev_dax_capable() to keep them maintained
> > consistently, and call bdev_direct_access() for the dax capability
> > check.
> > 
> > There is no change in the behavior.
 :
> > diff --git a/block/ioctl.c b/block/ioctl.c
> > index 4ff1f92..7eeda07 100644
> > --- a/block/ioctl.c
> > +++ b/block/ioctl.c
> > @@ -4,7 +4,6 @@
> >  #include <linux/gfp.h>
> >  #include <linux/blkpg.h>
> >  #include <linux/hdreg.h>
> > -#include <linux/badblocks.h>
> >  #include <linux/backing-dev.h>
> >  #include <linux/fs.h>
> >  #include <linux/blktrace_api.h>
> > @@ -407,35 +406,6 @@ static inline int is_unrecognized_ioctl(int ret)
> >                 ret == -ENOIOCTLCMD;
> >  }
> > 
> > -#ifdef CONFIG_FS_DAX
> > -bool blkdev_dax_capable(struct block_device *bdev)
> > -{
> > -       struct gendisk *disk = bdev->bd_disk;
> > -
> > -       if (!disk->fops->direct_access)
> > -               return false;
> > -
> > -       /*
> > -        * If the partition is not aligned on a page boundary, we can't
> > -        * do dax I/O to it.
> > -        */
> > -       if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512))
> > -                       || (bdev->bd_part->nr_sects % (PAGE_SIZE /
> > 512)))
> > -               return false;
> > -
> > -       /*
> > -        * If the device has known bad blocks, force all I/O through
> > the
> > -        * driver / page cache.
> > -        *
> > -        * TODO: support finer grained dax error handling
> > -        */
> > -       if (disk->bb && disk->bb->count)
> > -               return false;
> > -
> > -       return true;
> > -}
> > -#endif
>
> This will collide with my pending change to revert raw block device
> dax support, and also with Vishal's DAX error handling changes.  For
> coordination purposes I'm thining this should all go on top of the
> branch that Vishal is putting together with the dax zeroing changes
> from Jan and Christoph as well.

This patch does not depend on the rest of the series, so it can be handled
separately.  There is a minor conflict -- bdev_dax_capable() is put under
bdev_dax_supported() in the same file.  This should be easy to resolve, but
let me know if you need me to merge it up.

Thanks,
-Toshi
--
To unsubscribe from this list: send the line "unsubscribe linux-block" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jan Kara May 11, 2016, 8:05 a.m. UTC | #3
On Tue 10-05-16 10:23:57, Toshi Kani wrote:
> blkdev_dax_capable() is similar to bdev_dax_supported(), but needs
> to remain as a separate interface for checking dax capability of
> a raw block device.
> 
> Rename and relocate blkdev_dax_capable() to keep them maintained
> consistently, and call bdev_direct_access() for the dax capability
> check.
...
> +bool bdev_dax_capable(struct block_device *bdev)
> +{
> +	struct gendisk *disk = bdev->bd_disk;
> +	struct blk_dax_ctl dax = {
> +		.size = PAGE_SIZE,
> +	};
> +
> +	if (!IS_ENABLED(CONFIG_FS_DAX))
> +		return false;

Frankly, I prefer the #ifdef CONFIG_FS_DAX and just compile the code out
when DAX is not enabled (like it was with blkdev_dax_capable()). That way we
don't grow the kernel for people who don't care about DAX.

								Honza
Kani, Toshi May 11, 2016, 2:25 p.m. UTC | #4
On Wed, 2016-05-11 at 10:05 +0200, Jan Kara wrote:
> On Tue 10-05-16 10:23:57, Toshi Kani wrote:
> > 
> > blkdev_dax_capable() is similar to bdev_dax_supported(), but needs
> > to remain as a separate interface for checking dax capability of
> > a raw block device.
> > 
> > Rename and relocate blkdev_dax_capable() to keep them maintained
> > consistently, and call bdev_direct_access() for the dax capability
> > check.
> ...
> > 
> > +bool bdev_dax_capable(struct block_device *bdev)
> > +{
> > +	struct gendisk *disk = bdev->bd_disk;
> > +	struct blk_dax_ctl dax = {
> > +		.size = PAGE_SIZE,
> > +	};
> > +
> > +	if (!IS_ENABLED(CONFIG_FS_DAX))
> > +		return false;
>
> Frankly, I prefer the #ifdef CONFIG_FS_DAX and just compile the code out
> when DAX is not enabled (like it was with blkdev_dax_capable()). That way
> we don't grow the kernel for people who don't care about DAX.

When CONFIG_FS_DAX is not set, the rest of the code is optimized out.  So,
I think the code size is the same.

(gdb) disas bdev_dax_capable
Dump of assembler code for function bdev_dax_capable:
   0xffffffff81260d20 <+0>:     callq  0xffffffff81813c30 <__fentry__>
   0xffffffff81260d25 <+5>:     push   %rbp
   0xffffffff81260d26 <+6>:     xor    %eax,%eax
   0xffffffff81260d28 <+8>:     mov    %rsp,%rbp
   0xffffffff81260d2b <+11>:    pop    %rbp
   0xffffffff81260d2c <+12>:    retq   
End of assembler dump.

Thanks,
-Toshi
--
To unsubscribe from this list: send the line "unsubscribe linux-block" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jan Kara May 11, 2016, 3:26 p.m. UTC | #5
On Wed 11-05-16 08:25:10, Toshi Kani wrote:
> On Wed, 2016-05-11 at 10:05 +0200, Jan Kara wrote:
> > On Tue 10-05-16 10:23:57, Toshi Kani wrote:
> > > 
> > > blkdev_dax_capable() is similar to bdev_dax_supported(), but needs
> > > to remain as a separate interface for checking dax capability of
> > > a raw block device.
> > > 
> > > Rename and relocate blkdev_dax_capable() to keep them maintained
> > > consistently, and call bdev_direct_access() for the dax capability
> > > check.
> > ...
> > > 
> > > +bool bdev_dax_capable(struct block_device *bdev)
> > > +{
> > > +	struct gendisk *disk = bdev->bd_disk;
> > > +	struct blk_dax_ctl dax = {
> > > +		.size = PAGE_SIZE,
> > > +	};
> > > +
> > > +	if (!IS_ENABLED(CONFIG_FS_DAX))
> > > +		return false;
> >
> > Frankly, I prefer the #ifdef CONFIG_FS_DAX and just compile the code out
> > when DAX is not enabled (like it was with blkdev_dax_capable()). That way
> > we don't grow the kernel for people who don't care about DAX.
> 
> When CONFIG_FS_DAX is not set, the rest of the code is optimized out.  So,
> I think the code size is the same.
> 
> (gdb) disas bdev_dax_capable
> Dump of assembler code for function bdev_dax_capable:
>    0xffffffff81260d20 <+0>:     callq  0xffffffff81813c30 <__fentry__>
>    0xffffffff81260d25 <+5>:     push   %rbp
>    0xffffffff81260d26 <+6>:     xor    %eax,%eax
>    0xffffffff81260d28 <+8>:     mov    %rsp,%rbp
>    0xffffffff81260d2b <+11>:    pop    %rbp
>    0xffffffff81260d2c <+12>:    retq   
> End of assembler dump.

Ah, good. So feel free to add:

Reviewed-by: Jan Kara <jack@suse.cz>

								Honza
Dan Williams May 17, 2016, 10:07 p.m. UTC | #6
On Tue, May 10, 2016 at 9:23 AM, Toshi Kani <toshi.kani@hpe.com> wrote:
> blkdev_dax_capable() is similar to bdev_dax_supported(), but needs
> to remain as a separate interface for checking dax capability of
> a raw block device.
>
> Rename and relocate blkdev_dax_capable() to keep them maintained
> consistently, and call bdev_direct_access() for the dax capability
> check.
>
> There is no change in the behavior.
>
> Link: https://lkml.org/lkml/2016/5/9/950
> Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
> Cc: Alexander Viro <viro@zeniv.linux.org.uk>
> Cc: Jens Axboe <axboe@fb.com>
> Cc: Andreas Dilger <adilger.kernel@dilger.ca>
> Cc: Jan Kara <jack@suse.cz>
> Cc: Dave Chinner <david@fromorbit.com>
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
> Cc: Christoph Hellwig <hch@infradead.org>
> Cc: Boaz Harrosh <boaz@plexistor.com>
> ---
>  block/ioctl.c          |   30 ------------------------------
>  fs/block_dev.c         |   39 +++++++++++++++++++++++++++++++++++++--
>  include/linux/blkdev.h |    1 +
>  include/linux/fs.h     |    8 --------
>  4 files changed, 38 insertions(+), 40 deletions(-)
>
> diff --git a/block/ioctl.c b/block/ioctl.c
> index 4ff1f92..7eeda07 100644
> --- a/block/ioctl.c
> +++ b/block/ioctl.c
> @@ -4,7 +4,6 @@
>  #include <linux/gfp.h>
>  #include <linux/blkpg.h>
>  #include <linux/hdreg.h>
> -#include <linux/badblocks.h>
>  #include <linux/backing-dev.h>
>  #include <linux/fs.h>
>  #include <linux/blktrace_api.h>
> @@ -407,35 +406,6 @@ static inline int is_unrecognized_ioctl(int ret)
>                 ret == -ENOIOCTLCMD;
>  }
>
> -#ifdef CONFIG_FS_DAX
> -bool blkdev_dax_capable(struct block_device *bdev)
> -{
> -       struct gendisk *disk = bdev->bd_disk;
> -
> -       if (!disk->fops->direct_access)
> -               return false;
> -
> -       /*
> -        * If the partition is not aligned on a page boundary, we can't
> -        * do dax I/O to it.
> -        */
> -       if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512))
> -                       || (bdev->bd_part->nr_sects % (PAGE_SIZE / 512)))
> -               return false;
> -
> -       /*
> -        * If the device has known bad blocks, force all I/O through the
> -        * driver / page cache.
> -        *
> -        * TODO: support finer grained dax error handling
> -        */
> -       if (disk->bb && disk->bb->count)
> -               return false;
> -
> -       return true;
> -}
> -#endif
> -
>  static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode,
>                 unsigned cmd, unsigned long arg)
>  {
> diff --git a/fs/block_dev.c b/fs/block_dev.c
> index db55638..5d79093 100644
> --- a/fs/block_dev.c
> +++ b/fs/block_dev.c
> @@ -29,6 +29,7 @@
>  #include <linux/log2.h>
>  #include <linux/cleancache.h>
>  #include <linux/dax.h>
> +#include <linux/badblocks.h>
>  #include <asm/uaccess.h>
>  #include "internal.h"
>
> @@ -554,6 +555,40 @@ int bdev_dax_supported(struct super_block *sb, int blocksize)
>  }
>  EXPORT_SYMBOL_GPL(bdev_dax_supported);
>
> +/**
> + * bdev_dax_capable() - Return if the raw device is capable for dax
> + * @bdev: The device for raw block device access
> + */
> +bool bdev_dax_capable(struct block_device *bdev)
> +{
> +       struct gendisk *disk = bdev->bd_disk;
> +       struct blk_dax_ctl dax = {
> +               .size = PAGE_SIZE,
> +       };
> +
> +       if (!IS_ENABLED(CONFIG_FS_DAX))
> +               return false;
> +
> +       dax.sector = 0;
> +       if (bdev_direct_access(bdev, &dax) < 0)
> +               return false;
> +
> +       dax.sector = bdev->bd_part->nr_sects - (PAGE_SIZE / 512);
> +       if (bdev_direct_access(bdev, &dax) < 0)
> +               return false;

So I just noticed that this new implementation of bdev_dax_capable()
prevents us from enabling DAX in the presence of errors, which was the
goal of Vishal's pending patches for 4.7.

I like the goal of centralizing checks, but this collides the base DAX
capability with the ability to perform an actual accesses at a given
address.

All the immediate solutions that come to mind right now are pretty
ugly, like an ignore errors flag...

Hmm, what about explicitly returning -EBADBLOCK and updating size to
return the offset of the error?  That might be useful information to
have in general...
--
To unsubscribe from this list: send the line "unsubscribe linux-block" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/block/ioctl.c b/block/ioctl.c
index 4ff1f92..7eeda07 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -4,7 +4,6 @@ 
 #include <linux/gfp.h>
 #include <linux/blkpg.h>
 #include <linux/hdreg.h>
-#include <linux/badblocks.h>
 #include <linux/backing-dev.h>
 #include <linux/fs.h>
 #include <linux/blktrace_api.h>
@@ -407,35 +406,6 @@  static inline int is_unrecognized_ioctl(int ret)
 		ret == -ENOIOCTLCMD;
 }
 
-#ifdef CONFIG_FS_DAX
-bool blkdev_dax_capable(struct block_device *bdev)
-{
-	struct gendisk *disk = bdev->bd_disk;
-
-	if (!disk->fops->direct_access)
-		return false;
-
-	/*
-	 * If the partition is not aligned on a page boundary, we can't
-	 * do dax I/O to it.
-	 */
-	if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512))
-			|| (bdev->bd_part->nr_sects % (PAGE_SIZE / 512)))
-		return false;
-
-	/*
-	 * If the device has known bad blocks, force all I/O through the
-	 * driver / page cache.
-	 *
-	 * TODO: support finer grained dax error handling
-	 */
-	if (disk->bb && disk->bb->count)
-		return false;
-
-	return true;
-}
-#endif
-
 static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode,
 		unsigned cmd, unsigned long arg)
 {
diff --git a/fs/block_dev.c b/fs/block_dev.c
index db55638..5d79093 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -29,6 +29,7 @@ 
 #include <linux/log2.h>
 #include <linux/cleancache.h>
 #include <linux/dax.h>
+#include <linux/badblocks.h>
 #include <asm/uaccess.h>
 #include "internal.h"
 
@@ -554,6 +555,40 @@  int bdev_dax_supported(struct super_block *sb, int blocksize)
 }
 EXPORT_SYMBOL_GPL(bdev_dax_supported);
 
+/**
+ * bdev_dax_capable() - Return if the raw device is capable for dax
+ * @bdev: The device for raw block device access
+ */
+bool bdev_dax_capable(struct block_device *bdev)
+{
+	struct gendisk *disk = bdev->bd_disk;
+	struct blk_dax_ctl dax = {
+		.size = PAGE_SIZE,
+	};
+
+	if (!IS_ENABLED(CONFIG_FS_DAX))
+		return false;
+
+	dax.sector = 0;
+	if (bdev_direct_access(bdev, &dax) < 0)
+		return false;
+
+	dax.sector = bdev->bd_part->nr_sects - (PAGE_SIZE / 512);
+	if (bdev_direct_access(bdev, &dax) < 0)
+		return false;
+
+	/*
+	 * If the device has known bad blocks, force all I/O through the
+	 * driver / page cache.
+	 *
+	 * TODO: support finer grained dax error handling
+	 */
+	if (disk->bb && disk->bb->count)
+		return false;
+
+	return true;
+}
+
 /*
  * pseudo-fs
  */
@@ -1295,7 +1330,7 @@  static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 
 			if (!ret) {
 				bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
-				if (!blkdev_dax_capable(bdev))
+				if (!bdev_dax_capable(bdev))
 					bdev->bd_inode->i_flags &= ~S_DAX;
 			}
 
@@ -1332,7 +1367,7 @@  static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 				goto out_clear;
 			}
 			bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
-			if (!blkdev_dax_capable(bdev))
+			if (!bdev_dax_capable(bdev))
 				bdev->bd_inode->i_flags &= ~S_DAX;
 		}
 	} else {
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 71231a5..27cbefe 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1689,6 +1689,7 @@  extern int bdev_write_page(struct block_device *, sector_t, struct page *,
 						struct writeback_control *);
 extern long bdev_direct_access(struct block_device *, struct blk_dax_ctl *);
 extern int bdev_dax_supported(struct super_block *, int);
+extern bool bdev_dax_capable(struct block_device *);
 #else /* CONFIG_BLOCK */
 
 struct block_device;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 70e61b5..8363a10 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2320,14 +2320,6 @@  extern struct super_block *freeze_bdev(struct block_device *);
 extern void emergency_thaw_all(void);
 extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
 extern int fsync_bdev(struct block_device *);
-#ifdef CONFIG_FS_DAX
-extern bool blkdev_dax_capable(struct block_device *bdev);
-#else
-static inline bool blkdev_dax_capable(struct block_device *bdev)
-{
-	return false;
-}
-#endif
 
 extern struct super_block *blockdev_superblock;