diff mbox series

[5/6] block: use iomap for writes to block devices

Message ID 20230801172201.1923299-6-hch@lst.de (mailing list archive)
State New, archived
Headers show
Series [1/6] fs: remove emergency_thaw_bdev | expand

Commit Message

Christoph Hellwig Aug. 1, 2023, 5:22 p.m. UTC
Use iomap in buffer_head compat mode to write to block devices.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Reviewed-by: Pankaj Raghav <p.raghav@samsung.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
---
 block/Kconfig |  1 +
 block/fops.c  | 31 +++++++++++++++++++++++++++++--
 2 files changed, 30 insertions(+), 2 deletions(-)

Comments

Christian Brauner Aug. 2, 2023, 7:27 a.m. UTC | #1
On Tue, Aug 01, 2023 at 07:22:00PM +0200, Christoph Hellwig wrote:
> Use iomap in buffer_head compat mode to write to block devices.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
> Reviewed-by: Pankaj Raghav <p.raghav@samsung.com>
> Reviewed-by: Hannes Reinecke <hare@suse.de>
> ---

Reviewed-by: Christian Brauner <brauner@kernel.org>
Johannes Thumshirn Aug. 2, 2023, 11:50 a.m. UTC | #2
Looks good,
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Xu Yang April 26, 2024, 10:37 a.m. UTC | #3
Hi Christoph,

On Tue, Aug 01, 2023 at 07:22:00PM +0200, Christoph Hellwig wrote:
> Use iomap in buffer_head compat mode to write to block devices.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
> Reviewed-by: Pankaj Raghav <p.raghav@samsung.com>
> Reviewed-by: Hannes Reinecke <hare@suse.de>
> ---
>  block/Kconfig |  1 +
>  block/fops.c  | 31 +++++++++++++++++++++++++++++--
>  2 files changed, 30 insertions(+), 2 deletions(-)
> 
> diff --git a/block/Kconfig b/block/Kconfig
> index 86122e459fe046..1a13ef0b1ca10c 100644
> --- a/block/Kconfig
> +++ b/block/Kconfig
> @@ -5,6 +5,7 @@
>  menuconfig BLOCK
>         bool "Enable the block layer" if EXPERT
>         default y
> +       select FS_IOMAP
>         select SBITMAP
>         help
>  	 Provide block layer support for the kernel.
> diff --git a/block/fops.c b/block/fops.c
> index f0b822c28ddfe2..063ece37d44e44 100644
> --- a/block/fops.c
> +++ b/block/fops.c
> @@ -15,6 +15,7 @@
>  #include <linux/falloc.h>
>  #include <linux/suspend.h>
>  #include <linux/fs.h>
> +#include <linux/iomap.h>
>  #include <linux/module.h>
>  #include "blk.h"
>  
> @@ -386,6 +387,27 @@ static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
>  	return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages));
>  }
>  
> +static int blkdev_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
> +		unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
> +{
> +	struct block_device *bdev = I_BDEV(inode);
> +	loff_t isize = i_size_read(inode);
> +
> +	iomap->bdev = bdev;
> +	iomap->offset = ALIGN_DOWN(offset, bdev_logical_block_size(bdev));
> +	if (iomap->offset >= isize)
> +		return -EIO;
> +	iomap->type = IOMAP_MAPPED;
> +	iomap->addr = iomap->offset;
> +	iomap->length = isize - iomap->offset;
> +	iomap->flags |= IOMAP_F_BUFFER_HEAD;
> +	return 0;
> +}
> +
> +static const struct iomap_ops blkdev_iomap_ops = {
> +	.iomap_begin		= blkdev_iomap_begin,
> +};
> +
>  static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
>  {
>  	return block_write_full_page(page, blkdev_get_block, wbc);
> @@ -556,6 +578,11 @@ blkdev_direct_write(struct kiocb *iocb, struct iov_iter *from)
>  	return written;
>  }
>  
> +static ssize_t blkdev_buffered_write(struct kiocb *iocb, struct iov_iter *from)
> +{
> +	return iomap_file_buffered_write(iocb, from, &blkdev_iomap_ops);
> +}
> +
>  /*
>   * Write data to the block device.  Only intended for the block device itself
>   * and the raw driver which basically is a fake block device.
> @@ -605,9 +632,9 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
>  		ret = blkdev_direct_write(iocb, from);
>  		if (ret >= 0 && iov_iter_count(from))
>  			ret = direct_write_fallback(iocb, from, ret,
> -					generic_perform_write(iocb, from));
> +					blkdev_buffered_write(iocb, from));
>  	} else {
> -		ret = generic_perform_write(iocb, from);
> +		ret = blkdev_buffered_write(iocb, from);
>  	}
>  
>  	if (ret > 0)

I've been testing SSD block device write performance recently. I found that the
write speed decreased greatly on my board (330MB/s -> 130MB/s). I then spent some
time looking for the cause and finally found that it's caused by this patch: if I
revert this patch, the write speed recovers to 330MB/s.

I'm using the command below to test write performance:
dd if=/dev/zero of=/dev/sda bs=4M count=1024

I also ran more tests to gather more findings. In short, I found that the write
speed changes with the "bs=" parameter.

I write a total of 4GB of data to sda for each test; the results are as below:

 - dd if=/dev/zero of=/dev/sda bs=400K  count=10485  (334 MB/s)
 - dd if=/dev/zero of=/dev/sda bs=800K  count=5242   (278 MB/s)
 - dd if=/dev/zero of=/dev/sda bs=1600K count=2621   (204 MB/s)
 - dd if=/dev/zero of=/dev/sda bs=2200K count=1906   (170 MB/s)
 - dd if=/dev/zero of=/dev/sda bs=3000K count=1398   (150 MB/s)
 - dd if=/dev/zero of=/dev/sda bs=4500K count=932    (139 MB/s)

With this patch reverted, I got the results below:

 - dd if=/dev/zero of=/dev/sda bs=400K  count=10485  (339 MB/s)
 - dd if=/dev/zero of=/dev/sda bs=800K  count=5242   (330 MB/s)
 - dd if=/dev/zero of=/dev/sda bs=1600K count=2621   (332 MB/s)
 - dd if=/dev/zero of=/dev/sda bs=2200K count=1906   (333 MB/s)
 - dd if=/dev/zero of=/dev/sda bs=3000K count=1398   (333 MB/s)
 - dd if=/dev/zero of=/dev/sda bs=4500K count=932    (333 MB/s)

I just want to know whether these results are expected when using iomap, or
whether it's a real issue?

Many thanks in advance!

Best Regards,
Xu Yang

> -- 
> 2.39.2
>
Xu Yang May 8, 2024, 1:45 a.m. UTC | #4
On Fri, Apr 26, 2024 at 06:37:27PM +0800, Xu Yang wrote:
> Hi Christoph,
> 
> On Tue, Aug 01, 2023 at 07:22:00PM +0200, Christoph Hellwig wrote:
> > Use iomap in buffer_head compat mode to write to block devices.
> > 
> > Signed-off-by: Christoph Hellwig <hch@lst.de>
> > Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
> > Reviewed-by: Pankaj Raghav <p.raghav@samsung.com>
> > Reviewed-by: Hannes Reinecke <hare@suse.de>
> > ---
> >  block/Kconfig |  1 +
> >  block/fops.c  | 31 +++++++++++++++++++++++++++++--
> >  2 files changed, 30 insertions(+), 2 deletions(-)
> > 
> > diff --git a/block/Kconfig b/block/Kconfig
> > index 86122e459fe046..1a13ef0b1ca10c 100644
> > --- a/block/Kconfig
> > +++ b/block/Kconfig
> > @@ -5,6 +5,7 @@
> >  menuconfig BLOCK
> >         bool "Enable the block layer" if EXPERT
> >         default y
> > +       select FS_IOMAP
> >         select SBITMAP
> >         help
> >  	 Provide block layer support for the kernel.
> > diff --git a/block/fops.c b/block/fops.c
> > index f0b822c28ddfe2..063ece37d44e44 100644
> > --- a/block/fops.c
> > +++ b/block/fops.c
> > @@ -15,6 +15,7 @@
> >  #include <linux/falloc.h>
> >  #include <linux/suspend.h>
> >  #include <linux/fs.h>
> > +#include <linux/iomap.h>
> >  #include <linux/module.h>
> >  #include "blk.h"
> >  
> > @@ -386,6 +387,27 @@ static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
> >  	return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages));
> >  }
> >  
> > +static int blkdev_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
> > +		unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
> > +{
> > +	struct block_device *bdev = I_BDEV(inode);
> > +	loff_t isize = i_size_read(inode);
> > +
> > +	iomap->bdev = bdev;
> > +	iomap->offset = ALIGN_DOWN(offset, bdev_logical_block_size(bdev));
> > +	if (iomap->offset >= isize)
> > +		return -EIO;
> > +	iomap->type = IOMAP_MAPPED;
> > +	iomap->addr = iomap->offset;
> > +	iomap->length = isize - iomap->offset;
> > +	iomap->flags |= IOMAP_F_BUFFER_HEAD;
> > +	return 0;
> > +}
> > +
> > +static const struct iomap_ops blkdev_iomap_ops = {
> > +	.iomap_begin		= blkdev_iomap_begin,
> > +};
> > +
> >  static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
> >  {
> >  	return block_write_full_page(page, blkdev_get_block, wbc);
> > @@ -556,6 +578,11 @@ blkdev_direct_write(struct kiocb *iocb, struct iov_iter *from)
> >  	return written;
> >  }
> >  
> > +static ssize_t blkdev_buffered_write(struct kiocb *iocb, struct iov_iter *from)
> > +{
> > +	return iomap_file_buffered_write(iocb, from, &blkdev_iomap_ops);
> > +}
> > +
> >  /*
> >   * Write data to the block device.  Only intended for the block device itself
> >   * and the raw driver which basically is a fake block device.
> > @@ -605,9 +632,9 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
> >  		ret = blkdev_direct_write(iocb, from);
> >  		if (ret >= 0 && iov_iter_count(from))
> >  			ret = direct_write_fallback(iocb, from, ret,
> > -					generic_perform_write(iocb, from));
> > +					blkdev_buffered_write(iocb, from));
> >  	} else {
> > -		ret = generic_perform_write(iocb, from);
> > +		ret = blkdev_buffered_write(iocb, from);
> >  	}
> >  
> >  	if (ret > 0)
> 
> I've been testing SSD block device write performance recently. I found that the
> write speed decreased greatly on my board (330MB/s -> 130MB/s). I then spent some
> time looking for the cause and finally found that it's caused by this patch: if I
> revert this patch, the write speed recovers to 330MB/s.
> 
> I'm using below command to test write performance:
> dd if=/dev/zero of=/dev/sda bs=4M count=1024
> 
> And I also do more tests to get more findings. In short, I found write
> speed changes with the "bs=" parameter.
> 
> I totally write 4GB data to sda for each test, the results as below:
> 
>  - dd if=/dev/zero of=/dev/sda bs=400K  count=10485  (334 MB/s)
>  - dd if=/dev/zero of=/dev/sda bs=800K  count=5242   (278 MB/s)
>  - dd if=/dev/zero of=/dev/sda bs=1600K count=2621   (204 MB/s)
>  - dd if=/dev/zero of=/dev/sda bs=2200K count=1906   (170 MB/s)
>  - dd if=/dev/zero of=/dev/sda bs=3000K count=1398   (150 MB/s)
>  - dd if=/dev/zero of=/dev/sda bs=4500K count=932    (139 MB/s)
> 
> When this patch reverted, I got below results:
> 
>  - dd if=/dev/zero of=/dev/sda bs=400K  count=10485  (339 MB/s)
>  - dd if=/dev/zero of=/dev/sda bs=800K  count=5242   (330 MB/s)
>  - dd if=/dev/zero of=/dev/sda bs=1600K count=2621   (332 MB/s)
>  - dd if=/dev/zero of=/dev/sda bs=2200K count=1906   (333 MB/s)
>  - dd if=/dev/zero of=/dev/sda bs=3000K count=1398   (333 MB/s)
>  - dd if=/dev/zero of=/dev/sda bs=4500K count=932    (333 MB/s)
> 
> I just want to know whether these results are expected when using iomap, or
> whether it's a real issue?
> 
> Many thanks in advance!

A gentle ping.

> 
> Best Regards,
> Xu Yang
> 
> > -- 
> > 2.39.2
> >
diff mbox series

Patch

diff --git a/block/Kconfig b/block/Kconfig
index 86122e459fe046..1a13ef0b1ca10c 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -5,6 +5,7 @@ 
 menuconfig BLOCK
        bool "Enable the block layer" if EXPERT
        default y
+       select FS_IOMAP
        select SBITMAP
        help
 	 Provide block layer support for the kernel.
diff --git a/block/fops.c b/block/fops.c
index f0b822c28ddfe2..063ece37d44e44 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -15,6 +15,7 @@ 
 #include <linux/falloc.h>
 #include <linux/suspend.h>
 #include <linux/fs.h>
+#include <linux/iomap.h>
 #include <linux/module.h>
 #include "blk.h"
 
@@ -386,6 +387,27 @@  static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 	return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages));
 }
 
+static int blkdev_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
+		unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
+{
+	struct block_device *bdev = I_BDEV(inode);
+	loff_t isize = i_size_read(inode);
+
+	iomap->bdev = bdev;
+	iomap->offset = ALIGN_DOWN(offset, bdev_logical_block_size(bdev));
+	if (iomap->offset >= isize)
+		return -EIO;
+	iomap->type = IOMAP_MAPPED;
+	iomap->addr = iomap->offset;
+	iomap->length = isize - iomap->offset;
+	iomap->flags |= IOMAP_F_BUFFER_HEAD;
+	return 0;
+}
+
+static const struct iomap_ops blkdev_iomap_ops = {
+	.iomap_begin		= blkdev_iomap_begin,
+};
+
 static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
 {
 	return block_write_full_page(page, blkdev_get_block, wbc);
@@ -556,6 +578,11 @@  blkdev_direct_write(struct kiocb *iocb, struct iov_iter *from)
 	return written;
 }
 
+static ssize_t blkdev_buffered_write(struct kiocb *iocb, struct iov_iter *from)
+{
+	return iomap_file_buffered_write(iocb, from, &blkdev_iomap_ops);
+}
+
 /*
  * Write data to the block device.  Only intended for the block device itself
  * and the raw driver which basically is a fake block device.
@@ -605,9 +632,9 @@  static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
 		ret = blkdev_direct_write(iocb, from);
 		if (ret >= 0 && iov_iter_count(from))
 			ret = direct_write_fallback(iocb, from, ret,
-					generic_perform_write(iocb, from));
+					blkdev_buffered_write(iocb, from));
 	} else {
-		ret = generic_perform_write(iocb, from);
+		ret = blkdev_buffered_write(iocb, from);
 	}
 
 	if (ret > 0)