diff mbox series

[V5,1/3] block: Allow mapping of vmalloc-ed buffers

Message ID 20190627092944.20957-2-damien.lemoal@wdc.com (mailing list archive)
State Superseded
Headers show
Series Fix zone revalidation memory allocation failures | expand

Commit Message

Damien Le Moal June 27, 2019, 9:29 a.m. UTC
To allow the SCSI subsystem scsi_execute_req() function to issue
requests using large buffers that are better allocated with vmalloc()
rather than kmalloc(), modify bio_map_kern() to allow passing a buffer
allocated with vmalloc().

To do so, detect vmalloc-ed buffers using is_vmalloc_addr(). For
vmalloc-ed buffers, flush the buffer using flush_kernel_vmap_range(),
use vmalloc_to_page() instead of virt_to_page() to obtain the pages of
the buffer, and invalidate the buffer addresses with
invalidate_kernel_vmap_range() on completion of read BIOs. This last
point is executed using the function bio_invalidate_vmalloc_pages()
which is defined only if the architecture defines
ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE, that is, if the architecture
actually needs the invalidation done.

Fixes: 515ce6061312 ("scsi: sd_zbc: Fix sd_zbc_report_zones() buffer allocation")
Fixes: e76239a3748c ("block: add a report_zones method")
Cc: stable@vger.kernel.org
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
---
 block/bio.c | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

Comments

Christoph Hellwig June 27, 2019, 2:06 p.m. UTC | #1
On Thu, Jun 27, 2019 at 06:29:42PM +0900, Damien Le Moal wrote:
> To allow the SCSI subsystem scsi_execute_req() function to issue
> requests using large buffers that are better allocated with vmalloc()
> rather than kmalloc(), modify bio_map_kern() to allow passing a buffer
> allocated with vmalloc().
> 
> To do so, detect vmalloc-ed buffers using is_vmalloc_addr(). For
> vmalloc-ed buffers, flush the buffer using flush_kernel_vmap_range(),
> use vmalloc_to_page() instead of virt_to_page() to obtain the pages of
> the buffer, and invalidate the buffer addresses with
> invalidate_kernel_vmap_range() on completion of read BIOs. This last
> point is executed using the function bio_invalidate_vmalloc_pages()
> which is defined only if the architecture defines
> ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE, that is, if the architecture
> actually needs the invalidation done.
> 
> Fixes: 515ce6061312 ("scsi: sd_zbc: Fix sd_zbc_report_zones() buffer allocation")
> Fixes: e76239a3748c ("block: add a report_zones method")
> Cc: stable@vger.kernel.org
> Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
> ---
>  block/bio.c | 29 ++++++++++++++++++++++++++++-
>  1 file changed, 28 insertions(+), 1 deletion(-)
> 
> diff --git a/block/bio.c b/block/bio.c
> index ce797d73bb43..bbba5f08b2ef 100644
> --- a/block/bio.c
> +++ b/block/bio.c
> @@ -16,6 +16,7 @@
>  #include <linux/workqueue.h>
>  #include <linux/cgroup.h>
>  #include <linux/blk-cgroup.h>
> +#include <linux/highmem.h>
>  
>  #include <trace/events/block.h>
>  #include "blk.h"
> @@ -1479,8 +1480,22 @@ void bio_unmap_user(struct bio *bio)
>  	bio_put(bio);
>  }
>  
> +static void bio_invalidate_vmalloc_pages(struct bio *bio)
> +{
> +#ifdef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
> +	if (bio->bi_private && !op_is_write(bio_op(bio))) {
> +		unsigned long i, len = 0;
> +
> +		for (i = 0; i < bio->bi_vcnt; i++)
> +			len += bio->bi_io_vec[i].bv_len;
> +		invalidate_kernel_vmap_range(bio->bi_private, len);
> +	}
> +#endif
> +}

Normal Linux style is to keep the ifdefs outside the functions,
or use IS_ENABLED, although the latter would require
ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE to be a config option.  Not that
I personally care much.

> @@ -1531,6 +1557,7 @@ struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
>  	}
>  
>  	bio->bi_end_io = bio_map_kern_endio;
> +
>  	return bio;
>  }

Superfluous whitespace change.

Otherwise looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>
Chaitanya Kulkarni June 27, 2019, 5:09 p.m. UTC | #2
Looks good.

Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>

On 6/27/19 2:29 AM, Damien Le Moal wrote:
> To allow the SCSI subsystem scsi_execute_req() function to issue
> requests using large buffers that are better allocated with vmalloc()
> rather than kmalloc(), modify bio_map_kern() to allow passing a buffer
> allocated with vmalloc().
> 
> To do so, detect vmalloc-ed buffers using is_vmalloc_addr(). For
> vmalloc-ed buffers, flush the buffer using flush_kernel_vmap_range(),
> use vmalloc_to_page() instead of virt_to_page() to obtain the pages of
> the buffer, and invalidate the buffer addresses with
> invalidate_kernel_vmap_range() on completion of read BIOs. This last
> point is executed using the function bio_invalidate_vmalloc_pages()
> which is defined only if the architecture defines
> ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE, that is, if the architecture
> actually needs the invalidation done.
> 
> Fixes: 515ce6061312 ("scsi: sd_zbc: Fix sd_zbc_report_zones() buffer allocation")
> Fixes: e76239a3748c ("block: add a report_zones method")
> Cc: stable@vger.kernel.org
> Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
> ---
>   block/bio.c | 29 ++++++++++++++++++++++++++++-
>   1 file changed, 28 insertions(+), 1 deletion(-)
> 
> diff --git a/block/bio.c b/block/bio.c
> index ce797d73bb43..bbba5f08b2ef 100644
> --- a/block/bio.c
> +++ b/block/bio.c
> @@ -16,6 +16,7 @@
>   #include <linux/workqueue.h>
>   #include <linux/cgroup.h>
>   #include <linux/blk-cgroup.h>
> +#include <linux/highmem.h>
>   
>   #include <trace/events/block.h>
>   #include "blk.h"
> @@ -1479,8 +1480,22 @@ void bio_unmap_user(struct bio *bio)
>   	bio_put(bio);
>   }
>   
> +static void bio_invalidate_vmalloc_pages(struct bio *bio)
> +{
> +#ifdef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
> +	if (bio->bi_private && !op_is_write(bio_op(bio))) {
> +		unsigned long i, len = 0;
> +
> +		for (i = 0; i < bio->bi_vcnt; i++)
> +			len += bio->bi_io_vec[i].bv_len;
> +		invalidate_kernel_vmap_range(bio->bi_private, len);
> +	}
> +#endif
> +}
> +
>   static void bio_map_kern_endio(struct bio *bio)
>   {
> +	bio_invalidate_vmalloc_pages(bio);
>   	bio_put(bio);
>   }
>   
> @@ -1501,6 +1516,8 @@ struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
>   	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
>   	unsigned long start = kaddr >> PAGE_SHIFT;
>   	const int nr_pages = end - start;
> +	bool is_vmalloc = is_vmalloc_addr(data);
> +	struct page *page;
>   	int offset, i;
>   	struct bio *bio;
>   
> @@ -1508,6 +1525,11 @@ struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
>   	if (!bio)
>   		return ERR_PTR(-ENOMEM);
>   
> +	if (is_vmalloc) {
> +		flush_kernel_vmap_range(data, len);
> +		bio->bi_private = data;
> +	}
> +
>   	offset = offset_in_page(kaddr);
>   	for (i = 0; i < nr_pages; i++) {
>   		unsigned int bytes = PAGE_SIZE - offset;
> @@ -1518,7 +1540,11 @@ struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
>   		if (bytes > len)
>   			bytes = len;
>   
> -		if (bio_add_pc_page(q, bio, virt_to_page(data), bytes,
> +		if (!is_vmalloc)
> +			page = virt_to_page(data);
> +		else
> +			page = vmalloc_to_page(data);
> +		if (bio_add_pc_page(q, bio, page, bytes,
>   				    offset) < bytes) {
>   			/* we don't support partial mappings */
>   			bio_put(bio);
> @@ -1531,6 +1557,7 @@ struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
>   	}
>   
>   	bio->bi_end_io = bio_map_kern_endio;
> +
>   	return bio;
>   }
>   EXPORT_SYMBOL(bio_map_kern);
>
Ming Lei June 28, 2019, 12:12 a.m. UTC | #3
On Thu, Jun 27, 2019 at 5:31 PM Damien Le Moal <damien.lemoal@wdc.com> wrote:
>
> To allow the SCSI subsystem scsi_execute_req() function to issue
> requests using large buffers that are better allocated with vmalloc()
> rather than kmalloc(), modify bio_map_kern() to allow passing a buffer
> allocated with vmalloc().
>
> To do so, detect vmalloc-ed buffers using is_vmalloc_addr(). For
> vmalloc-ed buffers, flush the buffer using flush_kernel_vmap_range(),
> use vmalloc_to_page() instead of virt_to_page() to obtain the pages of
> the buffer, and invalidate the buffer addresses with
> invalidate_kernel_vmap_range() on completion of read BIOs. This last
> point is executed using the function bio_invalidate_vmalloc_pages()
> which is defined only if the architecture defines
> ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE, that is, if the architecture
> actually needs the invalidation done.
>
> Fixes: 515ce6061312 ("scsi: sd_zbc: Fix sd_zbc_report_zones() buffer allocation")
> Fixes: e76239a3748c ("block: add a report_zones method")
> Cc: stable@vger.kernel.org
> Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
> ---
>  block/bio.c | 29 ++++++++++++++++++++++++++++-
>  1 file changed, 28 insertions(+), 1 deletion(-)
>
> diff --git a/block/bio.c b/block/bio.c
> index ce797d73bb43..bbba5f08b2ef 100644
> --- a/block/bio.c
> +++ b/block/bio.c
> @@ -16,6 +16,7 @@
>  #include <linux/workqueue.h>
>  #include <linux/cgroup.h>
>  #include <linux/blk-cgroup.h>
> +#include <linux/highmem.h>
>
>  #include <trace/events/block.h>
>  #include "blk.h"
> @@ -1479,8 +1480,22 @@ void bio_unmap_user(struct bio *bio)
>         bio_put(bio);
>  }
>
> +static void bio_invalidate_vmalloc_pages(struct bio *bio)
> +{
> +#ifdef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
> +       if (bio->bi_private && !op_is_write(bio_op(bio))) {
> +               unsigned long i, len = 0;
> +
> +               for (i = 0; i < bio->bi_vcnt; i++)
> +                       len += bio->bi_io_vec[i].bv_len;
> +               invalidate_kernel_vmap_range(bio->bi_private, len);
> +       }
> +#endif
> +}
> +
>  static void bio_map_kern_endio(struct bio *bio)
>  {
> +       bio_invalidate_vmalloc_pages(bio);
>         bio_put(bio);
>  }
>
> @@ -1501,6 +1516,8 @@ struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
>         unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
>         unsigned long start = kaddr >> PAGE_SHIFT;
>         const int nr_pages = end - start;
> +       bool is_vmalloc = is_vmalloc_addr(data);
> +       struct page *page;
>         int offset, i;
>         struct bio *bio;
>
> @@ -1508,6 +1525,11 @@ struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
>         if (!bio)
>                 return ERR_PTR(-ENOMEM);
>
> +       if (is_vmalloc) {
> +               flush_kernel_vmap_range(data, len);
> +               bio->bi_private = data;
> +       }
> +
>         offset = offset_in_page(kaddr);
>         for (i = 0; i < nr_pages; i++) {
>                 unsigned int bytes = PAGE_SIZE - offset;
> @@ -1518,7 +1540,11 @@ struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
>                 if (bytes > len)
>                         bytes = len;
>
> -               if (bio_add_pc_page(q, bio, virt_to_page(data), bytes,
> +               if (!is_vmalloc)
> +                       page = virt_to_page(data);
> +               else
> +                       page = vmalloc_to_page(data);
> +               if (bio_add_pc_page(q, bio, page, bytes,
>                                     offset) < bytes) {
>                         /* we don't support partial mappings */
>                         bio_put(bio);
> @@ -1531,6 +1557,7 @@ struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
>         }
>
>         bio->bi_end_io = bio_map_kern_endio;
> +
>         return bio;
>  }
>  EXPORT_SYMBOL(bio_map_kern);
> --
> 2.21.0
>

Looks fine:

Reviewed-by: Ming Lei <ming.lei@redhat.com>
diff mbox series

Patch

diff --git a/block/bio.c b/block/bio.c
index ce797d73bb43..bbba5f08b2ef 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -16,6 +16,7 @@ 
 #include <linux/workqueue.h>
 #include <linux/cgroup.h>
 #include <linux/blk-cgroup.h>
+#include <linux/highmem.h>
 
 #include <trace/events/block.h>
 #include "blk.h"
@@ -1479,8 +1480,22 @@  void bio_unmap_user(struct bio *bio)
 	bio_put(bio);
 }
 
+static void bio_invalidate_vmalloc_pages(struct bio *bio)
+{
+#ifdef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
+	if (bio->bi_private && !op_is_write(bio_op(bio))) {
+		unsigned long i, len = 0;
+
+		for (i = 0; i < bio->bi_vcnt; i++)
+			len += bio->bi_io_vec[i].bv_len;
+		invalidate_kernel_vmap_range(bio->bi_private, len);
+	}
+#endif
+}
+
 static void bio_map_kern_endio(struct bio *bio)
 {
+	bio_invalidate_vmalloc_pages(bio);
 	bio_put(bio);
 }
 
@@ -1501,6 +1516,8 @@  struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
 	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	unsigned long start = kaddr >> PAGE_SHIFT;
 	const int nr_pages = end - start;
+	bool is_vmalloc = is_vmalloc_addr(data);
+	struct page *page;
 	int offset, i;
 	struct bio *bio;
 
@@ -1508,6 +1525,11 @@  struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
 	if (!bio)
 		return ERR_PTR(-ENOMEM);
 
+	if (is_vmalloc) {
+		flush_kernel_vmap_range(data, len);
+		bio->bi_private = data;
+	}
+
 	offset = offset_in_page(kaddr);
 	for (i = 0; i < nr_pages; i++) {
 		unsigned int bytes = PAGE_SIZE - offset;
@@ -1518,7 +1540,11 @@  struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
 		if (bytes > len)
 			bytes = len;
 
-		if (bio_add_pc_page(q, bio, virt_to_page(data), bytes,
+		if (!is_vmalloc)
+			page = virt_to_page(data);
+		else
+			page = vmalloc_to_page(data);
+		if (bio_add_pc_page(q, bio, page, bytes,
 				    offset) < bytes) {
 			/* we don't support partial mappings */
 			bio_put(bio);
@@ -1531,6 +1557,7 @@  struct bio *bio_map_kern(struct request_queue *q, void *data, unsigned int len,
 	}
 
 	bio->bi_end_io = bio_map_kern_endio;
+
 	return bio;
 }
 EXPORT_SYMBOL(bio_map_kern);