diff mbox series

[1/4] block: bio-integrity: add support for user buffers

Message ID 20231018151843.3542335-2-kbusch@meta.com (mailing list archive)
State New
Headers show
Series block integrity: directly map user space addresses | expand

Commit Message

Keith Busch Oct. 18, 2023, 3:18 p.m. UTC
From: Keith Busch <kbusch@kernel.org>

User space passthrough commands that utilize metadata currently need to
bounce the "integrity" buffer through the kernel. This adds unnecessary
overhead and memory pressure.

Add support for mapping user space directly so that we can avoid this
costly copy. This is similar to how the bio payload utilizes user
addresses with bio_map_user_iov().

Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 block/bio-integrity.c | 67 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/bio.h   |  8 ++++++
 2 files changed, 75 insertions(+)

Comments

Christoph Hellwig Oct. 19, 2023, 5:39 a.m. UTC | #1
int bio_integrity_map_user(struct bio *bio, void __user *ubuf, unsigned int len,
> +			   u32 seed, u32 maxvecs)
> +{
> +	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
> +	unsigned long align = q->dma_pad_mask | queue_dma_alignment(q);
> +	struct page *stack_pages[UIO_FASTIOV];
> +	size_t offset = offset_in_page(ubuf);
> +	unsigned long ptr = (uintptr_t)ubuf;
> +	struct page **pages = stack_pages;
> +	struct bio_integrity_payload *bip;
> +	int npages, ret, i;
> +
> +	if (bio_integrity(bio) || ptr & align || maxvecs > UIO_FASTIOV)
> +		return -EINVAL;

We also need to check the length for the dma alignment/pad, not
just the start.  (The undocumented iov_iter_alignment_iovec helper
obfuscates this for the data path).

> +	bip = bio_integrity_alloc(bio, GFP_KERNEL, maxvecs);
> +	if (IS_ERR(bip))
> +		return PTR_ERR(bip);
> +
> +	ret = pin_user_pages_fast(ptr, UIO_FASTIOV, FOLL_WRITE, pages);
> +	if (unlikely(ret < 0))
> +		goto free_bip;
> +
> +	npages = ret;
> +	for (i = 0; i < npages; i++) {
> +		u32 bytes = min_t(u32, len, PAGE_SIZE - offset);
> +		ret = bio_integrity_add_page(bio, pages[i], bytes, offset);
> +		if (ret != bytes) {
> +			ret = -EINVAL;
> +			goto release_pages;
> +		}
> +		len -= ret;
> +		offset = 0;
> +	}

Any reason not to use the bio_vec array as the buffer, similar to the
data side here?

> +EXPORT_SYMBOL(bio_integrity_map_user);

Everything that just thinly wraps get_user_pages_fast needs to be
EXPORT_SYMBOL_GPL.
kernel test robot Oct. 21, 2023, 3:53 a.m. UTC | #2
Hi Keith,

kernel test robot noticed the following build errors:

[auto build test ERROR on linus/master]
[also build test ERROR on v6.6-rc6 next-20231020]
[cannot apply to axboe-block/for-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Keith-Busch/block-bio-integrity-add-support-for-user-buffers/20231018-232704
base:   linus/master
patch link:    https://lore.kernel.org/r/20231018151843.3542335-2-kbusch%40meta.com
patch subject: [PATCH 1/4] block: bio-integrity: add support for user buffers
config: um-i386_defconfig (https://download.01.org/0day-ci/archive/20231021/202310211117.qmDPOVfI-lkp@intel.com/config)
compiler: gcc-7 (Ubuntu 7.5.0-6ubuntu2) 7.5.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231021/202310211117.qmDPOVfI-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202310211117.qmDPOVfI-lkp@intel.com/

All errors (new ones prefixed by >>):

   In file included from include/linux/blkdev.h:17:0,
                    from init/main.c:85:
   include/linux/bio.h: In function 'bio_integrity_map_user':
>> include/linux/bio.h:798:1: error: expected ';' before '}' token
    }
    ^
--
   In file included from include/linux/blkdev.h:17:0,
                    from lib/vsprintf.c:47:
   include/linux/bio.h: In function 'bio_integrity_map_user':
>> include/linux/bio.h:798:1: error: expected ';' before '}' token
    }
    ^
   lib/vsprintf.c: In function 'va_format':
   lib/vsprintf.c:1682:2: warning: function 'va_format' might be a candidate for 'gnu_printf' format attribute [-Wsuggest-attribute=format]
     buf += vsnprintf(buf, end > buf ? end - buf : 0, va_fmt->fmt, va);
     ^~~


vim +798 include/linux/bio.h

   793	
   794	static inline int bio_integrity_map_user(struct bio *bio, void __user *ubuf,
   795						 unsigned int len, u32 seed, u32 maxvecs)
   796	{
   797		return -EINVAL
 > 798	}
   799
kernel test robot Oct. 21, 2023, 4:13 a.m. UTC | #3
Hi Keith,

kernel test robot noticed the following build errors:

[auto build test ERROR on linus/master]
[also build test ERROR on v6.6-rc6 next-20231020]
[cannot apply to axboe-block/for-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Keith-Busch/block-bio-integrity-add-support-for-user-buffers/20231018-232704
base:   linus/master
patch link:    https://lore.kernel.org/r/20231018151843.3542335-2-kbusch%40meta.com
patch subject: [PATCH 1/4] block: bio-integrity: add support for user buffers
config: um-allnoconfig (https://download.01.org/0day-ci/archive/20231021/202310211209.gA0mAZaz-lkp@intel.com/config)
compiler: clang version 17.0.0 (https://github.com/llvm/llvm-project.git 4a5ac14ee968ff0ad5d2cc1ffa0299048db4c88a)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231021/202310211209.gA0mAZaz-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202310211209.gA0mAZaz-lkp@intel.com/

All errors (new ones prefixed by >>):

   In file included from init/main.c:21:
   In file included from include/linux/syscalls.h:90:
   In file included from include/trace/syscall.h:7:
   In file included from include/linux/trace_events.h:9:
   In file included from include/linux/hardirq.h:11:
   In file included from arch/um/include/asm/hardirq.h:5:
   In file included from include/asm-generic/hardirq.h:17:
   In file included from include/linux/irq.h:20:
   In file included from include/linux/io.h:13:
   In file included from arch/um/include/asm/io.h:24:
   include/asm-generic/io.h:547:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     547 |         val = __raw_readb(PCI_IOBASE + addr);
         |                           ~~~~~~~~~~ ^
   include/asm-generic/io.h:560:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     560 |         val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr));
         |                                                         ~~~~~~~~~~ ^
   include/uapi/linux/byteorder/little_endian.h:37:51: note: expanded from macro '__le16_to_cpu'
      37 | #define __le16_to_cpu(x) ((__force __u16)(__le16)(x))
         |                                                   ^
   In file included from init/main.c:21:
   In file included from include/linux/syscalls.h:90:
   In file included from include/trace/syscall.h:7:
   In file included from include/linux/trace_events.h:9:
   In file included from include/linux/hardirq.h:11:
   In file included from arch/um/include/asm/hardirq.h:5:
   In file included from include/asm-generic/hardirq.h:17:
   In file included from include/linux/irq.h:20:
   In file included from include/linux/io.h:13:
   In file included from arch/um/include/asm/io.h:24:
   include/asm-generic/io.h:573:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     573 |         val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr));
         |                                                         ~~~~~~~~~~ ^
   include/uapi/linux/byteorder/little_endian.h:35:51: note: expanded from macro '__le32_to_cpu'
      35 | #define __le32_to_cpu(x) ((__force __u32)(__le32)(x))
         |                                                   ^
   In file included from init/main.c:21:
   In file included from include/linux/syscalls.h:90:
   In file included from include/trace/syscall.h:7:
   In file included from include/linux/trace_events.h:9:
   In file included from include/linux/hardirq.h:11:
   In file included from arch/um/include/asm/hardirq.h:5:
   In file included from include/asm-generic/hardirq.h:17:
   In file included from include/linux/irq.h:20:
   In file included from include/linux/io.h:13:
   In file included from arch/um/include/asm/io.h:24:
   include/asm-generic/io.h:584:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     584 |         __raw_writeb(value, PCI_IOBASE + addr);
         |                             ~~~~~~~~~~ ^
   include/asm-generic/io.h:594:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     594 |         __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr);
         |                                                       ~~~~~~~~~~ ^
   include/asm-generic/io.h:604:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     604 |         __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr);
         |                                                       ~~~~~~~~~~ ^
   include/asm-generic/io.h:692:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     692 |         readsb(PCI_IOBASE + addr, buffer, count);
         |                ~~~~~~~~~~ ^
   include/asm-generic/io.h:700:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     700 |         readsw(PCI_IOBASE + addr, buffer, count);
         |                ~~~~~~~~~~ ^
   include/asm-generic/io.h:708:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     708 |         readsl(PCI_IOBASE + addr, buffer, count);
         |                ~~~~~~~~~~ ^
   include/asm-generic/io.h:717:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     717 |         writesb(PCI_IOBASE + addr, buffer, count);
         |                 ~~~~~~~~~~ ^
   include/asm-generic/io.h:726:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     726 |         writesw(PCI_IOBASE + addr, buffer, count);
         |                 ~~~~~~~~~~ ^
   include/asm-generic/io.h:735:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     735 |         writesl(PCI_IOBASE + addr, buffer, count);
         |                 ~~~~~~~~~~ ^
   In file included from init/main.c:85:
   In file included from include/linux/blkdev.h:17:
>> include/linux/bio.h:797:16: error: expected ';' after return statement
     797 |         return -EINVAL
         |                       ^
         |                       ;
   12 warnings and 1 error generated.
--
   In file included from mm/swapfile.c:9:
   In file included from include/linux/blkdev.h:9:
   In file included from include/linux/blk_types.h:10:
   In file included from include/linux/bvec.h:10:
   In file included from include/linux/highmem.h:12:
   In file included from include/linux/hardirq.h:11:
   In file included from arch/um/include/asm/hardirq.h:5:
   In file included from include/asm-generic/hardirq.h:17:
   In file included from include/linux/irq.h:20:
   In file included from include/linux/io.h:13:
   In file included from arch/um/include/asm/io.h:24:
   include/asm-generic/io.h:547:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     547 |         val = __raw_readb(PCI_IOBASE + addr);
         |                           ~~~~~~~~~~ ^
   include/asm-generic/io.h:560:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     560 |         val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr));
         |                                                         ~~~~~~~~~~ ^
   include/uapi/linux/byteorder/little_endian.h:37:51: note: expanded from macro '__le16_to_cpu'
      37 | #define __le16_to_cpu(x) ((__force __u16)(__le16)(x))
         |                                                   ^
   In file included from mm/swapfile.c:9:
   In file included from include/linux/blkdev.h:9:
   In file included from include/linux/blk_types.h:10:
   In file included from include/linux/bvec.h:10:
   In file included from include/linux/highmem.h:12:
   In file included from include/linux/hardirq.h:11:
   In file included from arch/um/include/asm/hardirq.h:5:
   In file included from include/asm-generic/hardirq.h:17:
   In file included from include/linux/irq.h:20:
   In file included from include/linux/io.h:13:
   In file included from arch/um/include/asm/io.h:24:
   include/asm-generic/io.h:573:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     573 |         val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr));
         |                                                         ~~~~~~~~~~ ^
   include/uapi/linux/byteorder/little_endian.h:35:51: note: expanded from macro '__le32_to_cpu'
      35 | #define __le32_to_cpu(x) ((__force __u32)(__le32)(x))
         |                                                   ^
   In file included from mm/swapfile.c:9:
   In file included from include/linux/blkdev.h:9:
   In file included from include/linux/blk_types.h:10:
   In file included from include/linux/bvec.h:10:
   In file included from include/linux/highmem.h:12:
   In file included from include/linux/hardirq.h:11:
   In file included from arch/um/include/asm/hardirq.h:5:
   In file included from include/asm-generic/hardirq.h:17:
   In file included from include/linux/irq.h:20:
   In file included from include/linux/io.h:13:
   In file included from arch/um/include/asm/io.h:24:
   include/asm-generic/io.h:584:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     584 |         __raw_writeb(value, PCI_IOBASE + addr);
         |                             ~~~~~~~~~~ ^
   include/asm-generic/io.h:594:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     594 |         __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr);
         |                                                       ~~~~~~~~~~ ^
   include/asm-generic/io.h:604:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     604 |         __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr);
         |                                                       ~~~~~~~~~~ ^
   include/asm-generic/io.h:692:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     692 |         readsb(PCI_IOBASE + addr, buffer, count);
         |                ~~~~~~~~~~ ^
   include/asm-generic/io.h:700:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     700 |         readsw(PCI_IOBASE + addr, buffer, count);
         |                ~~~~~~~~~~ ^
   include/asm-generic/io.h:708:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     708 |         readsl(PCI_IOBASE + addr, buffer, count);
         |                ~~~~~~~~~~ ^
   include/asm-generic/io.h:717:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     717 |         writesb(PCI_IOBASE + addr, buffer, count);
         |                 ~~~~~~~~~~ ^
   include/asm-generic/io.h:726:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     726 |         writesw(PCI_IOBASE + addr, buffer, count);
         |                 ~~~~~~~~~~ ^
   include/asm-generic/io.h:735:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     735 |         writesl(PCI_IOBASE + addr, buffer, count);
         |                 ~~~~~~~~~~ ^
   In file included from mm/swapfile.c:9:
   In file included from include/linux/blkdev.h:17:
>> include/linux/bio.h:797:16: error: expected ';' after return statement
     797 |         return -EINVAL
         |                       ^
         |                       ;
   In file included from mm/swapfile.c:14:
   include/linux/mman.h:158:9: warning: division by zero is undefined [-Wdivision-by-zero]
     158 |                _calc_vm_trans(flags, MAP_SYNC,       VM_SYNC      ) |
         |                ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/mman.h:136:21: note: expanded from macro '_calc_vm_trans'
     136 |    : ((x) & (bit1)) / ((bit1) / (bit2))))
         |                     ^ ~~~~~~~~~~~~~~~~~
   13 warnings and 1 error generated.


vim +797 include/linux/bio.h

   793	
   794	static inline int bio_integrity_map_user(struct bio *bio, void __user *ubuf,
   795						 unsigned int len, u32 seed, u32 maxvecs)
   796	{
 > 797		return -EINVAL
   798	}
   799
Kanchan Joshi Oct. 25, 2023, 12:51 p.m. UTC | #4
On 10/18/2023 8:48 PM, Keith Busch wrote:
> From: Keith Busch <kbusch@kernel.org>
> 
> User space passthrough commands that utilize metadata currently need to
> bounce the "integrity" buffer through the kernel. This adds unnecessary
> overhead and memory pressure.
> 
> Add support for mapping user space directly so that we can avoid this
> costly copy. This is similar to how the bio payload utilizes user
> addresses with bio_map_user_iov().
> 
> Signed-off-by: Keith Busch <kbusch@kernel.org>
> ---
>   block/bio-integrity.c | 67 +++++++++++++++++++++++++++++++++++++++++++
>   include/linux/bio.h   |  8 ++++++
>   2 files changed, 75 insertions(+)
> 
> diff --git a/block/bio-integrity.c b/block/bio-integrity.c
> index ec8ac8cf6e1b9..08f70b837a29b 100644
> --- a/block/bio-integrity.c
> +++ b/block/bio-integrity.c
> @@ -91,6 +91,19 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
>   }
>   EXPORT_SYMBOL(bio_integrity_alloc);
>   
> +static void bio_integrity_unmap_user(struct bio_integrity_payload *bip)
> +{
> +	bool dirty = bio_data_dir(bip->bip_bio) == READ;
> +	struct bvec_iter iter;
> +	struct bio_vec bv;
> +
> +	bip_for_each_vec(bv, bip, iter) {
> +		if (dirty && !PageCompound(bv.bv_page))
> +			set_page_dirty_lock(bv.bv_page);
> +		unpin_user_page(bv.bv_page);
> +	}
> +}
> +
>   /**
>    * bio_integrity_free - Free bio integrity payload
>    * @bio:	bio containing bip to be freed
> @@ -105,6 +118,8 @@ void bio_integrity_free(struct bio *bio)
>   
>   	if (bip->bip_flags & BIP_BLOCK_INTEGRITY)
>   		kfree(bvec_virt(bip->bip_vec));
> +	else if (bip->bip_flags & BIP_INTEGRITY_USER)
> +		bio_integrity_unmap_user(bip);;
>   
>   	__bio_integrity_free(bs, bip);
>   	bio->bi_integrity = NULL;
> @@ -160,6 +175,58 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
>   }
>   EXPORT_SYMBOL(bio_integrity_add_page);
>   
> +int bio_integrity_map_user(struct bio *bio, void __user *ubuf, unsigned int len,
> +			   u32 seed, u32 maxvecs)
> +{
> +	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
> +	unsigned long align = q->dma_pad_mask | queue_dma_alignment(q);
> +	struct page *stack_pages[UIO_FASTIOV];
> +	size_t offset = offset_in_page(ubuf);
> +	unsigned long ptr = (uintptr_t)ubuf;
> +	struct page **pages = stack_pages;
> +	struct bio_integrity_payload *bip;
> +	int npages, ret, i;
> +
> +	if (bio_integrity(bio) || ptr & align || maxvecs > UIO_FASTIOV)
> +		return -EINVAL;
> +
> +	bip = bio_integrity_alloc(bio, GFP_KERNEL, maxvecs);
> +	if (IS_ERR(bip))
> +		return PTR_ERR(bip);
> +
> +	ret = pin_user_pages_fast(ptr, UIO_FASTIOV, FOLL_WRITE, pages);

Why not pass maxvecs here? If you pass UIO_FASTIOV, it will pin that
many pages here, which will eventually result in a leak (missed unpin);
see below.

> +	if (unlikely(ret < 0))
> +		goto free_bip;
> +
> +	npages = ret;
> +	for (i = 0; i < npages; i++) {
> +		u32 bytes = min_t(u32, len, PAGE_SIZE - offset);

Nit: bytes can be declared outside.

> +		ret = bio_integrity_add_page(bio, pages[i], bytes, offset);
> +		if (ret != bytes) {
> +			ret = -EINVAL;
> +			goto release_pages;
> +		}
> +		len -= ret;

Take the case of a single '4KB + 8b' I/O.
Here len becomes 0 in the first iteration, but the loop still runs for
UIO_FASTIOV iterations. Only one page will be added through
bio_integrity_add_page().

That one page is all that bio_integrity_unmap_user() will unpin.
The remaining pages will stay pinned.
Keith Busch Oct. 25, 2023, 2:42 p.m. UTC | #5
On Wed, Oct 25, 2023 at 06:21:55PM +0530, Kanchan Joshi wrote:
> On 10/18/2023 8:48 PM, Keith Busch wrote:
> >   }
> >   EXPORT_SYMBOL(bio_integrity_add_page);
> >   
> > +int bio_integrity_map_user(struct bio *bio, void __user *ubuf, unsigned int len,
> > +			   u32 seed, u32 maxvecs)
> > +{
> > +	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
> > +	unsigned long align = q->dma_pad_mask | queue_dma_alignment(q);
> > +	struct page *stack_pages[UIO_FASTIOV];
> > +	size_t offset = offset_in_page(ubuf);
> > +	unsigned long ptr = (uintptr_t)ubuf;
> > +	struct page **pages = stack_pages;
> > +	struct bio_integrity_payload *bip;
> > +	int npages, ret, i;
> > +
> > +	if (bio_integrity(bio) || ptr & align || maxvecs > UIO_FASTIOV)
> > +		return -EINVAL;
> > +
> > +	bip = bio_integrity_alloc(bio, GFP_KERNEL, maxvecs);
> > +	if (IS_ERR(bip))
> > +		return PTR_ERR(bip);
> > +
> > +	ret = pin_user_pages_fast(ptr, UIO_FASTIOV, FOLL_WRITE, pages);
> 
> Why not pass maxvecs here? If you pass UIO_FASTIOV, it will map those 
> many pages here. And will result into a leak (missed unpin) eventually 
> (see below).

The 'maxvecs' is for the number of bvecs, and UIO_FASTIOV is for the
number of pages. A single bvec can contain multiple pages, so the idea
was to attempt merging if multiple pages were required.

This patch though didn't calculate the pages right. Next version I'm
working on uses iov_iter instead. V2 also retains a kernel copy
fallback.
diff mbox series

Patch

diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index ec8ac8cf6e1b9..08f70b837a29b 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -91,6 +91,26 @@  struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
 }
 EXPORT_SYMBOL(bio_integrity_alloc);
 
+/*
+ * Walk the integrity payload's vectors and unpin each page.  When the bio's
+ * data direction is READ, non-compound pages are marked dirty before the unpin.
+ */
+static void bio_integrity_unmap_user(struct bio_integrity_payload *bip)
+{
+	struct bvec_iter pos;
+	struct bio_vec seg;
+	bool mark_dirty;
+
+	mark_dirty = (bio_data_dir(bip->bip_bio) == READ);
+	bip_for_each_vec(seg, bip, pos) {
+		struct page *page = seg.bv_page;
+
+		if (mark_dirty && !PageCompound(page))
+			set_page_dirty_lock(page);
+		unpin_user_page(page);
+	}
+}
+
 /**
  * bio_integrity_free - Free bio integrity payload
  * @bio:	bio containing bip to be freed
@@ -105,6 +118,8 @@  void bio_integrity_free(struct bio *bio)
 
 	if (bip->bip_flags & BIP_BLOCK_INTEGRITY)
 		kfree(bvec_virt(bip->bip_vec));
+	else if (bip->bip_flags & BIP_INTEGRITY_USER)
+		bio_integrity_unmap_user(bip);;
 
 	__bio_integrity_free(bs, bip);
 	bio->bi_integrity = NULL;
@@ -160,6 +175,85 @@  int bio_integrity_add_page(struct bio *bio, struct page *page,
 }
 EXPORT_SYMBOL(bio_integrity_add_page);
 
+/**
+ * bio_integrity_map_user - Map a user buffer as the bio integrity payload
+ * @bio:	bio to attach the integrity payload to
+ * @ubuf:	user space address of the integrity buffer
+ * @len:	length of the buffer in bytes
+ * @seed:	value stored in the payload's bip_iter.bi_sector
+ * @maxvecs:	number of integrity vectors to allocate (at most UIO_FASTIOV)
+ *
+ * Description: Pins the user pages backing @ubuf and adds them to a newly
+ * allocated integrity payload, avoiding a bounce copy.  The pins are dropped
+ * by bio_integrity_free().  Returns 0 on success or a negative errno.
+ */
+int bio_integrity_map_user(struct bio *bio, void __user *ubuf, unsigned int len,
+			   u32 seed, u32 maxvecs)
+{
+	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
+	unsigned long align = q->dma_pad_mask | queue_dma_alignment(q);
+	struct page *pages[UIO_FASTIOV];
+	size_t offset = offset_in_page(ubuf);
+	unsigned long ptr = (uintptr_t)ubuf;
+	struct bio_integrity_payload *bip;
+	int npages, ret, i;
+
+	if (bio_integrity(bio) || !len || maxvecs > UIO_FASTIOV)
+		return -EINVAL;
+
+	/* Both the start address and the length must honor the DMA limits. */
+	if ((ptr | len) & align)
+		return -EINVAL;
+
+	/* Pin only the pages @ubuf spans so that every pin is tracked. */
+	npages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
+	if (npages > UIO_FASTIOV)
+		return -EINVAL;
+
+	bip = bio_integrity_alloc(bio, GFP_KERNEL, maxvecs);
+	if (IS_ERR(bip))
+		return PTR_ERR(bip);
+
+	ret = pin_user_pages_fast(ptr, npages, FOLL_WRITE, pages);
+	if (unlikely(ret < 0))
+		goto free_bip;
+	if (ret != npages) {
+		/* Short pin: release what was pinned and fail the mapping. */
+		npages = ret;
+		ret = -EFAULT;
+		goto release_pages;
+	}
+
+	for (i = 0; i < npages; i++) {
+		u32 bytes = min_t(u32, len, PAGE_SIZE - offset);
+
+		ret = bio_integrity_add_page(bio, pages[i], bytes, offset);
+		if (ret != bytes) {
+			ret = -EINVAL;
+			goto release_pages;
+		}
+		len -= bytes;
+		offset = 0;
+	}
+
+	/* Defensive: the pinned pages must account for the whole buffer. */
+	if (len) {
+		ret = -EINVAL;
+		goto release_pages;
+	}
+
+	bip->bip_iter.bi_sector = seed;
+	bip->bip_flags |= BIP_INTEGRITY_USER;
+	return 0;
+
+release_pages:
+	unpin_user_pages(pages, npages);
+free_bip:
+	bio_integrity_free(bio);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(bio_integrity_map_user);
+
 /**
  * bio_integrity_process - Process integrity metadata for a bio
  * @bio:	bio to generate/verify integrity metadata for
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 41d417ee13499..144cc280b6ad3 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -324,6 +324,7 @@  enum bip_flags {
 	BIP_CTRL_NOCHECK	= 1 << 2, /* disable HBA integrity checking */
 	BIP_DISK_NOCHECK	= 1 << 3, /* disable disk integrity checking */
 	BIP_IP_CHECKSUM		= 1 << 4, /* IP checksum */
+	BIP_INTEGRITY_USER	= 1 << 5, /* Integrity payload is user address */
 };
 
 /*
@@ -720,6 +721,7 @@  static inline bool bioset_initialized(struct bio_set *bs)
 
 extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int);
 extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int);
+extern int bio_integrity_map_user(struct bio *, void __user *, unsigned int, u32, u32);
 extern bool bio_integrity_prep(struct bio *);
 extern void bio_integrity_advance(struct bio *, unsigned int);
 extern void bio_integrity_trim(struct bio *);
@@ -789,6 +791,12 @@  static inline int bio_integrity_add_page(struct bio *bio, struct page *page,
 	return 0;
 }
 
+static inline int bio_integrity_map_user(struct bio *bio, void __user *ubuf,
+					 unsigned int len, u32 seed, u32 maxvecs)
+{
+	return -EINVAL;
+}
+
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
 
 /*