diff mbox

ceph/iov_iter: fix bad iov_iter handling in ceph splice codepaths

Message ID 1483727016-343-1-git-send-email-jlayton@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Jeff Layton Jan. 6, 2017, 6:23 p.m. UTC
xfstest generic/095 triggers soft lockups in kcephfs. Basically it uses
fio to drive some I/O via vmsplice ane splice. Ceph then ends trying to
access an ITER_BVEC type iov_iter as a ITER_IOVEC one. That causes it to
pick up a wrong offset and get stuck in an infinite loop while trying to
populate the page array.

To fix this, add a new iov_iter helper for to determine the offset into
the page for the current segment and have ceph call that.

dio_get_pagev_size has similar problems, so fix that as well by adding a
new helper function in lib/iov_iter.c that does what it does, but for
all iov_iter types.

Since we're moving that into generic code, we can also utilize the
iterate_all_kinds macro to handle this. That means that we need to
rework the logic a bit since we can't advance to the next vector while
checking the current one.

Link: http://tracker.ceph.com/issues/18130
Cc: "Zhu, Caifeng" <zhucaifeng@unissoft-nj.com>
Signed-off-by: Jeff Layton <jlayton@redhat.com>
---
 fs/ceph/file.c      | 28 ++------------------
 include/linux/uio.h |  2 ++
 lib/iov_iter.c      | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 77 insertions(+), 26 deletions(-)

Comments

Jeff Layton Jan. 6, 2017, 8:36 p.m. UTC | #1
On Fri, 2017-01-06 at 13:23 -0500, Jeff Layton wrote:
> xfstest generic/095 triggers soft lockups in kcephfs. Basically it uses
> fio to drive some I/O via vmsplice ane splice. Ceph then ends trying to
> access an ITER_BVEC type iov_iter as a ITER_IOVEC one. That causes it to
> pick up a wrong offset and get stuck in an infinite loop while trying to
> populate the page array.
> 
> To fix this, add a new iov_iter helper for to determine the offset into
> the page for the current segment and have ceph call that.
> 
> dio_get_pagev_size has similar problems, so fix that as well by adding a
> new helper function in lib/iov_iter.c that does what it does, but for
> all iov_iter types.
> 
> Since we're moving that into generic code, we can also utilize the
> iterate_all_kinds macro to handle this. That means that we need to
> rework the logic a bit since we can't advance to the next vector while
> checking the current one.
> 
> Link: http://tracker.ceph.com/issues/18130
> Cc: "Zhu, Caifeng" <zhucaifeng@unissoft-nj.com>
> Signed-off-by: Jeff Layton <jlayton@redhat.com>
> ---
>  fs/ceph/file.c      | 28 ++------------------
>  include/linux/uio.h |  2 ++
>  lib/iov_iter.c      | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 77 insertions(+), 26 deletions(-)
> 
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index f633165f3fdc..0cd9fc68a04e 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -35,29 +35,6 @@
>   */
>  
>  /*
> - * Calculate the length sum of direct io vectors that can
> - * be combined into one page vector.
> - */
> -static size_t dio_get_pagev_size(const struct iov_iter *it)
> -{
> -    const struct iovec *iov = it->iov;
> -    const struct iovec *iovend = iov + it->nr_segs;
> -    size_t size;
> -
> -    size = iov->iov_len - it->iov_offset;
> -    /*
> -     * An iov can be page vectored when both the current tail
> -     * and the next base are page aligned.
> -     */
> -    while (PAGE_ALIGNED((iov->iov_base + iov->iov_len)) &&
> -           (++iov < iovend && PAGE_ALIGNED((iov->iov_base)))) {
> -        size += iov->iov_len;
> -    }
> -    dout("dio_get_pagevlen len = %zu\n", size);
> -    return size;
> -}
> -
> -/*
>   * Allocate a page vector based on (@it, @nbytes).
>   * The return value is the tuple describing a page vector,
>   * that is (@pages, @page_align, @num_pages).
> @@ -71,8 +48,7 @@ dio_get_pages_alloc(const struct iov_iter *it, size_t nbytes,
>  	struct page **pages;
>  	int ret = 0, idx, npages;
>  
> -	align = (unsigned long)(it->iov->iov_base + it->iov_offset) &
> -		(PAGE_SIZE - 1);
> +	align = iov_iter_single_seg_page_offset(it);
>  	npages = calc_pages_for(align, nbytes);
>  	pages = kmalloc(sizeof(*pages) * npages, GFP_KERNEL);
>  	if (!pages) {
> @@ -927,7 +903,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
>  	}
>  
>  	while (iov_iter_count(iter) > 0) {
> -		u64 size = dio_get_pagev_size(iter);
> +		u64 size = iov_iter_pvec_size(iter);
>  		size_t start = 0;
>  		ssize_t len;
>  
> diff --git a/include/linux/uio.h b/include/linux/uio.h
> index 6e22b544d039..46fdfff3d7d6 100644
> --- a/include/linux/uio.h
> +++ b/include/linux/uio.h
> @@ -83,6 +83,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
>  void iov_iter_advance(struct iov_iter *i, size_t bytes);
>  int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
>  size_t iov_iter_single_seg_count(const struct iov_iter *i);
> +size_t iov_iter_single_seg_page_offset(const struct iov_iter *i);
>  size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
>  			 struct iov_iter *i);
>  size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
> @@ -93,6 +94,7 @@ size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i);
>  size_t iov_iter_zero(size_t bytes, struct iov_iter *);
>  unsigned long iov_iter_alignment(const struct iov_iter *i);
>  unsigned long iov_iter_gap_alignment(const struct iov_iter *i);
> +size_t iov_iter_pvec_size(const struct iov_iter *i);
>  void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov,
>  			unsigned long nr_segs, size_t count);
>  void iov_iter_kvec(struct iov_iter *i, int direction, const struct kvec *kvec,
> diff --git a/lib/iov_iter.c b/lib/iov_iter.c
> index 6b415b5a100d..d22850bbb1e9 100644
> --- a/lib/iov_iter.c
> +++ b/lib/iov_iter.c
> @@ -745,6 +745,24 @@ size_t iov_iter_single_seg_count(const struct iov_iter *i)
>  }
>  EXPORT_SYMBOL(iov_iter_single_seg_count);
>  
> +/* Return offset into page for current iov_iter segment */
> +size_t iov_iter_single_seg_page_offset(const struct iov_iter *i)
> +{
> +	size_t	base;
> +
> +	if (i->type & ITER_PIPE)
> +		base = i->pipe->bufs[i->idx].offset;
> +	else if (i->type & ITER_BVEC)
> +		base = i->bvec->bv_offset;
> +	else if (i->type & ITER_KVEC)
> +		base = (size_t)i->kvec->iov_base;
> +	else
> +		base = (size_t)i->iov->iov_base;
> +
> +	return (base + i->iov_offset) & (PAGE_SIZE - 1);
> +}
> +EXPORT_SYMBOL(iov_iter_single_seg_page_offset);
> +
>  void iov_iter_kvec(struct iov_iter *i, int direction,
>  			const struct kvec *kvec, unsigned long nr_segs,
>  			size_t count)
> @@ -830,6 +848,61 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
>  }
>  EXPORT_SYMBOL(iov_iter_gap_alignment);
>  
> +/**
> + * iov_iter_pvec_size - find length of page aligned iovecs in iov_iter
> + * @i: iov_iter to in which to find the size
> + *
> + * Some filesystems can stitch together multiple iovecs into a single
> + * page vector when both the previous tail and current base are page
> + * aligned. This function discovers the length that can fit in a single
> + * pagevec and returns it.
> + */
> +size_t iov_iter_pvec_size(const struct iov_iter *i)
> +{
> +	size_t size = i->count;
> +	size_t pv_size = 0;
> +	bool contig = false, first = true;
> +
> +	if (!size)
> +		return 0;
> +
> +	/* Pipes are naturally aligned for this */
> +	if (unlikely(i->type & ITER_PIPE))
> +		return size;
> +
> +	/*
> +	 * An iov can be page vectored when the current base and previous
> +	 * tail are both page aligned. Note that we don't require that the
> +	 * initial base in the first iovec also be page aligned.
> +	 */
> +	iterate_all_kinds(i, size, v,
> +		({
> +		 if (first || (contig && PAGE_ALIGNED(v.iov_base))) {
> +			pv_size += v.iov_len;
> +			first = false;
> +			contig = PAGE_ALIGNED(v.iov_base + v.iov_len);
> +		 }; 0;
> +		 }),
> +		({
> +		 if (first || (contig && v.bv_offset == 0)) {
> +			pv_size += v.bv_len;
> +			first = false;
> +			contig = PAGE_ALIGNED(v.bv_offset + v.bv_len);
> +		 }
> +		 }),
> +		({
> +		 if (first || (contig && PAGE_ALIGNED(v.iov_base))) {
> +			pv_size += v.iov_len;
> +			first = false;
> +			contig = PAGE_ALIGNED(v.iov_base + v.iov_len);
> +		 }
> +		 }))
> +
> +	pv_size -= i->iov_offset;
> +	return pv_size;
> +}
> +EXPORT_SYMBOL(iov_iter_pvec_size);
> +
>  static inline size_t __pipe_get_pages(struct iov_iter *i,
>  				size_t maxsize,
>  				struct page **pages,

Ugh, but this breaks generic/113...let me see what's going on there.
Jeff Layton Jan. 9, 2017, 11:11 p.m. UTC | #2
On Fri, 2017-01-06 at 13:23 -0500, Jeff Layton wrote:
> xfstest generic/095 triggers soft lockups in kcephfs. Basically it uses
> fio to drive some I/O via vmsplice ane splice. Ceph then ends trying to
> access an ITER_BVEC type iov_iter as a ITER_IOVEC one. That causes it to
> pick up a wrong offset and get stuck in an infinite loop while trying to
> populate the page array.
> 
> To fix this, add a new iov_iter helper for to determine the offset into
> the page for the current segment and have ceph call that.
> 
> dio_get_pagev_size has similar problems, so fix that as well by adding a
> new helper function in lib/iov_iter.c that does what it does, but for
> all iov_iter types.
> 
> Since we're moving that into generic code, we can also utilize the
> iterate_all_kinds macro to handle this. That means that we need to
> rework the logic a bit since we can't advance to the next vector while
> checking the current one.
> 
> Link: http://tracker.ceph.com/issues/18130
> Cc: "Zhu, Caifeng" <zhucaifeng@unissoft-nj.com>
> Signed-off-by: Jeff Layton <jlayton@redhat.com>
> ---
>  fs/ceph/file.c      | 28 ++------------------
>  include/linux/uio.h |  2 ++
>  lib/iov_iter.c      | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 77 insertions(+), 26 deletions(-)
> 
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index f633165f3fdc..0cd9fc68a04e 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -35,29 +35,6 @@
>   */
>  
>  /*
> - * Calculate the length sum of direct io vectors that can
> - * be combined into one page vector.
> - */
> -static size_t dio_get_pagev_size(const struct iov_iter *it)
> -{
> -    const struct iovec *iov = it->iov;
> -    const struct iovec *iovend = iov + it->nr_segs;
> -    size_t size;
> -
> -    size = iov->iov_len - it->iov_offset;
> -    /*
> -     * An iov can be page vectored when both the current tail
> -     * and the next base are page aligned.
> -     */
> -    while (PAGE_ALIGNED((iov->iov_base + iov->iov_len)) &&
> -           (++iov < iovend && PAGE_ALIGNED((iov->iov_base)))) {
> -        size += iov->iov_len;
> -    }
> -    dout("dio_get_pagevlen len = %zu\n", size);
> -    return size;
> -}
> -
> -/*
>   * Allocate a page vector based on (@it, @nbytes).
>   * The return value is the tuple describing a page vector,
>   * that is (@pages, @page_align, @num_pages).
> @@ -71,8 +48,7 @@ dio_get_pages_alloc(const struct iov_iter *it, size_t nbytes,
>  	struct page **pages;
>  	int ret = 0, idx, npages;
>  
> -	align = (unsigned long)(it->iov->iov_base + it->iov_offset) &
> -		(PAGE_SIZE - 1);
> +	align = iov_iter_single_seg_page_offset(it);
>  	npages = calc_pages_for(align, nbytes);
>  	pages = kmalloc(sizeof(*pages) * npages, GFP_KERNEL);
>  	if (!pages) {

Found the bug with the earlier patch, and will be sending an updated one
after some more testing.

That said, I would love to be able to just get rid of ceph's
dio_get_pages_alloc as well and just use iov_iter_get_pages_alloc
instead, but I don't think we can because the bio_vec iterator block
does this:

                /* can't be more than PAGE_SIZE */
                *start = v.bv_offset;
                *pages = p = get_pages_array(1);
                if (!p)
                        return -ENOMEM;
                get_page(*p = v.bv_page);
                return v.bv_len;

...so what happens here is that iter_file_splice_write creates an
ITER_BVEC iov_iter. Then any filesystem driver that uses
iov_iter_get_pages or iov_iter_get_pages_alloc can only get a single
page for each call. The ceph code works around this by calling
iov_iter_get_pages in a loop until it gets enough, but that's rather
nasty.

I bet this makes splice writing to an O_DIRECT file on NFS suck too...

Why _is_ ITER_BVEC limited to a single page there? I wonder if we could
consider lifting that limitation in some cases if it really is
necessary?

> @@ -927,7 +903,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
>  	}
>  
>  	while (iov_iter_count(iter) > 0) {
> -		u64 size = dio_get_pagev_size(iter);
> +		u64 size = iov_iter_pvec_size(iter);
>  		size_t start = 0;
>  		ssize_t len;
> 
>  
> diff --git a/include/linux/uio.h b/include/linux/uio.h
> index 6e22b544d039..46fdfff3d7d6 100644
> --- a/include/linux/uio.h
> +++ b/include/linux/uio.h
> @@ -83,6 +83,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
>  void iov_iter_advance(struct iov_iter *i, size_t bytes);
>  int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
>  size_t iov_iter_single_seg_count(const struct iov_iter *i);
> +size_t iov_iter_single_seg_page_offset(const struct iov_iter *i);
>  size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
>  			 struct iov_iter *i);
>  size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
> @@ -93,6 +94,7 @@ size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i);
>  size_t iov_iter_zero(size_t bytes, struct iov_iter *);
>  unsigned long iov_iter_alignment(const struct iov_iter *i);
>  unsigned long iov_iter_gap_alignment(const struct iov_iter *i);
> +size_t iov_iter_pvec_size(const struct iov_iter *i);
>  void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov,
>  			unsigned long nr_segs, size_t count);
>  void iov_iter_kvec(struct iov_iter *i, int direction, const struct kvec *kvec,
> diff --git a/lib/iov_iter.c b/lib/iov_iter.c
> index 6b415b5a100d..d22850bbb1e9 100644
> --- a/lib/iov_iter.c
> +++ b/lib/iov_iter.c
> @@ -745,6 +745,24 @@ size_t iov_iter_single_seg_count(const struct iov_iter *i)
>  }
>  EXPORT_SYMBOL(iov_iter_single_seg_count);
>  
> +/* Return offset into page for current iov_iter segment */
> +size_t iov_iter_single_seg_page_offset(const struct iov_iter *i)
> +{
> +	size_t	base;
> +
> +	if (i->type & ITER_PIPE)
> +		base = i->pipe->bufs[i->idx].offset;
> +	else if (i->type & ITER_BVEC)
> +		base = i->bvec->bv_offset;
> +	else if (i->type & ITER_KVEC)
> +		base = (size_t)i->kvec->iov_base;
> +	else
> +		base = (size_t)i->iov->iov_base;
> +
> +	return (base + i->iov_offset) & (PAGE_SIZE - 1);
> +}
> +EXPORT_SYMBOL(iov_iter_single_seg_page_offset);
> +
>  void iov_iter_kvec(struct iov_iter *i, int direction,
>  			const struct kvec *kvec, unsigned long nr_segs,
>  			size_t count)
> @@ -830,6 +848,61 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
>  }
>  EXPORT_SYMBOL(iov_iter_gap_alignment);
>  
> +/**
> + * iov_iter_pvec_size - find length of page aligned iovecs in iov_iter
> + * @i: iov_iter to in which to find the size
> + *
> + * Some filesystems can stitch together multiple iovecs into a single
> + * page vector when both the previous tail and current base are page
> + * aligned. This function discovers the length that can fit in a single
> + * pagevec and returns it.
> + */
> +size_t iov_iter_pvec_size(const struct iov_iter *i)
> +{
> +	size_t size = i->count;
> +	size_t pv_size = 0;
> +	bool contig = false, first = true;
> +
> +	if (!size)
> +		return 0;
> +
> +	/* Pipes are naturally aligned for this */
> +	if (unlikely(i->type & ITER_PIPE))
> +		return size;
> +
> +	/*
> +	 * An iov can be page vectored when the current base and previous
> +	 * tail are both page aligned. Note that we don't require that the
> +	 * initial base in the first iovec also be page aligned.
> +	 */
> +	iterate_all_kinds(i, size, v,
> +		({
> +		 if (first || (contig && PAGE_ALIGNED(v.iov_base))) {
> +			pv_size += v.iov_len;
> +			first = false;
> +			contig = PAGE_ALIGNED(v.iov_base + v.iov_len);
> +		 }; 0;
> +		 }),
> +		({
> +		 if (first || (contig && v.bv_offset == 0)) {
> +			pv_size += v.bv_len;
> +			first = false;
> +			contig = PAGE_ALIGNED(v.bv_offset + v.bv_len);
> +		 }
> +		 }),
> +		({
> +		 if (first || (contig && PAGE_ALIGNED(v.iov_base))) {
> +			pv_size += v.iov_len;
> +			first = false;
> +			contig = PAGE_ALIGNED(v.iov_base + v.iov_len);
> +		 }
> +		 }))
> +
> +	pv_size -= i->iov_offset;

The bug in this patch is the above. iterate_all_kinds will have already
accounted for the iov_offset, so there's no need to do this here (and it
throws off the calculation when you have to go around more than once).

> +	return pv_size;
> +}
> +EXPORT_SYMBOL(iov_iter_pvec_size);
> +
>  static inline size_t __pipe_get_pages(struct iov_iter *i,
>  				size_t maxsize,
>  				struct page **pages,
diff mbox

Patch

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index f633165f3fdc..0cd9fc68a04e 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -35,29 +35,6 @@ 
  */
 
 /*
- * Calculate the length sum of direct io vectors that can
- * be combined into one page vector.
- */
-static size_t dio_get_pagev_size(const struct iov_iter *it)
-{
-    const struct iovec *iov = it->iov;
-    const struct iovec *iovend = iov + it->nr_segs;
-    size_t size;
-
-    size = iov->iov_len - it->iov_offset;
-    /*
-     * An iov can be page vectored when both the current tail
-     * and the next base are page aligned.
-     */
-    while (PAGE_ALIGNED((iov->iov_base + iov->iov_len)) &&
-           (++iov < iovend && PAGE_ALIGNED((iov->iov_base)))) {
-        size += iov->iov_len;
-    }
-    dout("dio_get_pagevlen len = %zu\n", size);
-    return size;
-}
-
-/*
  * Allocate a page vector based on (@it, @nbytes).
  * The return value is the tuple describing a page vector,
  * that is (@pages, @page_align, @num_pages).
@@ -71,8 +48,7 @@  dio_get_pages_alloc(const struct iov_iter *it, size_t nbytes,
 	struct page **pages;
 	int ret = 0, idx, npages;
 
-	align = (unsigned long)(it->iov->iov_base + it->iov_offset) &
-		(PAGE_SIZE - 1);
+	align = iov_iter_single_seg_page_offset(it);
 	npages = calc_pages_for(align, nbytes);
 	pages = kmalloc(sizeof(*pages) * npages, GFP_KERNEL);
 	if (!pages) {
@@ -927,7 +903,7 @@  ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
 	}
 
 	while (iov_iter_count(iter) > 0) {
-		u64 size = dio_get_pagev_size(iter);
+		u64 size = iov_iter_pvec_size(iter);
 		size_t start = 0;
 		ssize_t len;
 
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 6e22b544d039..46fdfff3d7d6 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -83,6 +83,7 @@  size_t iov_iter_copy_from_user_atomic(struct page *page,
 void iov_iter_advance(struct iov_iter *i, size_t bytes);
 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
 size_t iov_iter_single_seg_count(const struct iov_iter *i);
+size_t iov_iter_single_seg_page_offset(const struct iov_iter *i);
 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i);
 size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
@@ -93,6 +94,7 @@  size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i);
 size_t iov_iter_zero(size_t bytes, struct iov_iter *);
 unsigned long iov_iter_alignment(const struct iov_iter *i);
 unsigned long iov_iter_gap_alignment(const struct iov_iter *i);
+size_t iov_iter_pvec_size(const struct iov_iter *i);
 void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov,
 			unsigned long nr_segs, size_t count);
 void iov_iter_kvec(struct iov_iter *i, int direction, const struct kvec *kvec,
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 6b415b5a100d..d22850bbb1e9 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -745,6 +745,24 @@  size_t iov_iter_single_seg_count(const struct iov_iter *i)
 }
 EXPORT_SYMBOL(iov_iter_single_seg_count);
 
+/* Return offset into page for current iov_iter segment */
+size_t iov_iter_single_seg_page_offset(const struct iov_iter *i)
+{
+	size_t	base;
+
+	if (i->type & ITER_PIPE)
+		base = i->pipe->bufs[i->idx].offset;
+	else if (i->type & ITER_BVEC)
+		base = i->bvec->bv_offset;
+	else if (i->type & ITER_KVEC)
+		base = (size_t)i->kvec->iov_base;
+	else
+		base = (size_t)i->iov->iov_base;
+
+	return (base + i->iov_offset) & (PAGE_SIZE - 1);
+}
+EXPORT_SYMBOL(iov_iter_single_seg_page_offset);
+
 void iov_iter_kvec(struct iov_iter *i, int direction,
 			const struct kvec *kvec, unsigned long nr_segs,
 			size_t count)
@@ -830,6 +848,61 @@  unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
 }
 EXPORT_SYMBOL(iov_iter_gap_alignment);
 
+/**
+ * iov_iter_pvec_size - find length of page aligned iovecs in iov_iter
+ * @i: iov_iter to in which to find the size
+ *
+ * Some filesystems can stitch together multiple iovecs into a single
+ * page vector when both the previous tail and current base are page
+ * aligned. This function discovers the length that can fit in a single
+ * pagevec and returns it.
+ */
+size_t iov_iter_pvec_size(const struct iov_iter *i)
+{
+	size_t size = i->count;
+	size_t pv_size = 0;
+	bool contig = false, first = true;
+
+	if (!size)
+		return 0;
+
+	/* Pipes are naturally aligned for this */
+	if (unlikely(i->type & ITER_PIPE))
+		return size;
+
+	/*
+	 * An iov can be page vectored when the current base and previous
+	 * tail are both page aligned. Note that we don't require that the
+	 * initial base in the first iovec also be page aligned.
+	 */
+	iterate_all_kinds(i, size, v,
+		({
+		 if (first || (contig && PAGE_ALIGNED(v.iov_base))) {
+			pv_size += v.iov_len;
+			first = false;
+			contig = PAGE_ALIGNED(v.iov_base + v.iov_len);
+		 }; 0;
+		 }),
+		({
+		 if (first || (contig && v.bv_offset == 0)) {
+			pv_size += v.bv_len;
+			first = false;
+			contig = PAGE_ALIGNED(v.bv_offset + v.bv_len);
+		 }
+		 }),
+		({
+		 if (first || (contig && PAGE_ALIGNED(v.iov_base))) {
+			pv_size += v.iov_len;
+			first = false;
+			contig = PAGE_ALIGNED(v.iov_base + v.iov_len);
+		 }
+		 }))
+
+	pv_size -= i->iov_offset;
+	return pv_size;
+}
+EXPORT_SYMBOL(iov_iter_pvec_size);
+
 static inline size_t __pipe_get_pages(struct iov_iter *i,
 				size_t maxsize,
 				struct page **pages,