
[01/29] iov_iter: Switch to using a table of operations

Message ID 160596801020.154728.15935034745159191564.stgit@warthog.procyon.org.uk (mailing list archive)
State New, archived
Series: RFC: iov_iter: Switch to using an ops table

Commit Message

David Howells Nov. 21, 2020, 2:13 p.m. UTC
Switch to using a table of operations.  In a future patch the individual
methods will be split up by type.  For the moment, however, the ops tables
just jump directly to the old functions - which are now static.  Inline
wrappers are provided to jump through the hooks.

Signed-off-by: David Howells <dhowells@redhat.com>
---

 fs/io_uring.c       |    2 
 include/linux/uio.h |  241 ++++++++++++++++++++++++++++++++++--------
 lib/iov_iter.c      |  293 +++++++++++++++++++++++++++++++++++++++------------
 3 files changed, 422 insertions(+), 114 deletions(-)

Comments

Pavel Begunkov Nov. 21, 2020, 2:31 p.m. UTC | #1
On 21/11/2020 14:13, David Howells wrote:
> Switch to using a table of operations.  In a future patch the individual
> methods will be split up by type.  For the moment, however, the ops tables
> just jump directly to the old functions - which are now static.  Inline
> wrappers are provided to jump through the hooks.
> 
> Signed-off-by: David Howells <dhowells@redhat.com>
> ---
> 
>  fs/io_uring.c       |    2 
>  include/linux/uio.h |  241 ++++++++++++++++++++++++++++++++++--------
>  lib/iov_iter.c      |  293 +++++++++++++++++++++++++++++++++++++++------------
>  3 files changed, 422 insertions(+), 114 deletions(-)
> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 4ead291b2976..baa78f58ae5c 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -3192,7 +3192,7 @@ static void io_req_map_rw(struct io_kiocb *req, const struct iovec *iovec,
>  	rw->free_iovec = iovec;
>  	rw->bytes_done = 0;
>  	/* can only be fixed buffers, no need to do anything */
> -	if (iter->type == ITER_BVEC)
> +	if (iov_iter_is_bvec(iter))

Could you split this io_uring change and send for 5.10?
Or I can do it for you if you wish.

>  		return;
>  	if (!iovec) {
>  		unsigned iov_off = 0;
> diff --git a/include/linux/uio.h b/include/linux/uio.h
> index 72d88566694e..45ee087f8c43 100644
> --- a/include/linux/uio.h
> +++ b/include/linux/uio.h
> @@ -32,9 +32,10 @@ struct iov_iter {
>  	 * Bit 1 is the BVEC_FLAG_NO_REF bit, set if type is a bvec and
>  	 * the caller isn't expecting to drop a page reference when done.
>  	 */
> -	unsigned int type;
> +	unsigned int flags;
>  	size_t iov_offset;
>  	size_t count;
> +	const struct iov_iter_ops *ops;
>  	union {
>  		const struct iovec *iov;
>  		const struct kvec *kvec;
> @@ -50,9 +51,63 @@ struct iov_iter {
>  	};
>  };
>  
> +void iov_iter_init(struct iov_iter *i, unsigned int direction, const struct iovec *iov,
> +			unsigned long nr_segs, size_t count);
> +void iov_iter_kvec(struct iov_iter *i, unsigned int direction, const struct kvec *kvec,
> +			unsigned long nr_segs, size_t count);
> +void iov_iter_bvec(struct iov_iter *i, unsigned int direction, const struct bio_vec *bvec,
> +			unsigned long nr_segs, size_t count);
> +void iov_iter_pipe(struct iov_iter *i, unsigned int direction, struct pipe_inode_info *pipe,
> +			size_t count);
> +void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count);
> +
> +struct iov_iter_ops {
> +	enum iter_type type;
> +	size_t (*copy_from_user_atomic)(struct page *page, struct iov_iter *i,
> +					unsigned long offset, size_t bytes);
> +	void (*advance)(struct iov_iter *i, size_t bytes);
> +	void (*revert)(struct iov_iter *i, size_t bytes);
> +	int (*fault_in_readable)(struct iov_iter *i, size_t bytes);
> +	size_t (*single_seg_count)(const struct iov_iter *i);
> +	size_t (*copy_page_to_iter)(struct page *page, size_t offset, size_t bytes,
> +				    struct iov_iter *i);
> +	size_t (*copy_page_from_iter)(struct page *page, size_t offset, size_t bytes,
> +				      struct iov_iter *i);
> +	size_t (*copy_to_iter)(const void *addr, size_t bytes, struct iov_iter *i);
> +	size_t (*copy_from_iter)(void *addr, size_t bytes, struct iov_iter *i);
> +	bool (*copy_from_iter_full)(void *addr, size_t bytes, struct iov_iter *i);
> +	size_t (*copy_from_iter_nocache)(void *addr, size_t bytes, struct iov_iter *i);
> +	bool (*copy_from_iter_full_nocache)(void *addr, size_t bytes, struct iov_iter *i);
> +#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
> +	size_t (*copy_from_iter_flushcache)(void *addr, size_t bytes, struct iov_iter *i);
> +#endif
> +#ifdef CONFIG_ARCH_HAS_COPY_MC
> +	size_t (*copy_mc_to_iter)(const void *addr, size_t bytes, struct iov_iter *i);
> +#endif
> +	size_t (*csum_and_copy_to_iter)(const void *addr, size_t bytes, void *csump,
> +					struct iov_iter *i);
> +	size_t (*csum_and_copy_from_iter)(void *addr, size_t bytes, __wsum *csum,
> +					  struct iov_iter *i);
> +	bool (*csum_and_copy_from_iter_full)(void *addr, size_t bytes, __wsum *csum,
> +					     struct iov_iter *i);
> +
> +	size_t (*zero)(size_t bytes, struct iov_iter *i);
> +	unsigned long (*alignment)(const struct iov_iter *i);
> +	unsigned long (*gap_alignment)(const struct iov_iter *i);
> +	ssize_t (*get_pages)(struct iov_iter *i, struct page **pages,
> +			     size_t maxsize, unsigned maxpages, size_t *start);
> +	ssize_t (*get_pages_alloc)(struct iov_iter *i, struct page ***pages,
> +				   size_t maxsize, size_t *start);
> +	int (*npages)(const struct iov_iter *i, int maxpages);
> +	const void *(*dup_iter)(struct iov_iter *new, struct iov_iter *old, gfp_t flags);
> +	int (*for_each_range)(struct iov_iter *i, size_t bytes,
> +			      int (*f)(struct kvec *vec, void *context),
> +			      void *context);
> +};
> +
>  static inline enum iter_type iov_iter_type(const struct iov_iter *i)
>  {
> -	return i->type & ~(READ | WRITE);
> +	return i->ops->type;
>  }
>  
>  static inline bool iter_is_iovec(const struct iov_iter *i)
> @@ -82,7 +137,7 @@ static inline bool iov_iter_is_discard(const struct iov_iter *i)
>  
>  static inline unsigned char iov_iter_rw(const struct iov_iter *i)
>  {
> -	return i->type & (READ | WRITE);
> +	return i->flags & (READ | WRITE);
>  }
>  
>  /*
> @@ -111,22 +166,71 @@ static inline struct iovec iov_iter_iovec(const struct iov_iter *iter)
>  	};
>  }
>  
> -size_t iov_iter_copy_from_user_atomic(struct page *page,
> -		struct iov_iter *i, unsigned long offset, size_t bytes);
> -void iov_iter_advance(struct iov_iter *i, size_t bytes);
> -void iov_iter_revert(struct iov_iter *i, size_t bytes);
> -int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
> -size_t iov_iter_single_seg_count(const struct iov_iter *i);
> +static inline
> +size_t iov_iter_copy_from_user_atomic(struct page *page, struct iov_iter *i,
> +				      unsigned long offset, size_t bytes)
> +{
> +	return i->ops->copy_from_user_atomic(page, i, offset, bytes);
> +}
> +static inline
> +void iov_iter_advance(struct iov_iter *i, size_t bytes)
> +{
> +	return i->ops->advance(i, bytes);
> +}
> +static inline
> +void iov_iter_revert(struct iov_iter *i, size_t bytes)
> +{
> +	return i->ops->revert(i, bytes);
> +}
> +static inline
> +int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
> +{
> +	return i->ops->fault_in_readable(i, bytes);
> +}
> +static inline
> +size_t iov_iter_single_seg_count(const struct iov_iter *i)
> +{
> +	return i->ops->single_seg_count(i);
> +}
> +
> +static inline
>  size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
> -			 struct iov_iter *i);
> +				       struct iov_iter *i)
> +{
> +	return i->ops->copy_page_to_iter(page, offset, bytes, i);
> +}
> +static inline
>  size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
> -			 struct iov_iter *i);
> +					 struct iov_iter *i)
> +{
> +	return i->ops->copy_page_from_iter(page, offset, bytes, i);
> +}
>  
> -size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
> -size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i);
> -bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i);
> -size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i);
> -bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i);
> +static __always_inline __must_check
> +size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
> +{
> +	return i->ops->copy_to_iter(addr, bytes, i);
> +}
> +static __always_inline __must_check
> +size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
> +{
> +	return i->ops->copy_from_iter(addr, bytes, i);
> +}
> +static __always_inline __must_check
> +bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
> +{
> +	return i->ops->copy_from_iter_full(addr, bytes, i);
> +}
> +static __always_inline __must_check
> +size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
> +{
> +	return i->ops->copy_from_iter_nocache(addr, bytes, i);
> +}
> +static __always_inline __must_check
> +bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
> +{
> +	return i->ops->copy_from_iter_full_nocache(addr, bytes, i);
> +}
>  
>  static __always_inline __must_check
>  size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
> @@ -173,23 +277,21 @@ bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
>  		return _copy_from_iter_full_nocache(addr, bytes, i);
>  }
>  
> -#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
>  /*
>   * Note, users like pmem that depend on the stricter semantics of
>   * copy_from_iter_flushcache() than copy_from_iter_nocache() must check for
>   * IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) before assuming that the
>   * destination is flushed from the cache on return.
>   */
> -size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i);
> -#else
> -#define _copy_from_iter_flushcache _copy_from_iter_nocache
> -#endif
> -
> -#ifdef CONFIG_ARCH_HAS_COPY_MC
> -size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
> +static __always_inline __must_check
> +size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
> +{
> +#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
> +	return i->ops->copy_from_iter_flushcache(addr, bytes, i);
>  #else
> -#define _copy_mc_to_iter _copy_to_iter
> +	return i->ops->copy_from_iter_nocache(addr, bytes, i);
>  #endif
> +}
>  
>  static __always_inline __must_check
>  size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
> @@ -200,6 +302,16 @@ size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
>  		return _copy_from_iter_flushcache(addr, bytes, i);
>  }
>  
> +static __always_inline __must_check
> +size_t _copy_mc_to_iter(void *addr, size_t bytes, struct iov_iter *i)
> +{
> +#ifdef CONFIG_ARCH_HAS_COPY_MC
> +	return i->ops->copy_mc_to_iter(addr, bytes, i);
> +#else
> +	return i->ops->copy_to_iter(addr, bytes, i);
> +#endif
> +}
> +
>  static __always_inline __must_check
>  size_t copy_mc_to_iter(void *addr, size_t bytes, struct iov_iter *i)
>  {
> @@ -209,25 +321,47 @@ size_t copy_mc_to_iter(void *addr, size_t bytes, struct iov_iter *i)
>  		return _copy_mc_to_iter(addr, bytes, i);
>  }
>  
> -size_t iov_iter_zero(size_t bytes, struct iov_iter *);
> -unsigned long iov_iter_alignment(const struct iov_iter *i);
> -unsigned long iov_iter_gap_alignment(const struct iov_iter *i);
> -void iov_iter_init(struct iov_iter *i, unsigned int direction, const struct iovec *iov,
> -			unsigned long nr_segs, size_t count);
> -void iov_iter_kvec(struct iov_iter *i, unsigned int direction, const struct kvec *kvec,
> -			unsigned long nr_segs, size_t count);
> -void iov_iter_bvec(struct iov_iter *i, unsigned int direction, const struct bio_vec *bvec,
> -			unsigned long nr_segs, size_t count);
> -void iov_iter_pipe(struct iov_iter *i, unsigned int direction, struct pipe_inode_info *pipe,
> -			size_t count);
> -void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count);
> +static inline
> +size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
> +{
> +	return i->ops->zero(bytes, i);
> +}
> +static inline
> +unsigned long iov_iter_alignment(const struct iov_iter *i)
> +{
> +	return i->ops->alignment(i);
> +}
> +static inline
> +unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
> +{
> +	return i->ops->gap_alignment(i);
> +}
> +
> +static inline
>  ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
> -			size_t maxsize, unsigned maxpages, size_t *start);
> +			size_t maxsize, unsigned maxpages, size_t *start)
> +{
> +	return i->ops->get_pages(i, pages, maxsize, maxpages, start);
> +}
> +
> +static inline
>  ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages,
> -			size_t maxsize, size_t *start);
> -int iov_iter_npages(const struct iov_iter *i, int maxpages);
> +			size_t maxsize, size_t *start)
> +{
> +	return i->ops->get_pages_alloc(i, pages, maxsize, start);
> +}
> +
> +static inline
> +int iov_iter_npages(const struct iov_iter *i, int maxpages)
> +{
> +	return i->ops->npages(i, maxpages);
> +}
>  
> -const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags);
> +static inline
> +const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
> +{
> +	return old->ops->dup_iter(new, old, flags);
> +}
>  
>  static inline size_t iov_iter_count(const struct iov_iter *i)
>  {
> @@ -260,9 +394,22 @@ static inline void iov_iter_reexpand(struct iov_iter *i, size_t count)
>  {
>  	i->count = count;
>  }
> -size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump, struct iov_iter *i);
> -size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i);
> -bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i);
> +
> +static inline
> +size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump, struct iov_iter *i)
> +{
> +	return i->ops->csum_and_copy_to_iter(addr, bytes, csump, i);
> +}
> +static inline
> +size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i)
> +{
> +	return i->ops->csum_and_copy_from_iter(addr, bytes, csum, i);
> +}
> +static inline
> +bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i)
> +{
> +	return i->ops->csum_and_copy_from_iter_full(addr, bytes, csum, i);
> +}
>  size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
>  		struct iov_iter *i);
>  
> @@ -278,8 +425,12 @@ ssize_t __import_iovec(int type, const struct iovec __user *uvec,
>  int import_single_range(int type, void __user *buf, size_t len,
>  		 struct iovec *iov, struct iov_iter *i);
>  
> +static inline
>  int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
>  			    int (*f)(struct kvec *vec, void *context),
> -			    void *context);
> +			    void *context)
> +{
> +	return i->ops->for_each_range(i, bytes, f, context);
> +}
>  
>  #endif
> diff --git a/lib/iov_iter.c b/lib/iov_iter.c
> index 1635111c5bd2..e403d524c797 100644
> --- a/lib/iov_iter.c
> +++ b/lib/iov_iter.c
> @@ -13,6 +13,12 @@
>  #include <linux/scatterlist.h>
>  #include <linux/instrumented.h>
>  
> +static const struct iov_iter_ops iovec_iter_ops;
> +static const struct iov_iter_ops kvec_iter_ops;
> +static const struct iov_iter_ops bvec_iter_ops;
> +static const struct iov_iter_ops pipe_iter_ops;
> +static const struct iov_iter_ops discard_iter_ops;
> +
>  #define PIPE_PARANOIA /* for now */
>  
>  #define iterate_iovec(i, n, __v, __p, skip, STEP) {	\
> @@ -81,15 +87,15 @@
>  #define iterate_all_kinds(i, n, v, I, B, K) {			\
>  	if (likely(n)) {					\
>  		size_t skip = i->iov_offset;			\
> -		if (unlikely(i->type & ITER_BVEC)) {		\
> +		if (unlikely(iov_iter_type(i) & ITER_BVEC)) {		\
>  			struct bio_vec v;			\
>  			struct bvec_iter __bi;			\
>  			iterate_bvec(i, n, v, __bi, skip, (B))	\
> -		} else if (unlikely(i->type & ITER_KVEC)) {	\
> +		} else if (unlikely(iov_iter_type(i) & ITER_KVEC)) {	\
>  			const struct kvec *kvec;		\
>  			struct kvec v;				\
>  			iterate_kvec(i, n, v, kvec, skip, (K))	\
> -		} else if (unlikely(i->type & ITER_DISCARD)) {	\
> +		} else if (unlikely(iov_iter_type(i) & ITER_DISCARD)) {	\
>  		} else {					\
>  			const struct iovec *iov;		\
>  			struct iovec v;				\
> @@ -103,7 +109,7 @@
>  		n = i->count;					\
>  	if (i->count) {						\
>  		size_t skip = i->iov_offset;			\
> -		if (unlikely(i->type & ITER_BVEC)) {		\
> +		if (unlikely(iov_iter_type(i) & ITER_BVEC)) {		\
>  			const struct bio_vec *bvec = i->bvec;	\
>  			struct bio_vec v;			\
>  			struct bvec_iter __bi;			\
> @@ -111,7 +117,7 @@
>  			i->bvec = __bvec_iter_bvec(i->bvec, __bi);	\
>  			i->nr_segs -= i->bvec - bvec;		\
>  			skip = __bi.bi_bvec_done;		\
> -		} else if (unlikely(i->type & ITER_KVEC)) {	\
> +		} else if (unlikely(iov_iter_type(i) & ITER_KVEC)) {	\
>  			const struct kvec *kvec;		\
>  			struct kvec v;				\
>  			iterate_kvec(i, n, v, kvec, skip, (K))	\
> @@ -121,7 +127,7 @@
>  			}					\
>  			i->nr_segs -= kvec - i->kvec;		\
>  			i->kvec = kvec;				\
> -		} else if (unlikely(i->type & ITER_DISCARD)) {	\
> +		} else if (unlikely(iov_iter_type(i) & ITER_DISCARD)) {	\
>  			skip += n;				\
>  		} else {					\
>  			const struct iovec *iov;		\
> @@ -427,14 +433,14 @@ static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t by
>   * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
>   * because it is an invalid address).
>   */
> -int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
> +static int xxx_fault_in_readable(struct iov_iter *i, size_t bytes)
>  {
>  	size_t skip = i->iov_offset;
>  	const struct iovec *iov;
>  	int err;
>  	struct iovec v;
>  
> -	if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
> +	if (!(iov_iter_type(i) & (ITER_BVEC|ITER_KVEC))) {
>  		iterate_iovec(i, bytes, v, iov, skip, ({
>  			err = fault_in_pages_readable(v.iov_base, v.iov_len);
>  			if (unlikely(err))
> @@ -443,7 +449,6 @@ int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
>  	}
>  	return 0;
>  }
> -EXPORT_SYMBOL(iov_iter_fault_in_readable);
>  
>  void iov_iter_init(struct iov_iter *i, unsigned int direction,
>  			const struct iovec *iov, unsigned long nr_segs,
> @@ -454,10 +459,12 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction,
>  
>  	/* It will get better.  Eventually... */
>  	if (uaccess_kernel()) {
> -		i->type = ITER_KVEC | direction;
> +		i->ops = &kvec_iter_ops;
> +		i->flags = direction;
>  		i->kvec = (struct kvec *)iov;
>  	} else {
> -		i->type = ITER_IOVEC | direction;
> +		i->ops = &iovec_iter_ops;
> +		i->flags = direction;
>  		i->iov = iov;
>  	}
>  	i->nr_segs = nr_segs;
> @@ -625,7 +632,7 @@ static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
>  	return bytes;
>  }
>  
> -size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
> +static size_t xxx_copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
>  {
>  	const char *from = addr;
>  	if (unlikely(iov_iter_is_pipe(i)))
> @@ -641,7 +648,6 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
>  
>  	return bytes;
>  }
> -EXPORT_SYMBOL(_copy_to_iter);
>  
>  #ifdef CONFIG_ARCH_HAS_COPY_MC
>  static int copyout_mc(void __user *to, const void *from, size_t n)
> @@ -723,7 +729,7 @@ static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
>   *   Compare to copy_to_iter() where only ITER_IOVEC attempts might return
>   *   a short copy.
>   */
> -size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
> +static size_t xxx_copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
>  {
>  	const char *from = addr;
>  	unsigned long rem, curr_addr, s_addr = (unsigned long) addr;
> @@ -757,10 +763,9 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
>  
>  	return bytes;
>  }
> -EXPORT_SYMBOL_GPL(_copy_mc_to_iter);
>  #endif /* CONFIG_ARCH_HAS_COPY_MC */
>  
> -size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
> +static size_t xxx_copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
>  {
>  	char *to = addr;
>  	if (unlikely(iov_iter_is_pipe(i))) {
> @@ -778,9 +783,8 @@ size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
>  
>  	return bytes;
>  }
> -EXPORT_SYMBOL(_copy_from_iter);
>  
> -bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
> +static bool xxx_copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
>  {
>  	char *to = addr;
>  	if (unlikely(iov_iter_is_pipe(i))) {
> @@ -805,9 +809,8 @@ bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
>  	iov_iter_advance(i, bytes);
>  	return true;
>  }
> -EXPORT_SYMBOL(_copy_from_iter_full);
>  
> -size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
> +static size_t xxx_copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
>  {
>  	char *to = addr;
>  	if (unlikely(iov_iter_is_pipe(i))) {
> @@ -824,7 +827,6 @@ size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
>  
>  	return bytes;
>  }
> -EXPORT_SYMBOL(_copy_from_iter_nocache);
>  
>  #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
>  /**
> @@ -841,7 +843,7 @@ EXPORT_SYMBOL(_copy_from_iter_nocache);
>   * bypass the cache for the ITER_IOVEC case, and on some archs may use
>   * instructions that strand dirty-data in the cache.
>   */
> -size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
> +static size_t xxx_copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
>  {
>  	char *to = addr;
>  	if (unlikely(iov_iter_is_pipe(i))) {
> @@ -859,10 +861,9 @@ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
>  
>  	return bytes;
>  }
> -EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
>  #endif
>  
> -bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
> +static bool xxx_copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
>  {
>  	char *to = addr;
>  	if (unlikely(iov_iter_is_pipe(i))) {
> @@ -884,7 +885,6 @@ bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
>  	iov_iter_advance(i, bytes);
>  	return true;
>  }
> -EXPORT_SYMBOL(_copy_from_iter_full_nocache);
>  
>  static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
>  {
> @@ -910,12 +910,12 @@ static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
>  	return false;
>  }
>  
> -size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
> +static size_t xxx_copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
>  			 struct iov_iter *i)
>  {
>  	if (unlikely(!page_copy_sane(page, offset, bytes)))
>  		return 0;
> -	if (i->type & (ITER_BVEC|ITER_KVEC)) {
> +	if (iov_iter_type(i) & (ITER_BVEC|ITER_KVEC)) {
>  		void *kaddr = kmap_atomic(page);
>  		size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
>  		kunmap_atomic(kaddr);
> @@ -927,9 +927,8 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
>  	else
>  		return copy_page_to_iter_pipe(page, offset, bytes, i);
>  }
> -EXPORT_SYMBOL(copy_page_to_iter);
>  
> -size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
> +static size_t xxx_copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
>  			 struct iov_iter *i)
>  {
>  	if (unlikely(!page_copy_sane(page, offset, bytes)))
> @@ -938,15 +937,14 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
>  		WARN_ON(1);
>  		return 0;
>  	}
> -	if (i->type & (ITER_BVEC|ITER_KVEC)) {
> +	if (iov_iter_type(i) & (ITER_BVEC|ITER_KVEC)) {
>  		void *kaddr = kmap_atomic(page);
> -		size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
> +		size_t wanted = xxx_copy_from_iter(kaddr + offset, bytes, i);
>  		kunmap_atomic(kaddr);
>  		return wanted;
>  	} else
>  		return copy_page_from_iter_iovec(page, offset, bytes, i);
>  }
> -EXPORT_SYMBOL(copy_page_from_iter);
>  
>  static size_t pipe_zero(size_t bytes, struct iov_iter *i)
>  {
> @@ -975,7 +973,7 @@ static size_t pipe_zero(size_t bytes, struct iov_iter *i)
>  	return bytes;
>  }
>  
> -size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
> +static size_t xxx_zero(size_t bytes, struct iov_iter *i)
>  {
>  	if (unlikely(iov_iter_is_pipe(i)))
>  		return pipe_zero(bytes, i);
> @@ -987,9 +985,8 @@ size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
>  
>  	return bytes;
>  }
> -EXPORT_SYMBOL(iov_iter_zero);
>  
> -size_t iov_iter_copy_from_user_atomic(struct page *page,
> +static size_t xxx_copy_from_user_atomic(struct page *page,
>  		struct iov_iter *i, unsigned long offset, size_t bytes)
>  {
>  	char *kaddr = kmap_atomic(page), *p = kaddr + offset;
> @@ -1011,7 +1008,6 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
>  	kunmap_atomic(kaddr);
>  	return bytes;
>  }
> -EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
>  
>  static inline void pipe_truncate(struct iov_iter *i)
>  {
> @@ -1067,7 +1063,7 @@ static void pipe_advance(struct iov_iter *i, size_t size)
>  	pipe_truncate(i);
>  }
>  
> -void iov_iter_advance(struct iov_iter *i, size_t size)
> +static void xxx_advance(struct iov_iter *i, size_t size)
>  {
>  	if (unlikely(iov_iter_is_pipe(i))) {
>  		pipe_advance(i, size);
> @@ -1079,9 +1075,8 @@ void iov_iter_advance(struct iov_iter *i, size_t size)
>  	}
>  	iterate_and_advance(i, size, v, 0, 0, 0)
>  }
> -EXPORT_SYMBOL(iov_iter_advance);
>  
> -void iov_iter_revert(struct iov_iter *i, size_t unroll)
> +static void xxx_revert(struct iov_iter *i, size_t unroll)
>  {
>  	if (!unroll)
>  		return;
> @@ -1147,12 +1142,11 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll)
>  		}
>  	}
>  }
> -EXPORT_SYMBOL(iov_iter_revert);
>  
>  /*
>   * Return the count of just the current iov_iter segment.
>   */
> -size_t iov_iter_single_seg_count(const struct iov_iter *i)
> +static size_t xxx_single_seg_count(const struct iov_iter *i)
>  {
>  	if (unlikely(iov_iter_is_pipe(i)))
>  		return i->count;	// it is a silly place, anyway
> @@ -1165,14 +1159,14 @@ size_t iov_iter_single_seg_count(const struct iov_iter *i)
>  	else
>  		return min(i->count, i->iov->iov_len - i->iov_offset);
>  }
> -EXPORT_SYMBOL(iov_iter_single_seg_count);
>  
>  void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
> -			const struct kvec *kvec, unsigned long nr_segs,
> -			size_t count)
> +		   const struct kvec *kvec, unsigned long nr_segs,
> +		   size_t count)
>  {
>  	WARN_ON(direction & ~(READ | WRITE));
> -	i->type = ITER_KVEC | (direction & (READ | WRITE));
> +	i->ops = &kvec_iter_ops;
> +	i->flags = direction & (READ | WRITE);
>  	i->kvec = kvec;
>  	i->nr_segs = nr_segs;
>  	i->iov_offset = 0;
> @@ -1185,7 +1179,8 @@ void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
>  			size_t count)
>  {
>  	WARN_ON(direction & ~(READ | WRITE));
> -	i->type = ITER_BVEC | (direction & (READ | WRITE));
> +	i->ops = &bvec_iter_ops;
> +	i->flags = direction & (READ | WRITE);
>  	i->bvec = bvec;
>  	i->nr_segs = nr_segs;
>  	i->iov_offset = 0;
> @@ -1199,7 +1194,8 @@ void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
>  {
>  	BUG_ON(direction != READ);
>  	WARN_ON(pipe_full(pipe->head, pipe->tail, pipe->ring_size));
> -	i->type = ITER_PIPE | READ;
> +	i->ops = &pipe_iter_ops;
> +	i->flags = READ;
>  	i->pipe = pipe;
>  	i->head = pipe->head;
>  	i->iov_offset = 0;
> @@ -1220,13 +1216,14 @@ EXPORT_SYMBOL(iov_iter_pipe);
>  void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
>  {
>  	BUG_ON(direction != READ);
> -	i->type = ITER_DISCARD | READ;
> +	i->ops = &discard_iter_ops;
> +	i->flags = READ;
>  	i->count = count;
>  	i->iov_offset = 0;
>  }
>  EXPORT_SYMBOL(iov_iter_discard);
>  
> -unsigned long iov_iter_alignment(const struct iov_iter *i)
> +static unsigned long xxx_alignment(const struct iov_iter *i)
>  {
>  	unsigned long res = 0;
>  	size_t size = i->count;
> @@ -1245,9 +1242,8 @@ unsigned long iov_iter_alignment(const struct iov_iter *i)
>  	)
>  	return res;
>  }
> -EXPORT_SYMBOL(iov_iter_alignment);
>  
> -unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
> +static unsigned long xxx_gap_alignment(const struct iov_iter *i)
>  {
>  	unsigned long res = 0;
>  	size_t size = i->count;
> @@ -1267,7 +1263,6 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
>  		);
>  	return res;
>  }
> -EXPORT_SYMBOL(iov_iter_gap_alignment);
>  
>  static inline ssize_t __pipe_get_pages(struct iov_iter *i,
>  				size_t maxsize,
> @@ -1313,7 +1308,7 @@ static ssize_t pipe_get_pages(struct iov_iter *i,
>  	return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start);
>  }
>  
> -ssize_t iov_iter_get_pages(struct iov_iter *i,
> +static ssize_t xxx_get_pages(struct iov_iter *i,
>  		   struct page **pages, size_t maxsize, unsigned maxpages,
>  		   size_t *start)
>  {
> @@ -1352,7 +1347,6 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
>  	)
>  	return 0;
>  }
> -EXPORT_SYMBOL(iov_iter_get_pages);
>  
>  static struct page **get_pages_array(size_t n)
>  {
> @@ -1392,7 +1386,7 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
>  	return n;
>  }
>  
> -ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
> +static ssize_t xxx_get_pages_alloc(struct iov_iter *i,
>  		   struct page ***pages, size_t maxsize,
>  		   size_t *start)
>  {
> @@ -1439,9 +1433,8 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
>  	)
>  	return 0;
>  }
> -EXPORT_SYMBOL(iov_iter_get_pages_alloc);
>  
> -size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
> +static size_t xxx_csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
>  			       struct iov_iter *i)
>  {
>  	char *to = addr;
> @@ -1478,9 +1471,8 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
>  	*csum = sum;
>  	return bytes;
>  }
> -EXPORT_SYMBOL(csum_and_copy_from_iter);
>  
> -bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
> +static bool xxx_csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
>  			       struct iov_iter *i)
>  {
>  	char *to = addr;
> @@ -1520,9 +1512,8 @@ bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
>  	iov_iter_advance(i, bytes);
>  	return true;
>  }
> -EXPORT_SYMBOL(csum_and_copy_from_iter_full);
>  
> -size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump,
> +static size_t xxx_csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump,
>  			     struct iov_iter *i)
>  {
>  	const char *from = addr;
> @@ -1564,7 +1555,6 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump,
>  	*csum = sum;
>  	return bytes;
>  }
> -EXPORT_SYMBOL(csum_and_copy_to_iter);
>  
>  size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
>  		struct iov_iter *i)
> @@ -1585,7 +1575,7 @@ size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
>  }
>  EXPORT_SYMBOL(hash_and_copy_to_iter);
>  
> -int iov_iter_npages(const struct iov_iter *i, int maxpages)
> +static int xxx_npages(const struct iov_iter *i, int maxpages)
>  {
>  	size_t size = i->count;
>  	int npages = 0;
> @@ -1628,9 +1618,8 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
>  	)
>  	return npages;
>  }
> -EXPORT_SYMBOL(iov_iter_npages);
>  
> -const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
> +static const void *xxx_dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
>  {
>  	*new = *old;
>  	if (unlikely(iov_iter_is_pipe(new))) {
> @@ -1649,7 +1638,6 @@ const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
>  				   new->nr_segs * sizeof(struct iovec),
>  				   flags);
>  }
> -EXPORT_SYMBOL(dup_iter);
>  
>  static int copy_compat_iovec_from_user(struct iovec *iov,
>  		const struct iovec __user *uvec, unsigned long nr_segs)
> @@ -1826,7 +1814,7 @@ int import_single_range(int rw, void __user *buf, size_t len,
>  }
>  EXPORT_SYMBOL(import_single_range);
>  
> -int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
> +static int xxx_for_each_range(struct iov_iter *i, size_t bytes,
>  			    int (*f)(struct kvec *vec, void *context),
>  			    void *context)
>  {
> @@ -1846,4 +1834,173 @@ int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
>  	)
>  	return err;
>  }
> -EXPORT_SYMBOL(iov_iter_for_each_range);
> +
> +static const struct iov_iter_ops iovec_iter_ops = {
> +	.type				= ITER_IOVEC,
> +	.copy_from_user_atomic		= xxx_copy_from_user_atomic,
> +	.advance			= xxx_advance,
> +	.revert				= xxx_revert,
> +	.fault_in_readable		= xxx_fault_in_readable,
> +	.single_seg_count		= xxx_single_seg_count,
> +	.copy_page_to_iter		= xxx_copy_page_to_iter,
> +	.copy_page_from_iter		= xxx_copy_page_from_iter,
> +	.copy_to_iter			= xxx_copy_to_iter,
> +	.copy_from_iter			= xxx_copy_from_iter,
> +	.copy_from_iter_full		= xxx_copy_from_iter_full,
> +	.copy_from_iter_nocache		= xxx_copy_from_iter_nocache,
> +	.copy_from_iter_full_nocache	= xxx_copy_from_iter_full_nocache,
> +#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
> +	.copy_from_iter_flushcache	= xxx_copy_from_iter_flushcache,
> +#endif
> +#ifdef CONFIG_ARCH_HAS_COPY_MC
> +	.copy_mc_to_iter		= xxx_copy_mc_to_iter,
> +#endif
> +	.csum_and_copy_to_iter		= xxx_csum_and_copy_to_iter,
> +	.csum_and_copy_from_iter	= xxx_csum_and_copy_from_iter,
> +	.csum_and_copy_from_iter_full	= xxx_csum_and_copy_from_iter_full,
> +
> +	.zero				= xxx_zero,
> +	.alignment			= xxx_alignment,
> +	.gap_alignment			= xxx_gap_alignment,
> +	.get_pages			= xxx_get_pages,
> +	.get_pages_alloc		= xxx_get_pages_alloc,
> +	.npages				= xxx_npages,
> +	.dup_iter			= xxx_dup_iter,
> +	.for_each_range			= xxx_for_each_range,
> +};
> +
> +static const struct iov_iter_ops kvec_iter_ops = {
> +	.type				= ITER_KVEC,
> +	.copy_from_user_atomic		= xxx_copy_from_user_atomic,
> +	.advance			= xxx_advance,
> +	.revert				= xxx_revert,
> +	.fault_in_readable		= xxx_fault_in_readable,
> +	.single_seg_count		= xxx_single_seg_count,
> +	.copy_page_to_iter		= xxx_copy_page_to_iter,
> +	.copy_page_from_iter		= xxx_copy_page_from_iter,
> +	.copy_to_iter			= xxx_copy_to_iter,
> +	.copy_from_iter			= xxx_copy_from_iter,
> +	.copy_from_iter_full		= xxx_copy_from_iter_full,
> +	.copy_from_iter_nocache		= xxx_copy_from_iter_nocache,
> +	.copy_from_iter_full_nocache	= xxx_copy_from_iter_full_nocache,
> +#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
> +	.copy_from_iter_flushcache	= xxx_copy_from_iter_flushcache,
> +#endif
> +#ifdef CONFIG_ARCH_HAS_COPY_MC
> +	.copy_mc_to_iter		= xxx_copy_mc_to_iter,
> +#endif
> +	.csum_and_copy_to_iter		= xxx_csum_and_copy_to_iter,
> +	.csum_and_copy_from_iter	= xxx_csum_and_copy_from_iter,
> +	.csum_and_copy_from_iter_full	= xxx_csum_and_copy_from_iter_full,
> +
> +	.zero				= xxx_zero,
> +	.alignment			= xxx_alignment,
> +	.gap_alignment			= xxx_gap_alignment,
> +	.get_pages			= xxx_get_pages,
> +	.get_pages_alloc		= xxx_get_pages_alloc,
> +	.npages				= xxx_npages,
> +	.dup_iter			= xxx_dup_iter,
> +	.for_each_range			= xxx_for_each_range,
> +};
> +
> +static const struct iov_iter_ops bvec_iter_ops = {
> +	.type				= ITER_BVEC,
> +	.copy_from_user_atomic		= xxx_copy_from_user_atomic,
> +	.advance			= xxx_advance,
> +	.revert				= xxx_revert,
> +	.fault_in_readable		= xxx_fault_in_readable,
> +	.single_seg_count		= xxx_single_seg_count,
> +	.copy_page_to_iter		= xxx_copy_page_to_iter,
> +	.copy_page_from_iter		= xxx_copy_page_from_iter,
> +	.copy_to_iter			= xxx_copy_to_iter,
> +	.copy_from_iter			= xxx_copy_from_iter,
> +	.copy_from_iter_full		= xxx_copy_from_iter_full,
> +	.copy_from_iter_nocache		= xxx_copy_from_iter_nocache,
> +	.copy_from_iter_full_nocache	= xxx_copy_from_iter_full_nocache,
> +#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
> +	.copy_from_iter_flushcache	= xxx_copy_from_iter_flushcache,
> +#endif
> +#ifdef CONFIG_ARCH_HAS_COPY_MC
> +	.copy_mc_to_iter		= xxx_copy_mc_to_iter,
> +#endif
> +	.csum_and_copy_to_iter		= xxx_csum_and_copy_to_iter,
> +	.csum_and_copy_from_iter	= xxx_csum_and_copy_from_iter,
> +	.csum_and_copy_from_iter_full	= xxx_csum_and_copy_from_iter_full,
> +
> +	.zero				= xxx_zero,
> +	.alignment			= xxx_alignment,
> +	.gap_alignment			= xxx_gap_alignment,
> +	.get_pages			= xxx_get_pages,
> +	.get_pages_alloc		= xxx_get_pages_alloc,
> +	.npages				= xxx_npages,
> +	.dup_iter			= xxx_dup_iter,
> +	.for_each_range			= xxx_for_each_range,
> +};
> +
> +static const struct iov_iter_ops pipe_iter_ops = {
> +	.type				= ITER_PIPE,
> +	.copy_from_user_atomic		= xxx_copy_from_user_atomic,
> +	.advance			= xxx_advance,
> +	.revert				= xxx_revert,
> +	.fault_in_readable		= xxx_fault_in_readable,
> +	.single_seg_count		= xxx_single_seg_count,
> +	.copy_page_to_iter		= xxx_copy_page_to_iter,
> +	.copy_page_from_iter		= xxx_copy_page_from_iter,
> +	.copy_to_iter			= xxx_copy_to_iter,
> +	.copy_from_iter			= xxx_copy_from_iter,
> +	.copy_from_iter_full		= xxx_copy_from_iter_full,
> +	.copy_from_iter_nocache		= xxx_copy_from_iter_nocache,
> +	.copy_from_iter_full_nocache	= xxx_copy_from_iter_full_nocache,
> +#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
> +	.copy_from_iter_flushcache	= xxx_copy_from_iter_flushcache,
> +#endif
> +#ifdef CONFIG_ARCH_HAS_COPY_MC
> +	.copy_mc_to_iter		= xxx_copy_mc_to_iter,
> +#endif
> +	.csum_and_copy_to_iter		= xxx_csum_and_copy_to_iter,
> +	.csum_and_copy_from_iter	= xxx_csum_and_copy_from_iter,
> +	.csum_and_copy_from_iter_full	= xxx_csum_and_copy_from_iter_full,
> +
> +	.zero				= xxx_zero,
> +	.alignment			= xxx_alignment,
> +	.gap_alignment			= xxx_gap_alignment,
> +	.get_pages			= xxx_get_pages,
> +	.get_pages_alloc		= xxx_get_pages_alloc,
> +	.npages				= xxx_npages,
> +	.dup_iter			= xxx_dup_iter,
> +	.for_each_range			= xxx_for_each_range,
> +};
> +
> +static const struct iov_iter_ops discard_iter_ops = {
> +	.type				= ITER_DISCARD,
> +	.copy_from_user_atomic		= xxx_copy_from_user_atomic,
> +	.advance			= xxx_advance,
> +	.revert				= xxx_revert,
> +	.fault_in_readable		= xxx_fault_in_readable,
> +	.single_seg_count		= xxx_single_seg_count,
> +	.copy_page_to_iter		= xxx_copy_page_to_iter,
> +	.copy_page_from_iter		= xxx_copy_page_from_iter,
> +	.copy_to_iter			= xxx_copy_to_iter,
> +	.copy_from_iter			= xxx_copy_from_iter,
> +	.copy_from_iter_full		= xxx_copy_from_iter_full,
> +	.copy_from_iter_nocache		= xxx_copy_from_iter_nocache,
> +	.copy_from_iter_full_nocache	= xxx_copy_from_iter_full_nocache,
> +#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
> +	.copy_from_iter_flushcache	= xxx_copy_from_iter_flushcache,
> +#endif
> +#ifdef CONFIG_ARCH_HAS_COPY_MC
> +	.copy_mc_to_iter		= xxx_copy_mc_to_iter,
> +#endif
> +	.csum_and_copy_to_iter		= xxx_csum_and_copy_to_iter,
> +	.csum_and_copy_from_iter	= xxx_csum_and_copy_from_iter,
> +	.csum_and_copy_from_iter_full	= xxx_csum_and_copy_from_iter_full,
> +
> +	.zero				= xxx_zero,
> +	.alignment			= xxx_alignment,
> +	.gap_alignment			= xxx_gap_alignment,
> +	.get_pages			= xxx_get_pages,
> +	.get_pages_alloc		= xxx_get_pages_alloc,
> +	.npages				= xxx_npages,
> +	.dup_iter			= xxx_dup_iter,
> +	.for_each_range			= xxx_for_each_range,
> +};
> 
>
Linus Torvalds Nov. 21, 2020, 6:21 p.m. UTC | #2
On Sat, Nov 21, 2020 at 6:13 AM David Howells <dhowells@redhat.com> wrote:
>
> Switch to using a table of operations.  In a future patch the individual
> methods will be split up by type.  For the moment, however, the ops tables
> just jump directly to the old functions - which are now static.  Inline
> wrappers are provided to jump through the hooks.

So I think conceptually this is the right thing to do, but I have a
couple of worries:

 - do we really need all those different versions? I'm thinking
"iter_full" versions in particular. They I think the iter_full version
could just be wrappers that call the regular iter thing and verify the
end result is full (and revert if not). No?

 - I don't like the xxx_iter_op naming - even as a temporary thing.

   Please don't use "xxx" as a placeholder. It's not a great grep
pattern, it's not really descriptive, and we've literally had issues
with things being marked as spam when you use that. So it's about the
worst pattern to use.

   Use "anycase" - or something like that - which is descriptive and
greps much better (ie not a single hit for that pattern in the kernel
either before or after).

 - I worry a bit about the indirect call overhead and spectre v2.

   So yeah, it would be good to have benchmarks to make sure this
doesn't regress for some simple case.
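
A rough sketch of the "generic full wrapper" idea from the first point
above - the wrapper name is invented here, and it assumes only the
existing _copy_from_iter() and iov_iter_revert() helpers, so treat it as
illustrative rather than as part of the patch:

	static __always_inline __must_check
	bool copy_from_iter_full_generic(void *addr, size_t bytes, struct iov_iter *i)
	{
		size_t copied = _copy_from_iter(addr, bytes, i);

		if (likely(copied == bytes))
			return true;

		/* Short copy: undo the partial advance and report failure. */
		iov_iter_revert(i, copied);
		return false;
	}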

Other than those things, my initial reaction is "this does seem cleaner".

Al?

              Linus
David Howells Nov. 22, 2020, 1:33 p.m. UTC | #3
Linus Torvalds <torvalds@linux-foundation.org> wrote:

>  - I worry a bit about the indirect call overhead and spectre v2.

I don't know enough about how spectre v2 works to say if this would be a
problem for the ops-table approach, but wouldn't it also affect the chain of
conditional branches that we currently use, since it's branch-prediction
based?

David
David Laight Nov. 22, 2020, 1:58 p.m. UTC | #4
From: David Howells
> Sent: 22 November 2020 13:33
> 
> Linus Torvalds <torvalds@linux-foundation.org> wrote:
> 
> >  - I worry a bit about the indirect call overhead and spectre v2.
> 
> I don't know enough about how spectre v2 works to say if this would be a
> problem for the ops-table approach, but wouldn't it also affect the chain of
> conditional branches that we currently use, since it's branch-prediction
> based?

The advantage of the 'chain of branches' is that it can be converted
into a 'tree of branches' because the values are all separate bits.

So as well as putting the (expected) common one first, you can do:
	if (likely(a & (A | B))) {
		if (a & A) {
			code for A;
		} else {
			code for B;
		}
	} else ...
So you get better control over the branch sequence.
(Hopefully the compiler doesn't change the logic.
I want a dumb compiler that (mostly) compiles what I write!)

Part of the difficulty is deciding the common case.
There'll always be a benchmark that exercises an uncommon case.

Adding an indirect call does let you do things like adding
ITER_IOVEC_SINGLE and ITER_KVEC_SINGLE that are used in the
common case of a single buffer fragment.
That might be a measurable gain.

It is also possible to optimise the common case to a direct
call (or even inline code) and use an indirect call for
everything else.
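
One possible shape of that last idea, using the iovec_iter_ops and
xxx_copy_to_iter names from this patch (the wrapper name and its
placement in lib/iov_iter.c are illustrative only):

	static __always_inline
	size_t copy_to_iter_fast(const void *addr, size_t bytes, struct iov_iter *i)
	{
		/* Common case: userspace iovec - call the handler directly. */
		if (likely(i->ops == &iovec_iter_ops))
			return xxx_copy_to_iter(addr, bytes, i);

		/* Everything else goes through the (retpolined) ops table. */
		return i->ops->copy_to_iter(addr, bytes, i);
	}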

	David

Linus Torvalds Nov. 22, 2020, 7:22 p.m. UTC | #5
On Sun, Nov 22, 2020 at 5:33 AM David Howells <dhowells@redhat.com> wrote:
>
> I don't know enough about how spectre v2 works to say if this would be a
> problem for the ops-table approach, but wouldn't it also affect the chain of
> conditional branches that we currently use, since it's branch-prediction
> based?

No, regular conditional branches aren't a problem. Yes, they may
mispredict, but outside of a few very rare cases that we handle
specially, that's not an issue.

Why? Because they always mispredict to one or the other side, so the
code flow may be mis-predicted, but it is fairly controlled.

In contrast, an indirect jump can mispredict the target, and branch
_anywhere_, and the attack vectors can poison the BTB (branch target
buffer), so our mitigation for that is that every single indirect
branch isn't predicted at all (using "retpoline").

So a conditional branch takes zero cycles when predicted (and most
will predict quite well). And as David Laight pointed out a compiler
can also turn a series of conditional branches into a tree, which means
that N conditional branches basically only need log2(N) conditionals
executed.

In contrast, with retpoline in place, an indirect branch will
basically always take something like 25-30 cycles, because it always
mispredicts.

End result:

 - well-predicted conditional branches are basically free (apart from
code layout issues)

 - even with average prediction, a series of conditional branches has
to be fairly long for it to be worse than an indirect branch

 - only completely unpredictable conditional branches end up basically
losing, and even then you probably need more than one. And while
completely unpredictable conditional branches do exist, they are
pretty rare.

The other side of the coin, of course, is

 - often this is not measurable anyway.

 - code cleanliness is important

 - not everything needs retpolines and the expensive indirect branches.

So this is not in any way "indirect branches are bad". It's more of a
"indirect branches really aren't necessarily better than a couple of
conditionals, and _may_ be much worse".

For example, look at this gcc bugzilla:

    https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86952

which basically is about the compiler generating a jump table (i.e. a
single indirect branch) vs a series of conditional branches. With
retpoline, the cross-over point is basically when you need to have
over 10 conditional branches - and because of the log2(N) behavior,
that's around a thousand cases!

(But this depends hugely on microarchitectural details).

             Linus
David Laight Nov. 22, 2020, 10:34 p.m. UTC | #6
From: Linus Torvalds
> Sent: 22 November 2020 19:22
> Subject: Re: [PATCH 01/29] iov_iter: Switch to using a table of operations
> 
> On Sun, Nov 22, 2020 at 5:33 AM David Howells <dhowells@redhat.com> wrote:
> >
> > I don't know enough about how spectre v2 works to say if this would be a
> > problem for the ops-table approach, but wouldn't it also affect the chain of
> > conditional branches that we currently use, since it's branch-prediction
> > based?
> 
> No, regular conditional branches aren't a problem. Yes, they may
> mispredict, but outside of a few very rare cases that we handle
> specially, that's not an issue.
> 
> Why? Because they always mispredict to one or the other side, so the
> code flow may be mis-predicted, but it is fairly controlled.
> 
> In contrast, an indirect jump can mispredict the target, and branch
> _anywhere_, and the attack vectors can poison the BTB (branch target
> buffer), so our mitigation for that is that every single indirect
> branch isn't predicted at all (using "retpoline").
> 
> So a conditional branch takes zero cycles when predicted (and most
> will predict quite well). And as David Laight pointed out a compiler
> can also turn a series of conditional branches into a tree, means that
> N conditional branches basically only needs log2(N) conditionals
> executed.

The compiler can convert a switch statement into a branch tree.
But I don't think it can convert the 'if chain' in the current code
to one.

There is also the problem that some x86 CPUs can't predict branches
if too many happen in the same cache line (or similar).

> In contrast, with retpoline in place, an indirect branch will
> basically always take something like 25-30 cycles, because it always
> mispredicts.

I also wonder if a retpoline trashes the return stack optimisation.
(If that is ever really a significant gain for real functions.)
 
...
> So this is not in any way "indirect branches are bad". It's more of a
> "indirect branches really aren't necessarily better than a couple of
> conditionals, and _may_ be much worse".

Even without retpolines, the jump table is likely to take a data-cache
miss (and maybe a TLB miss) unless you are running hot-cache.
That is probably an extra cache miss on top of the I-cache ones.
Even worse if you end up with the jump table near the code
since the data cache line and TLB might never be shared.

So a very short switch statement is likely to be better as
conditional jumps anyway.

> For example, look at this gcc bugzilla:
> 
>     https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86952
> 
> which basically is about the compiler generating a jump table (is a
> single indirect branch) vs a series of conditional branches. With
> retpoline, the cross-over point is basically when you need to have
> over 10 conditional branches - and because of the log2(N) behavior,
> that's around a thousand cases!

That was a hot-cache test.
Cold-cache is likely to favour the retpoline a little sooner.
(And the retpoline (probably) won't be (much) worse than the
mis-predicted indirect jump.)

I do wonder how much of the kernel actually runs hot-cache?
Except for parts that explicitly run things in bursts.

	David

David Laight Nov. 22, 2020, 10:46 p.m. UTC | #7
From: David Howells
> Sent: 21 November 2020 14:14
> 
> Switch to using a table of operations.  In a future patch the individual
> methods will be split up by type.  For the moment, however, the ops tables
> just jump directly to the old functions - which are now static.  Inline
> wrappers are provided to jump through the hooks.

I was wondering if you could use a bit of 'cpp magic'
so that the call sites would be:
	ITER_CALL(iter, action)(arg_list);

which might expand to:
	iter->ops->action(arg_list);
in the function-table case.
But it could also be an if-chain:
	if (iter->type & foo)
		foo_action(args);
	else ...
with foo_action() being inlined.

If there is enough symmetry it might make the code easier to read.
Although I'm not sure what happens to 'iterate_all_kinds'.
OTOH that is already unreadable.
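
For illustration, folding the argument list into the macro (so that the
if-chain form can work as well) it might look something like this - the
config symbol and the per-type helper names are invented, not anything
in the patch:

	#ifdef CONFIG_IOV_ITER_OPS_TABLE
	/* Ops-table build: expands to an indirect call, as in this patch. */
	#define ITER_CALL(iter, action, ...) \
		((iter)->ops->action(__VA_ARGS__))
	#else
	/* If-chain build: branch on the type so each helper can be inlined. */
	#define ITER_CALL(iter, action, ...)				\
		(iov_iter_is_bvec(iter) ? bvec_##action(__VA_ARGS__) :	\
		 iov_iter_is_kvec(iter) ? kvec_##action(__VA_ARGS__) :	\
					  iovec_##action(__VA_ARGS__))
	#endif

	/* A call site would then read: */
	copied = ITER_CALL(i, copy_to_iter, addr, bytes, i);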

	David

Christoph Hellwig Nov. 23, 2020, 8:05 a.m. UTC | #8
On Sat, Nov 21, 2020 at 02:13:30PM +0000, David Howells wrote:
> Switch to using a table of operations.  In a future patch the individual
> methods will be split up by type.  For the moment, however, the ops tables
> just jump directly to the old functions - which are now static.  Inline
> wrappers are provided to jump through the hooks.
> 
> Signed-off-by: David Howells <dhowells@redhat.com>

Please run performance tests.  I think the indirect calls could totally
wreck things like high performance direct I/O, especially using io_uring
on x86.
David Howells Nov. 23, 2020, 10:31 a.m. UTC | #9
Christoph Hellwig <hch@infradead.org> wrote:

> Please run performance tests.  I think the indirect calls could totally
> wreck things like high performance direct I/O, especially using io_uring
> on x86.

Here's an initial test using fio and null_blk.  I left null_blk in its default
configuration and used the following command line:

fio --ioengine=libaio --direct=1 --gtod_reduce=1 --name=readtest --filename=/dev/nullb0 --bs=4k --iodepth=128 --time_based --runtime=120 --readwrite=randread --iodepth_low=96 --iodepth_batch=16 --numjobs=4

I borrowed some of the parameters from an email I found online, so I'm not
sure if they're that useful.

I tried three different sets of patches: none, just the first (which adds the
jump table without getting rid of the conditional branches), and all of them.

I'm not sure which stats are of particular interest here, so I took the two
summary stats from the output of fio and also added together the "issued rwts:
total=a,b,c,d" from each test thread (only the first of which is non-zero).

The CPU is an Intel(R) Core(TM) i3-4170 CPU @ 3.70GHz, so 4 single-thread
cores, and 16G of RAM.  No virtualisation is involved.

Unpatched:

   READ: bw=4109MiB/s (4308MB/s), 1025MiB/s-1029MiB/s (1074MB/s-1079MB/s), io=482GiB (517GB), run=120001-120001msec
   READ: bw=4097MiB/s (4296MB/s), 1020MiB/s-1029MiB/s (1070MB/s-1079MB/s), io=480GiB (516GB), run=120001-120001msec
   READ: bw=4113MiB/s (4312MB/s), 1025MiB/s-1031MiB/s (1075MB/s-1082MB/s), io=482GiB (517GB), run=120001-120001msec
   READ: bw=4125MiB/s (4325MB/s), 1028MiB/s-1033MiB/s (1078MB/s-1084MB/s), io=483GiB (519GB), run=120001-120001msec

  nullb0: ios=126017326/0, merge=53/0, ticks=3538817/0, in_queue=3538817, util=100.00%
  nullb0: ios=125655193/0, merge=55/0, ticks=3548157/0, in_queue=3548157, util=100.00%
  nullb0: ios=126133014/0, merge=58/0, ticks=3545621/0, in_queue=3545621, util=100.00%
  nullb0: ios=126512562/0, merge=57/0, ticks=3531600/0, in_queue=3531600, util=100.00%

  sum issued rwts = 126224632
  sum issued rwts = 125861368
  sum issued rwts = 126340344
  sum issued rwts = 126718648

Just first patch:

   READ: bw=4106MiB/s (4306MB/s), 1023MiB/s-1030MiB/s (1073MB/s-1080MB/s), io=481GiB (517GB), run=120001-120001msec
   READ: bw=4126MiB/s (4327MB/s), 1029MiB/s-1034MiB/s (1079MB/s-1084MB/s), io=484GiB (519GB), run=120001-120001msec
   READ: bw=4109MiB/s (4308MB/s), 1025MiB/s-1029MiB/s (1075MB/s-1079MB/s), io=481GiB (517GB), run=120001-120001msec
   READ: bw=4097MiB/s (4296MB/s), 1023MiB/s-1025MiB/s (1073MB/s-1074MB/s), io=480GiB (516GB), run=120001-120001msec

  nullb0: ios=125939152/0, merge=62/0, ticks=3534917/0, in_queue=3534917, util=100.00%
  nullb0: ios=126554181/0, merge=61/0, ticks=3532067/0, in_queue=3532067, util=100.00%
  nullb0: ios=126012346/0, merge=54/0, ticks=3530504/0, in_queue=3530504, util=100.00%
  nullb0: ios=125653775/0, merge=54/0, ticks=3537438/0, in_queue=3537438, util=100.00%

  sum issued rwts = 126144952
  sum issued rwts = 126765368
  sum issued rwts = 126215928
  sum issued rwts = 125864120

All patches:
  nullb0: ios=10477062/0, merge=2/0, ticks=284992/0, in_queue=284992, util=95.87%
  nullb0: ios=10405246/0, merge=2/0, ticks=291886/0, in_queue=291886, util=99.82%
  nullb0: ios=10425583/0, merge=1/0, ticks=291699/0, in_queue=291699, util=99.22%
  nullb0: ios=10438845/0, merge=3/0, ticks=292445/0, in_queue=292445, util=99.31%

   READ: bw=4118MiB/s (4318MB/s), 1028MiB/s-1032MiB/s (1078MB/s-1082MB/s), io=483GiB (518GB), run=120001-120001msec
   READ: bw=4109MiB/s (4308MB/s), 1024MiB/s-1030MiB/s (1073MB/s-1080MB/s), io=481GiB (517GB), run=120001-120001msec
   READ: bw=4108MiB/s (4308MB/s), 1026MiB/s-1029MiB/s (1076MB/s-1079MB/s), io=481GiB (517GB), run=120001-120001msec
   READ: bw=4112MiB/s (4312MB/s), 1025MiB/s-1031MiB/s (1075MB/s-1081MB/s), io=482GiB (517GB), run=120001-120001msec

  nullb0: ios=126282410/0, merge=58/0, ticks=3557384/0, in_queue=3557384, util=100.00%
  nullb0: ios=126004837/0, merge=67/0, ticks=3565235/0, in_queue=3565235, util=100.00%
  nullb0: ios=125988876/0, merge=59/0, ticks=3563026/0, in_queue=3563026, util=100.00%
  nullb0: ios=126118279/0, merge=57/0, ticks=3566122/0, in_queue=3566122, util=100.00%

  sum issued rwts = 126494904
  sum issued rwts = 126214200
  sum issued rwts = 126198200
  sum issued rwts = 126328312


David
David Howells Nov. 23, 2020, 11:14 a.m. UTC | #10
David Howells <dhowells@redhat.com> wrote:

> I tried three different sets of patches: none, just the first (which adds the
> jump table without getting rid of the conditional branches), and all of them.

And, I forgot to mention, I ran each test four times and then interleaved the
result lines for that set.

David
Pavel Begunkov Nov. 23, 2020, 11:21 p.m. UTC | #11
On 21/11/2020 14:31, Pavel Begunkov wrote:
> On 21/11/2020 14:13, David Howells wrote:
>> Switch to using a table of operations.  In a future patch the individual
>> methods will be split up by type.  For the moment, however, the ops tables
>> just jump directly to the old functions - which are now static.  Inline
>> wrappers are provided to jump through the hooks.
>>
>> Signed-off-by: David Howells <dhowells@redhat.com>
>> ---
>>
>>  fs/io_uring.c       |    2 
>>  include/linux/uio.h |  241 ++++++++++++++++++++++++++++++++++--------
>>  lib/iov_iter.c      |  293 +++++++++++++++++++++++++++++++++++++++------------
>>  3 files changed, 422 insertions(+), 114 deletions(-)
>>
>> diff --git a/fs/io_uring.c b/fs/io_uring.c
>> index 4ead291b2976..baa78f58ae5c 100644
>> --- a/fs/io_uring.c
>> +++ b/fs/io_uring.c
>> @@ -3192,7 +3192,7 @@ static void io_req_map_rw(struct io_kiocb *req, const struct iovec *iovec,
>>  	rw->free_iovec = iovec;
>>  	rw->bytes_done = 0;
>>  	/* can only be fixed buffers, no need to do anything */
>> -	if (iter->type == ITER_BVEC)
>> +	if (iov_iter_is_bvec(iter))
> 
> Could you split this io_uring change and send for 5.10?
> Or I can do it for you if you wish.

FYI, I stole this chunk with the right attribution. It should go through
io_uring for 5.10, so it shouldn't be a problem if you just drop it.
Pavel Begunkov Nov. 23, 2020, 11:42 p.m. UTC | #12
On 23/11/2020 10:31, David Howells wrote:
> Christoph Hellwig <hch@infradead.org> wrote:
> 
>> Please run performance tests.  I think the indirect calls could totally
>> wreck things like high performance direct I/O, especially using io_uring
>> on x86.
> 
> Here's an initial test using fio and null_blk.  I left null_blk in its default
> configuration and used the following command line:

I'd prefer something along the lines of no_sched=1 submit_queues=$(nproc) to reduce overhead.

> 
> fio --ioengine=libaio --direct=1 --gtod_reduce=1 --name=readtest --filename=/dev/nullb0 --bs=4k --iodepth=128 --time_based --runtime=120 --readwrite=randread --iodepth_low=96 --iodepth_batch=16 --numjobs=4

fio is relatively heavy; I'd suggest trying fio/t/io_uring with nullblk

> 
> I borrowed some of the parameters from an email I found online, so I'm not
> sure if they're that useful.
> 
> I tried three different sets of patches: none, just the first (which adds the
> jump table without getting rid of the conditional branches), and all of them.
> 
> I'm not sure which stats are of particular interest here, so I took the two
> summary stats from the output of fio and also added together the "issued rwts:
> total=a,b,c,d" from each test thread (only the first of which is non-zero).
> 
> The CPU is an Intel(R) Core(TM) i3-4170 CPU @ 3.70GHz, so 4 single-thread
> cores, and 16G of RAM.  No virtualisation is involved.
> 
> Unpatched:
> 
>    READ: bw=4109MiB/s (4308MB/s), 1025MiB/s-1029MiB/s (1074MB/s-1079MB/s), io=482GiB (517GB), run=120001-120001msec
>    READ: bw=4097MiB/s (4296MB/s), 1020MiB/s-1029MiB/s (1070MB/s-1079MB/s), io=480GiB (516GB), run=120001-120001msec
>    READ: bw=4113MiB/s (4312MB/s), 1025MiB/s-1031MiB/s (1075MB/s-1082MB/s), io=482GiB (517GB), run=120001-120001msec
>    READ: bw=4125MiB/s (4325MB/s), 1028MiB/s-1033MiB/s (1078MB/s-1084MB/s), io=483GiB (519GB), run=120001-120001msec
> 
>   nullb0: ios=126017326/0, merge=53/0, ticks=3538817/0, in_queue=3538817, util=100.00%
>   nullb0: ios=125655193/0, merge=55/0, ticks=3548157/0, in_queue=3548157, util=100.00%
>   nullb0: ios=126133014/0, merge=58/0, ticks=3545621/0, in_queue=3545621, util=100.00%
>   nullb0: ios=126512562/0, merge=57/0, ticks=3531600/0, in_queue=3531600, util=100.00%
> 
>   sum issued rwts = 126224632
>   sum issued rwts = 125861368
>   sum issued rwts = 126340344
>   sum issued rwts = 126718648
> 
> Just first patch:
> 
>    READ: bw=4106MiB/s (4306MB/s), 1023MiB/s-1030MiB/s (1073MB/s-1080MB/s), io=481GiB (517GB), run=120001-120001msec
>    READ: bw=4126MiB/s (4327MB/s), 1029MiB/s-1034MiB/s (1079MB/s-1084MB/s), io=484GiB (519GB), run=120001-120001msec
>    READ: bw=4109MiB/s (4308MB/s), 1025MiB/s-1029MiB/s (1075MB/s-1079MB/s), io=481GiB (517GB), run=120001-120001msec
>    READ: bw=4097MiB/s (4296MB/s), 1023MiB/s-1025MiB/s (1073MB/s-1074MB/s), io=480GiB (516GB), run=120001-120001msec
> 
>   nullb0: ios=125939152/0, merge=62/0, ticks=3534917/0, in_queue=3534917, util=100.00%
>   nullb0: ios=126554181/0, merge=61/0, ticks=3532067/0, in_queue=3532067, util=100.00%
>   nullb0: ios=126012346/0, merge=54/0, ticks=3530504/0, in_queue=3530504, util=100.00%
>   nullb0: ios=125653775/0, merge=54/0, ticks=3537438/0, in_queue=3537438, util=100.00%
> 
>   sum issued rwts = 126144952
>   sum issued rwts = 126765368
>   sum issued rwts = 126215928
>   sum issued rwts = 125864120
> 
> All patches:
>   nullb0: ios=10477062/0, merge=2/0, ticks=284992/0, in_queue=284992, util=95.87%
>   nullb0: ios=10405246/0, merge=2/0, ticks=291886/0, in_queue=291886, util=99.82%
>   nullb0: ios=10425583/0, merge=1/0, ticks=291699/0, in_queue=291699, util=99.22%
>   nullb0: ios=10438845/0, merge=3/0, ticks=292445/0, in_queue=292445, util=99.31%
> 
>    READ: bw=4118MiB/s (4318MB/s), 1028MiB/s-1032MiB/s (1078MB/s-1082MB/s), io=483GiB (518GB), run=120001-120001msec
>    READ: bw=4109MiB/s (4308MB/s), 1024MiB/s-1030MiB/s (1073MB/s-1080MB/s), io=481GiB (517GB), run=120001-120001msec
>    READ: bw=4108MiB/s (4308MB/s), 1026MiB/s-1029MiB/s (1076MB/s-1079MB/s), io=481GiB (517GB), run=120001-120001msec
>    READ: bw=4112MiB/s (4312MB/s), 1025MiB/s-1031MiB/s (1075MB/s-1081MB/s), io=482GiB (517GB), run=120001-120001msec
> 
>   nullb0: ios=126282410/0, merge=58/0, ticks=3557384/0, in_queue=3557384, util=100.00%
>   nullb0: ios=126004837/0, merge=67/0, ticks=3565235/0, in_queue=3565235, util=100.00%
>   nullb0: ios=125988876/0, merge=59/0, ticks=3563026/0, in_queue=3563026, util=100.00%
>   nullb0: ios=126118279/0, merge=57/0, ticks=3566122/0, in_queue=3566122, util=100.00%
> 
>   sum issued rwts = 126494904
>   sum issued rwts = 126214200
>   sum issued rwts = 126198200
>   sum issued rwts = 126328312
> 
> 
> David
>
David Howells Nov. 24, 2020, 12:50 p.m. UTC | #13
Pavel Begunkov <asml.silence@gmail.com> wrote:

> fio is relatively heavy; I'd suggest trying fio/t/io_uring with nullblk

no patches:

IOPS=885152, IOS/call=25/25, inflight=64 (64)
IOPS=890400, IOS/call=25/25, inflight=32 (32)
IOPS=890656, IOS/call=25/25, inflight=64 (64)
IOPS=896096, IOS/call=25/25, inflight=96 (96)
IOPS=876256, IOS/call=25/25, inflight=128 (128)
IOPS=905056, IOS/call=25/25, inflight=128 (128)
IOPS=882912, IOS/call=25/25, inflight=96 (96)
IOPS=887392, IOS/call=25/25, inflight=64 (32)
IOPS=897152, IOS/call=25/25, inflight=128 (128)
IOPS=871392, IOS/call=25/25, inflight=32 (32)
IOPS=865088, IOS/call=25/25, inflight=96 (96)
IOPS=880032, IOS/call=25/25, inflight=32 (32)
IOPS=905376, IOS/call=25/25, inflight=96 (96)
IOPS=898016, IOS/call=25/25, inflight=128 (128)
IOPS=885792, IOS/call=25/25, inflight=64 (64)
IOPS=897632, IOS/call=25/25, inflight=96 (96)

first patch only:

IOPS=876640, IOS/call=25/25, inflight=64 (64)
IOPS=878208, IOS/call=25/25, inflight=64 (64)
IOPS=884000, IOS/call=25/25, inflight=64 (64)
IOPS=900864, IOS/call=25/25, inflight=64 (64)
IOPS=878496, IOS/call=25/25, inflight=64 (64)
IOPS=870944, IOS/call=25/25, inflight=32 (32)
IOPS=900672, IOS/call=25/25, inflight=32 (32)
IOPS=882368, IOS/call=25/25, inflight=128 (128)
IOPS=877120, IOS/call=25/25, inflight=128 (128)
IOPS=861856, IOS/call=25/25, inflight=64 (64)
IOPS=892896, IOS/call=25/25, inflight=96 (96)
IOPS=875808, IOS/call=25/25, inflight=128 (128)
IOPS=887808, IOS/call=25/25, inflight=32 (80)
IOPS=889984, IOS/call=25/25, inflight=128 (128)

all patches:

IOPS=872192, IOS/call=25/25, inflight=96 (96)
IOPS=887360, IOS/call=25/25, inflight=32 (32)
IOPS=894432, IOS/call=25/25, inflight=128 (128)
IOPS=884640, IOS/call=25/25, inflight=32 (32)
IOPS=886784, IOS/call=25/25, inflight=32 (32)
IOPS=884160, IOS/call=25/25, inflight=96 (96)
IOPS=886944, IOS/call=25/25, inflight=96 (96)
IOPS=903360, IOS/call=25/25, inflight=128 (128)
IOPS=887744, IOS/call=25/25, inflight=64 (64)
IOPS=891072, IOS/call=25/25, inflight=32 (32)
IOPS=900512, IOS/call=25/25, inflight=128 (128)
IOPS=888544, IOS/call=25/25, inflight=128 (128)
IOPS=877312, IOS/call=25/25, inflight=128 (128)
IOPS=895008, IOS/call=25/25, inflight=128 (128)
IOPS=889376, IOS/call=25/25, inflight=128 (128)

David
Jens Axboe Nov. 24, 2020, 3:30 p.m. UTC | #14
On 11/24/20 5:50 AM, David Howells wrote:
> Pavel Begunkov <asml.silence@gmail.com> wrote:
> 
>> fio is relatively heavy; I'd suggest trying fio/t/io_uring with nullblk
> 
> no patches:

Here's what I get. nullb0 using blk-mq, and submit_queues==NPROC.
iostats and merging disabled, using 8k bs for t/io_uring to ensure we
have > 1 segment. Everything pinned to the same CPU to ensure
reproducibility and stability. Kernel has CONFIG_RETPOLINE enabled.
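
Roughly, that corresponds to a setup along these lines (the null_blk
parameters and t/io_uring flags below are reconstructed from memory
rather than copied from this thread, so treat them as an assumption and
check them against the fio tree):

	# blk-mq mode, one submit queue per CPU
	modprobe null_blk queue_mode=2 submit_queues=$(nproc)
	# turn off iostats and merging on the null device
	echo 0 > /sys/block/nullb0/queue/iostats
	echo 2 > /sys/block/nullb0/queue/nomerges
	# 8k blocks, queue depth 128, pinned to one CPU
	taskset -c 0 t/io_uring -b 8192 -d 128 /dev/nullb0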

5.10-rc5:
IOPS=2453184, IOS/call=32/31, inflight=128 (128)
IOPS=2435648, IOS/call=32/32, inflight=64 (64)
IOPS=2448544, IOS/call=32/31, inflight=96 (96)
IOPS=2439584, IOS/call=32/31, inflight=128 (128)
IOPS=2454176, IOS/call=32/32, inflight=32 (32)

5.10-rc5+all patches
IOPS=2304224, IOS/call=32/32, inflight=64 (64)
IOPS=2309216, IOS/call=32/32, inflight=32 (32)
IOPS=2305376, IOS/call=32/31, inflight=128 (128)
IOPS=2300544, IOS/call=32/32, inflight=128 (128)
IOPS=2301728, IOS/call=32/32, inflight=32 (32)

which looks to be around a 6% drop.

Using actual hardware instead of just null_blk:

5.10-rc5:
IOPS=854163, IOS/call=31/31, inflight=101 (101)
IOPS=855495, IOS/call=31/31, inflight=117 (117)
IOPS=856118, IOS/call=31/31, inflight=100 (100)
IOPS=855863, IOS/call=31/31, inflight=113 (113)
IOPS=856282, IOS/call=31/31, inflight=116 (116)

5.10-rc5+all patches
IOPS=833391, IOS/call=31/31, inflight=100 (100)
IOPS=838342, IOS/call=31/31, inflight=100 (100)
IOPS=839921, IOS/call=31/31, inflight=105 (105)
IOPS=841607, IOS/call=31/31, inflight=123 (123)
IOPS=843625, IOS/call=31/31, inflight=107 (107)

which looks to be around 2-3%, but we're also running at a much
slower rate (830K vs ~2.3M).
David Howells Nov. 27, 2020, 5:14 p.m. UTC | #15
Jens Axboe <axboe@kernel.dk> wrote:

> which looks to be around a 6% drop.

That's quite a lot.

> which looks to be around 2-3%, but we're also running at a much
> slower rate (830K vs ~2.3M).

That's still a lot.

Thanks for having a look!

David
Linus Torvalds Dec. 3, 2020, 5:47 p.m. UTC | #16
On Wed, Dec 2, 2020 at 10:31 PM kernel test robot <oliver.sang@intel.com> wrote:
>
> FYI, we noticed a -4.8% regression of will-it-scale.per_process_ops due to commit:

Ok, I guess that's bigger than expected, but the profile data does
show how bad the indirect branches are.

There's both a "direct" cost of them:

>       0.55 ą 14%      +0.3        0.87 ą 15%  perf-profile.children.cycles-pp.__x86_retpoline_rax
>       0.12 ą 14%      +0.1        0.19 ą 14%  perf-profile.self.cycles-pp.__x86_indirect_thunk_rax
>       0.43 ą 14%      +0.3        0.68 ą 15%  perf-profile.self.cycles-pp.__x86_retpoline_rax

The actual retpoline profile costs themselves do not add up to 4%, but
I think that's because the indirect costs are higher: the branch
mis-predicts basically make everything run slower for a while as the
OoO engine needs to restart.

So the global cost then shows up in CPU and branch miss stats, where
the IPC goes down (which is the same thing as saying that CPI goes
up):

>  1.741e+08           +42.3%  2.476e+08        perf-stat.i.branch-misses
>       0.74            -3.9%       0.71        perf-stat.overall.ipc
>       1.35            +4.1%       1.41        perf-stat.overall.cpi

which is why it ends up being so costly even if the retpoline overhead
itself is "only" just under 1%.

           Linus
Jens Axboe Dec. 3, 2020, 5:50 p.m. UTC | #17
On 12/3/20 10:47 AM, Linus Torvalds wrote:
> On Wed, Dec 2, 2020 at 10:31 PM kernel test robot <oliver.sang@intel.com> wrote:
>>
>> FYI, we noticed a -4.8% regression of will-it-scale.per_process_ops due to commit:
> 
> Ok, I guess that's bigger than expected, but the profile data does
> show how bad the indirect branches are.

It's also in the same range (3-6%) as the microbenchmarks I ran and posted.
So at least there's correlation there too.
David Howells Dec. 4, 2020, 11:50 a.m. UTC | #18
Linus Torvalds <torvalds@linux-foundation.org> wrote:

> > FYI, we noticed a -4.8% regression of will-it-scale.per_process_ops due to commit:
> 
> Ok, I guess that's bigger than expected, 

Note that it appears to be testing just the first patch and not the whole
series:

| commit: 9bd0e337c633aed3e8ec3c7397b7ae0b8436f163 ("[PATCH 01/29] iov_iter: Switch to using a table of operations")

that just adds an indirection table without taking away any of the conditional
branching.  It seems quite likely, though, that even if you add all the other
patches, you won't get back enough to make it worth it.

David
Al Viro Dec. 11, 2020, 1:30 a.m. UTC | #19
On Sat, Nov 21, 2020 at 10:21:17AM -0800, Linus Torvalds wrote:
> So I think conceptually this is the right thing to do, but I have a
> couple of worries:
> 
>  - do we really need all those different versions? I'm thinking
> "iter_full" versions in particular. They I think the iter_full version
> could just be wrappers that call the regular iter thing and verify the
> end result is full (and revert if not). No?

Umm...  Not sure - iov_iter_revert() is not exactly light.  OTOH, it's
on a slow path...  Other variants:
	* save a local copy, run the normal variant on the iter, then copy
the saved one back on failure
	* make a local copy, run the normal variant in _that_, then
copy it back on success.

Note that the entire thing is 5 words, and we end up reading all of
them anyway, so I wouldn't bet which variant ends up being faster -
that would need testing to compare.
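
For illustration, here is a minimal sketch of the first variant (save a
local copy of the iterator, restore it on a short copy).  The helper
name is made up and the ITER_PIPE case is ignored:

	static __always_inline __must_check
	bool copy_from_iter_full_sketch(void *addr, size_t bytes, struct iov_iter *i)
	{
		struct iov_iter saved = *i;	/* the whole iterator is only a few words */

		if (_copy_from_iter(addr, bytes, i) == bytes)
			return true;
		*i = saved;		/* short copy: put the iterator back */
		return false;
	}

The second variant would run _copy_from_iter() against the local copy
instead and only assign it back to *i on success.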

I would certainly like to get rid of the duplication there, especially
if we are going to add copy_to_iter_full() and friends (there are
use cases for those).

>  - I worry a bit about the indirect call overhead and spectre v2.
> 
>    So yeah, it would be good to have benchmarks to make sure this
> doesn't regress for some simple case.
> 
> Other than those things, my initial reaction is "this does seem cleaner".

It does seem cleaner, all right, but that stuff is on fairly hot paths.
And I didn't want to mix the overhead of indirect calls into the picture,
so it turned into cascades of ifs with rather vile macros to keep the
size down.

It looks like the cost of indirects is noticeable.  OTOH, there are
other iov_iter patches floating around, hopefully getting better
code generation.  Let's see how much those give, and if they win
considerably more than these several percent, revisit this series.

Patch

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 4ead291b2976..baa78f58ae5c 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3192,7 +3192,7 @@  static void io_req_map_rw(struct io_kiocb *req, const struct iovec *iovec,
 	rw->free_iovec = iovec;
 	rw->bytes_done = 0;
 	/* can only be fixed buffers, no need to do anything */
-	if (iter->type == ITER_BVEC)
+	if (iov_iter_is_bvec(iter))
 		return;
 	if (!iovec) {
 		unsigned iov_off = 0;
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 72d88566694e..45ee087f8c43 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -32,9 +32,10 @@  struct iov_iter {
 	 * Bit 1 is the BVEC_FLAG_NO_REF bit, set if type is a bvec and
 	 * the caller isn't expecting to drop a page reference when done.
 	 */
-	unsigned int type;
+	unsigned int flags;
 	size_t iov_offset;
 	size_t count;
+	const struct iov_iter_ops *ops;
 	union {
 		const struct iovec *iov;
 		const struct kvec *kvec;
@@ -50,9 +51,63 @@  struct iov_iter {
 	};
 };
 
+void iov_iter_init(struct iov_iter *i, unsigned int direction, const struct iovec *iov,
+			unsigned long nr_segs, size_t count);
+void iov_iter_kvec(struct iov_iter *i, unsigned int direction, const struct kvec *kvec,
+			unsigned long nr_segs, size_t count);
+void iov_iter_bvec(struct iov_iter *i, unsigned int direction, const struct bio_vec *bvec,
+			unsigned long nr_segs, size_t count);
+void iov_iter_pipe(struct iov_iter *i, unsigned int direction, struct pipe_inode_info *pipe,
+			size_t count);
+void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count);
+
+struct iov_iter_ops {
+	enum iter_type type;
+	size_t (*copy_from_user_atomic)(struct page *page, struct iov_iter *i,
+					unsigned long offset, size_t bytes);
+	void (*advance)(struct iov_iter *i, size_t bytes);
+	void (*revert)(struct iov_iter *i, size_t bytes);
+	int (*fault_in_readable)(struct iov_iter *i, size_t bytes);
+	size_t (*single_seg_count)(const struct iov_iter *i);
+	size_t (*copy_page_to_iter)(struct page *page, size_t offset, size_t bytes,
+				    struct iov_iter *i);
+	size_t (*copy_page_from_iter)(struct page *page, size_t offset, size_t bytes,
+				      struct iov_iter *i);
+	size_t (*copy_to_iter)(const void *addr, size_t bytes, struct iov_iter *i);
+	size_t (*copy_from_iter)(void *addr, size_t bytes, struct iov_iter *i);
+	bool (*copy_from_iter_full)(void *addr, size_t bytes, struct iov_iter *i);
+	size_t (*copy_from_iter_nocache)(void *addr, size_t bytes, struct iov_iter *i);
+	bool (*copy_from_iter_full_nocache)(void *addr, size_t bytes, struct iov_iter *i);
+#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
+	size_t (*copy_from_iter_flushcache)(void *addr, size_t bytes, struct iov_iter *i);
+#endif
+#ifdef CONFIG_ARCH_HAS_COPY_MC
+	size_t (*copy_mc_to_iter)(const void *addr, size_t bytes, struct iov_iter *i);
+#endif
+	size_t (*csum_and_copy_to_iter)(const void *addr, size_t bytes, void *csump,
+					struct iov_iter *i);
+	size_t (*csum_and_copy_from_iter)(void *addr, size_t bytes, __wsum *csum,
+					  struct iov_iter *i);
+	bool (*csum_and_copy_from_iter_full)(void *addr, size_t bytes, __wsum *csum,
+					     struct iov_iter *i);
+
+	size_t (*zero)(size_t bytes, struct iov_iter *i);
+	unsigned long (*alignment)(const struct iov_iter *i);
+	unsigned long (*gap_alignment)(const struct iov_iter *i);
+	ssize_t (*get_pages)(struct iov_iter *i, struct page **pages,
+			     size_t maxsize, unsigned maxpages, size_t *start);
+	ssize_t (*get_pages_alloc)(struct iov_iter *i, struct page ***pages,
+				   size_t maxsize, size_t *start);
+	int (*npages)(const struct iov_iter *i, int maxpages);
+	const void *(*dup_iter)(struct iov_iter *new, struct iov_iter *old, gfp_t flags);
+	int (*for_each_range)(struct iov_iter *i, size_t bytes,
+			      int (*f)(struct kvec *vec, void *context),
+			      void *context);
+};
+
 static inline enum iter_type iov_iter_type(const struct iov_iter *i)
 {
-	return i->type & ~(READ | WRITE);
+	return i->ops->type;
 }
 
 static inline bool iter_is_iovec(const struct iov_iter *i)
@@ -82,7 +137,7 @@  static inline bool iov_iter_is_discard(const struct iov_iter *i)
 
 static inline unsigned char iov_iter_rw(const struct iov_iter *i)
 {
-	return i->type & (READ | WRITE);
+	return i->flags & (READ | WRITE);
 }
 
 /*
@@ -111,22 +166,71 @@  static inline struct iovec iov_iter_iovec(const struct iov_iter *iter)
 	};
 }
 
-size_t iov_iter_copy_from_user_atomic(struct page *page,
-		struct iov_iter *i, unsigned long offset, size_t bytes);
-void iov_iter_advance(struct iov_iter *i, size_t bytes);
-void iov_iter_revert(struct iov_iter *i, size_t bytes);
-int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes);
-size_t iov_iter_single_seg_count(const struct iov_iter *i);
+static inline
+size_t iov_iter_copy_from_user_atomic(struct page *page, struct iov_iter *i,
+				      unsigned long offset, size_t bytes)
+{
+	return i->ops->copy_from_user_atomic(page, i, offset, bytes);
+}
+static inline
+void iov_iter_advance(struct iov_iter *i, size_t bytes)
+{
+	return i->ops->advance(i, bytes);
+}
+static inline
+void iov_iter_revert(struct iov_iter *i, size_t bytes)
+{
+	return i->ops->revert(i, bytes);
+}
+static inline
+int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
+{
+	return i->ops->fault_in_readable(i, bytes);
+}
+static inline
+size_t iov_iter_single_seg_count(const struct iov_iter *i)
+{
+	return i->ops->single_seg_count(i);
+}
+
+static inline
 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
-			 struct iov_iter *i);
+				       struct iov_iter *i)
+{
+	return i->ops->copy_page_to_iter(page, offset, bytes, i);
+}
+static inline
 size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
-			 struct iov_iter *i);
+					 struct iov_iter *i)
+{
+	return i->ops->copy_page_from_iter(page, offset, bytes, i);
+}
 
-size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
-size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i);
-bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i);
-size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i);
-bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i);
+static __always_inline __must_check
+size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
+{
+	return i->ops->copy_to_iter(addr, bytes, i);
+}
+static __always_inline __must_check
+size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
+{
+	return i->ops->copy_from_iter(addr, bytes, i);
+}
+static __always_inline __must_check
+bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
+{
+	return i->ops->copy_from_iter_full(addr, bytes, i);
+}
+static __always_inline __must_check
+size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
+{
+	return i->ops->copy_from_iter_nocache(addr, bytes, i);
+}
+static __always_inline __must_check
+bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
+{
+	return i->ops->copy_from_iter_full_nocache(addr, bytes, i);
+}
 
 static __always_inline __must_check
 size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
@@ -173,23 +277,21 @@  bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
 		return _copy_from_iter_full_nocache(addr, bytes, i);
 }
 
-#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
 /*
  * Note, users like pmem that depend on the stricter semantics of
  * copy_from_iter_flushcache() than copy_from_iter_nocache() must check for
  * IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) before assuming that the
  * destination is flushed from the cache on return.
  */
-size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i);
-#else
-#define _copy_from_iter_flushcache _copy_from_iter_nocache
-#endif
-
-#ifdef CONFIG_ARCH_HAS_COPY_MC
-size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
+static __always_inline __must_check
+size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
+{
+#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
+	return i->ops->copy_from_iter_flushcache(addr, bytes, i);
 #else
-#define _copy_mc_to_iter _copy_to_iter
+	return i->ops->copy_from_iter_nocache(addr, bytes, i);
 #endif
+}
 
 static __always_inline __must_check
 size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
@@ -200,6 +302,16 @@  size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
 		return _copy_from_iter_flushcache(addr, bytes, i);
 }
 
+static __always_inline __must_check
+size_t _copy_mc_to_iter(void *addr, size_t bytes, struct iov_iter *i)
+{
+#ifdef CONFIG_ARCH_HAS_COPY_MC
+	return i->ops->copy_mc_to_iter(addr, bytes, i);
+#else
+	return i->ops->copy_to_iter(addr, bytes, i);
+#endif
+}
+
 static __always_inline __must_check
 size_t copy_mc_to_iter(void *addr, size_t bytes, struct iov_iter *i)
 {
@@ -209,25 +321,47 @@  size_t copy_mc_to_iter(void *addr, size_t bytes, struct iov_iter *i)
 		return _copy_mc_to_iter(addr, bytes, i);
 }
 
-size_t iov_iter_zero(size_t bytes, struct iov_iter *);
-unsigned long iov_iter_alignment(const struct iov_iter *i);
-unsigned long iov_iter_gap_alignment(const struct iov_iter *i);
-void iov_iter_init(struct iov_iter *i, unsigned int direction, const struct iovec *iov,
-			unsigned long nr_segs, size_t count);
-void iov_iter_kvec(struct iov_iter *i, unsigned int direction, const struct kvec *kvec,
-			unsigned long nr_segs, size_t count);
-void iov_iter_bvec(struct iov_iter *i, unsigned int direction, const struct bio_vec *bvec,
-			unsigned long nr_segs, size_t count);
-void iov_iter_pipe(struct iov_iter *i, unsigned int direction, struct pipe_inode_info *pipe,
-			size_t count);
-void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count);
+static inline
+size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
+{
+	return i->ops->zero(bytes, i);
+}
+static inline
+unsigned long iov_iter_alignment(const struct iov_iter *i)
+{
+	return i->ops->alignment(i);
+}
+static inline
+unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
+{
+	return i->ops->gap_alignment(i);
+}
+
+static inline
 ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
-			size_t maxsize, unsigned maxpages, size_t *start);
+			size_t maxsize, unsigned maxpages, size_t *start)
+{
+	return i->ops->get_pages(i, pages, maxsize, maxpages, start);
+}
+
+static inline
 ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages,
-			size_t maxsize, size_t *start);
-int iov_iter_npages(const struct iov_iter *i, int maxpages);
+			size_t maxsize, size_t *start)
+{
+	return i->ops->get_pages_alloc(i, pages, maxsize, start);
+}
+
+static inline
+int iov_iter_npages(const struct iov_iter *i, int maxpages)
+{
+	return i->ops->npages(i, maxpages);
+}
 
-const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags);
+static inline
+const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
+{
+	return old->ops->dup_iter(new, old, flags);
+}
 
 static inline size_t iov_iter_count(const struct iov_iter *i)
 {
@@ -260,9 +394,22 @@  static inline void iov_iter_reexpand(struct iov_iter *i, size_t count)
 {
 	i->count = count;
 }
-size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump, struct iov_iter *i);
-size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i);
-bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i);
+
+static inline
+size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump, struct iov_iter *i)
+{
+	return i->ops->csum_and_copy_to_iter(addr, bytes, csump, i);
+}
+static inline
+size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i)
+{
+	return i->ops->csum_and_copy_from_iter(addr, bytes, csum, i);
+}
+static inline
+bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i)
+{
+	return i->ops->csum_and_copy_from_iter_full(addr, bytes, csum, i);
+}
 size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
 		struct iov_iter *i);
 
@@ -278,8 +425,12 @@  ssize_t __import_iovec(int type, const struct iovec __user *uvec,
 int import_single_range(int type, void __user *buf, size_t len,
 		 struct iovec *iov, struct iov_iter *i);
 
+static inline
 int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
 			    int (*f)(struct kvec *vec, void *context),
-			    void *context);
+			    void *context)
+{
+	return i->ops->for_each_range(i, bytes, f, context);
+}
 
 #endif
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 1635111c5bd2..e403d524c797 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -13,6 +13,12 @@ 
 #include <linux/scatterlist.h>
 #include <linux/instrumented.h>
 
+static const struct iov_iter_ops iovec_iter_ops;
+static const struct iov_iter_ops kvec_iter_ops;
+static const struct iov_iter_ops bvec_iter_ops;
+static const struct iov_iter_ops pipe_iter_ops;
+static const struct iov_iter_ops discard_iter_ops;
+
 #define PIPE_PARANOIA /* for now */
 
 #define iterate_iovec(i, n, __v, __p, skip, STEP) {	\
@@ -81,15 +87,15 @@ 
 #define iterate_all_kinds(i, n, v, I, B, K) {			\
 	if (likely(n)) {					\
 		size_t skip = i->iov_offset;			\
-		if (unlikely(i->type & ITER_BVEC)) {		\
+		if (unlikely(iov_iter_type(i) & ITER_BVEC)) {		\
 			struct bio_vec v;			\
 			struct bvec_iter __bi;			\
 			iterate_bvec(i, n, v, __bi, skip, (B))	\
-		} else if (unlikely(i->type & ITER_KVEC)) {	\
+		} else if (unlikely(iov_iter_type(i) & ITER_KVEC)) {	\
 			const struct kvec *kvec;		\
 			struct kvec v;				\
 			iterate_kvec(i, n, v, kvec, skip, (K))	\
-		} else if (unlikely(i->type & ITER_DISCARD)) {	\
+		} else if (unlikely(iov_iter_type(i) & ITER_DISCARD)) {	\
 		} else {					\
 			const struct iovec *iov;		\
 			struct iovec v;				\
@@ -103,7 +109,7 @@ 
 		n = i->count;					\
 	if (i->count) {						\
 		size_t skip = i->iov_offset;			\
-		if (unlikely(i->type & ITER_BVEC)) {		\
+		if (unlikely(iov_iter_type(i) & ITER_BVEC)) {		\
 			const struct bio_vec *bvec = i->bvec;	\
 			struct bio_vec v;			\
 			struct bvec_iter __bi;			\
@@ -111,7 +117,7 @@ 
 			i->bvec = __bvec_iter_bvec(i->bvec, __bi);	\
 			i->nr_segs -= i->bvec - bvec;		\
 			skip = __bi.bi_bvec_done;		\
-		} else if (unlikely(i->type & ITER_KVEC)) {	\
+		} else if (unlikely(iov_iter_type(i) & ITER_KVEC)) {	\
 			const struct kvec *kvec;		\
 			struct kvec v;				\
 			iterate_kvec(i, n, v, kvec, skip, (K))	\
@@ -121,7 +127,7 @@ 
 			}					\
 			i->nr_segs -= kvec - i->kvec;		\
 			i->kvec = kvec;				\
-		} else if (unlikely(i->type & ITER_DISCARD)) {	\
+		} else if (unlikely(iov_iter_type(i) & ITER_DISCARD)) {	\
 			skip += n;				\
 		} else {					\
 			const struct iovec *iov;		\
@@ -427,14 +433,14 @@  static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t by
  * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
  * because it is an invalid address).
  */
-int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
+static int xxx_fault_in_readable(struct iov_iter *i, size_t bytes)
 {
 	size_t skip = i->iov_offset;
 	const struct iovec *iov;
 	int err;
 	struct iovec v;
 
-	if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
+	if (!(iov_iter_type(i) & (ITER_BVEC|ITER_KVEC))) {
 		iterate_iovec(i, bytes, v, iov, skip, ({
 			err = fault_in_pages_readable(v.iov_base, v.iov_len);
 			if (unlikely(err))
@@ -443,7 +449,6 @@  int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
 	}
 	return 0;
 }
-EXPORT_SYMBOL(iov_iter_fault_in_readable);
 
 void iov_iter_init(struct iov_iter *i, unsigned int direction,
 			const struct iovec *iov, unsigned long nr_segs,
@@ -454,10 +459,12 @@  void iov_iter_init(struct iov_iter *i, unsigned int direction,
 
 	/* It will get better.  Eventually... */
 	if (uaccess_kernel()) {
-		i->type = ITER_KVEC | direction;
+		i->ops = &kvec_iter_ops;
+		i->flags = direction;
 		i->kvec = (struct kvec *)iov;
 	} else {
-		i->type = ITER_IOVEC | direction;
+		i->ops = &iovec_iter_ops;
+		i->flags = direction;
 		i->iov = iov;
 	}
 	i->nr_segs = nr_segs;
@@ -625,7 +632,7 @@  static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
 	return bytes;
 }
 
-size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
+static size_t xxx_copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
 {
 	const char *from = addr;
 	if (unlikely(iov_iter_is_pipe(i)))
@@ -641,7 +648,6 @@  size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
 
 	return bytes;
 }
-EXPORT_SYMBOL(_copy_to_iter);
 
 #ifdef CONFIG_ARCH_HAS_COPY_MC
 static int copyout_mc(void __user *to, const void *from, size_t n)
@@ -723,7 +729,7 @@  static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
  *   Compare to copy_to_iter() where only ITER_IOVEC attempts might return
  *   a short copy.
  */
-size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
+static size_t xxx_copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
 {
 	const char *from = addr;
 	unsigned long rem, curr_addr, s_addr = (unsigned long) addr;
@@ -757,10 +763,9 @@  size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
 
 	return bytes;
 }
-EXPORT_SYMBOL_GPL(_copy_mc_to_iter);
 #endif /* CONFIG_ARCH_HAS_COPY_MC */
 
-size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
+static size_t xxx_copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
 {
 	char *to = addr;
 	if (unlikely(iov_iter_is_pipe(i))) {
@@ -778,9 +783,8 @@  size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
 
 	return bytes;
 }
-EXPORT_SYMBOL(_copy_from_iter);
 
-bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
+static bool xxx_copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
 {
 	char *to = addr;
 	if (unlikely(iov_iter_is_pipe(i))) {
@@ -805,9 +809,8 @@  bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
 	iov_iter_advance(i, bytes);
 	return true;
 }
-EXPORT_SYMBOL(_copy_from_iter_full);
 
-size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
+static size_t xxx_copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
 {
 	char *to = addr;
 	if (unlikely(iov_iter_is_pipe(i))) {
@@ -824,7 +827,6 @@  size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
 
 	return bytes;
 }
-EXPORT_SYMBOL(_copy_from_iter_nocache);
 
 #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
 /**
@@ -841,7 +843,7 @@  EXPORT_SYMBOL(_copy_from_iter_nocache);
  * bypass the cache for the ITER_IOVEC case, and on some archs may use
  * instructions that strand dirty-data in the cache.
  */
-size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
+static size_t xxx_copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
 {
 	char *to = addr;
 	if (unlikely(iov_iter_is_pipe(i))) {
@@ -859,10 +861,9 @@  size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
 
 	return bytes;
 }
-EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
 #endif
 
-bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
+static bool xxx_copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
 {
 	char *to = addr;
 	if (unlikely(iov_iter_is_pipe(i))) {
@@ -884,7 +885,6 @@  bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
 	iov_iter_advance(i, bytes);
 	return true;
 }
-EXPORT_SYMBOL(_copy_from_iter_full_nocache);
 
 static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
 {
@@ -910,12 +910,12 @@  static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
 	return false;
 }
 
-size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
+static size_t xxx_copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i)
 {
 	if (unlikely(!page_copy_sane(page, offset, bytes)))
 		return 0;
-	if (i->type & (ITER_BVEC|ITER_KVEC)) {
+	if (iov_iter_type(i) & (ITER_BVEC|ITER_KVEC)) {
 		void *kaddr = kmap_atomic(page);
 		size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
 		kunmap_atomic(kaddr);
@@ -927,9 +927,8 @@  size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 	else
 		return copy_page_to_iter_pipe(page, offset, bytes, i);
 }
-EXPORT_SYMBOL(copy_page_to_iter);
 
-size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
+static size_t xxx_copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i)
 {
 	if (unlikely(!page_copy_sane(page, offset, bytes)))
@@ -938,15 +937,14 @@  size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
 		WARN_ON(1);
 		return 0;
 	}
-	if (i->type & (ITER_BVEC|ITER_KVEC)) {
+	if (iov_iter_type(i) & (ITER_BVEC|ITER_KVEC)) {
 		void *kaddr = kmap_atomic(page);
-		size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
+		size_t wanted = xxx_copy_from_iter(kaddr + offset, bytes, i);
 		kunmap_atomic(kaddr);
 		return wanted;
 	} else
 		return copy_page_from_iter_iovec(page, offset, bytes, i);
 }
-EXPORT_SYMBOL(copy_page_from_iter);
 
 static size_t pipe_zero(size_t bytes, struct iov_iter *i)
 {
@@ -975,7 +973,7 @@  static size_t pipe_zero(size_t bytes, struct iov_iter *i)
 	return bytes;
 }
 
-size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
+static size_t xxx_zero(size_t bytes, struct iov_iter *i)
 {
 	if (unlikely(iov_iter_is_pipe(i)))
 		return pipe_zero(bytes, i);
@@ -987,9 +985,8 @@  size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
 
 	return bytes;
 }
-EXPORT_SYMBOL(iov_iter_zero);
 
-size_t iov_iter_copy_from_user_atomic(struct page *page,
+static size_t xxx_copy_from_user_atomic(struct page *page,
 		struct iov_iter *i, unsigned long offset, size_t bytes)
 {
 	char *kaddr = kmap_atomic(page), *p = kaddr + offset;
@@ -1011,7 +1008,6 @@  size_t iov_iter_copy_from_user_atomic(struct page *page,
 	kunmap_atomic(kaddr);
 	return bytes;
 }
-EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
 
 static inline void pipe_truncate(struct iov_iter *i)
 {
@@ -1067,7 +1063,7 @@  static void pipe_advance(struct iov_iter *i, size_t size)
 	pipe_truncate(i);
 }
 
-void iov_iter_advance(struct iov_iter *i, size_t size)
+static void xxx_advance(struct iov_iter *i, size_t size)
 {
 	if (unlikely(iov_iter_is_pipe(i))) {
 		pipe_advance(i, size);
@@ -1079,9 +1075,8 @@  void iov_iter_advance(struct iov_iter *i, size_t size)
 	}
 	iterate_and_advance(i, size, v, 0, 0, 0)
 }
-EXPORT_SYMBOL(iov_iter_advance);
 
-void iov_iter_revert(struct iov_iter *i, size_t unroll)
+static void xxx_revert(struct iov_iter *i, size_t unroll)
 {
 	if (!unroll)
 		return;
@@ -1147,12 +1142,11 @@  void iov_iter_revert(struct iov_iter *i, size_t unroll)
 		}
 	}
 }
-EXPORT_SYMBOL(iov_iter_revert);
 
 /*
  * Return the count of just the current iov_iter segment.
  */
-size_t iov_iter_single_seg_count(const struct iov_iter *i)
+static size_t xxx_single_seg_count(const struct iov_iter *i)
 {
 	if (unlikely(iov_iter_is_pipe(i)))
 		return i->count;	// it is a silly place, anyway
@@ -1165,14 +1159,14 @@  size_t iov_iter_single_seg_count(const struct iov_iter *i)
 	else
 		return min(i->count, i->iov->iov_len - i->iov_offset);
 }
-EXPORT_SYMBOL(iov_iter_single_seg_count);
 
 void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
-			const struct kvec *kvec, unsigned long nr_segs,
-			size_t count)
+		   const struct kvec *kvec, unsigned long nr_segs,
+		   size_t count)
 {
 	WARN_ON(direction & ~(READ | WRITE));
-	i->type = ITER_KVEC | (direction & (READ | WRITE));
+	i->ops = &kvec_iter_ops;
+	i->flags = direction & (READ | WRITE);
 	i->kvec = kvec;
 	i->nr_segs = nr_segs;
 	i->iov_offset = 0;
@@ -1185,7 +1179,8 @@  void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
 			size_t count)
 {
 	WARN_ON(direction & ~(READ | WRITE));
-	i->type = ITER_BVEC | (direction & (READ | WRITE));
+	i->ops = &bvec_iter_ops;
+	i->flags = direction & (READ | WRITE);
 	i->bvec = bvec;
 	i->nr_segs = nr_segs;
 	i->iov_offset = 0;
@@ -1199,7 +1194,8 @@  void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
 {
 	BUG_ON(direction != READ);
 	WARN_ON(pipe_full(pipe->head, pipe->tail, pipe->ring_size));
-	i->type = ITER_PIPE | READ;
+	i->ops = &pipe_iter_ops;
+	i->flags = READ;
 	i->pipe = pipe;
 	i->head = pipe->head;
 	i->iov_offset = 0;
@@ -1220,13 +1216,14 @@  EXPORT_SYMBOL(iov_iter_pipe);
 void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
 {
 	BUG_ON(direction != READ);
-	i->type = ITER_DISCARD | READ;
+	i->ops = &discard_iter_ops;
+	i->flags = READ;
 	i->count = count;
 	i->iov_offset = 0;
 }
 EXPORT_SYMBOL(iov_iter_discard);
 
-unsigned long iov_iter_alignment(const struct iov_iter *i)
+static unsigned long xxx_alignment(const struct iov_iter *i)
 {
 	unsigned long res = 0;
 	size_t size = i->count;
@@ -1245,9 +1242,8 @@  unsigned long iov_iter_alignment(const struct iov_iter *i)
 	)
 	return res;
 }
-EXPORT_SYMBOL(iov_iter_alignment);
 
-unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
+static unsigned long xxx_gap_alignment(const struct iov_iter *i)
 {
 	unsigned long res = 0;
 	size_t size = i->count;
@@ -1267,7 +1263,6 @@  unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
 		);
 	return res;
 }
-EXPORT_SYMBOL(iov_iter_gap_alignment);
 
 static inline ssize_t __pipe_get_pages(struct iov_iter *i,
 				size_t maxsize,
@@ -1313,7 +1308,7 @@  static ssize_t pipe_get_pages(struct iov_iter *i,
 	return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start);
 }
 
-ssize_t iov_iter_get_pages(struct iov_iter *i,
+static ssize_t xxx_get_pages(struct iov_iter *i,
 		   struct page **pages, size_t maxsize, unsigned maxpages,
 		   size_t *start)
 {
@@ -1352,7 +1347,6 @@  ssize_t iov_iter_get_pages(struct iov_iter *i,
 	)
 	return 0;
 }
-EXPORT_SYMBOL(iov_iter_get_pages);
 
 static struct page **get_pages_array(size_t n)
 {
@@ -1392,7 +1386,7 @@  static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
 	return n;
 }
 
-ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
+static ssize_t xxx_get_pages_alloc(struct iov_iter *i,
 		   struct page ***pages, size_t maxsize,
 		   size_t *start)
 {
@@ -1439,9 +1433,8 @@  ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
 	)
 	return 0;
 }
-EXPORT_SYMBOL(iov_iter_get_pages_alloc);
 
-size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
+static size_t xxx_csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
 			       struct iov_iter *i)
 {
 	char *to = addr;
@@ -1478,9 +1471,8 @@  size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
 	*csum = sum;
 	return bytes;
 }
-EXPORT_SYMBOL(csum_and_copy_from_iter);
 
-bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
+static bool xxx_csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
 			       struct iov_iter *i)
 {
 	char *to = addr;
@@ -1520,9 +1512,8 @@  bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
 	iov_iter_advance(i, bytes);
 	return true;
 }
-EXPORT_SYMBOL(csum_and_copy_from_iter_full);
 
-size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump,
+static size_t xxx_csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump,
 			     struct iov_iter *i)
 {
 	const char *from = addr;
@@ -1564,7 +1555,6 @@  size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump,
 	*csum = sum;
 	return bytes;
 }
-EXPORT_SYMBOL(csum_and_copy_to_iter);
 
 size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
 		struct iov_iter *i)
@@ -1585,7 +1575,7 @@  size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
 }
 EXPORT_SYMBOL(hash_and_copy_to_iter);
 
-int iov_iter_npages(const struct iov_iter *i, int maxpages)
+static int xxx_npages(const struct iov_iter *i, int maxpages)
 {
 	size_t size = i->count;
 	int npages = 0;
@@ -1628,9 +1618,8 @@  int iov_iter_npages(const struct iov_iter *i, int maxpages)
 	)
 	return npages;
 }
-EXPORT_SYMBOL(iov_iter_npages);
 
-const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
+static const void *xxx_dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
 {
 	*new = *old;
 	if (unlikely(iov_iter_is_pipe(new))) {
@@ -1649,7 +1638,6 @@  const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
 				   new->nr_segs * sizeof(struct iovec),
 				   flags);
 }
-EXPORT_SYMBOL(dup_iter);
 
 static int copy_compat_iovec_from_user(struct iovec *iov,
 		const struct iovec __user *uvec, unsigned long nr_segs)
@@ -1826,7 +1814,7 @@  int import_single_range(int rw, void __user *buf, size_t len,
 }
 EXPORT_SYMBOL(import_single_range);
 
-int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
+static int xxx_for_each_range(struct iov_iter *i, size_t bytes,
 			    int (*f)(struct kvec *vec, void *context),
 			    void *context)
 {
@@ -1846,4 +1834,173 @@  int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
 	)
 	return err;
 }
-EXPORT_SYMBOL(iov_iter_for_each_range);
+
+static const struct iov_iter_ops iovec_iter_ops = {
+	.type				= ITER_IOVEC,
+	.copy_from_user_atomic		= xxx_copy_from_user_atomic,
+	.advance			= xxx_advance,
+	.revert				= xxx_revert,
+	.fault_in_readable		= xxx_fault_in_readable,
+	.single_seg_count		= xxx_single_seg_count,
+	.copy_page_to_iter		= xxx_copy_page_to_iter,
+	.copy_page_from_iter		= xxx_copy_page_from_iter,
+	.copy_to_iter			= xxx_copy_to_iter,
+	.copy_from_iter			= xxx_copy_from_iter,
+	.copy_from_iter_full		= xxx_copy_from_iter_full,
+	.copy_from_iter_nocache		= xxx_copy_from_iter_nocache,
+	.copy_from_iter_full_nocache	= xxx_copy_from_iter_full_nocache,
+#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
+	.copy_from_iter_flushcache	= xxx_copy_from_iter_flushcache,
+#endif
+#ifdef CONFIG_ARCH_HAS_COPY_MC
+	.copy_mc_to_iter		= xxx_copy_mc_to_iter,
+#endif
+	.csum_and_copy_to_iter		= xxx_csum_and_copy_to_iter,
+	.csum_and_copy_from_iter	= xxx_csum_and_copy_from_iter,
+	.csum_and_copy_from_iter_full	= xxx_csum_and_copy_from_iter_full,
+
+	.zero				= xxx_zero,
+	.alignment			= xxx_alignment,
+	.gap_alignment			= xxx_gap_alignment,
+	.get_pages			= xxx_get_pages,
+	.get_pages_alloc		= xxx_get_pages_alloc,
+	.npages				= xxx_npages,
+	.dup_iter			= xxx_dup_iter,
+	.for_each_range			= xxx_for_each_range,
+};
+
+static const struct iov_iter_ops kvec_iter_ops = {
+	.type				= ITER_KVEC,
+	.copy_from_user_atomic		= xxx_copy_from_user_atomic,
+	.advance			= xxx_advance,
+	.revert				= xxx_revert,
+	.fault_in_readable		= xxx_fault_in_readable,
+	.single_seg_count		= xxx_single_seg_count,
+	.copy_page_to_iter		= xxx_copy_page_to_iter,
+	.copy_page_from_iter		= xxx_copy_page_from_iter,
+	.copy_to_iter			= xxx_copy_to_iter,
+	.copy_from_iter			= xxx_copy_from_iter,
+	.copy_from_iter_full		= xxx_copy_from_iter_full,
+	.copy_from_iter_nocache		= xxx_copy_from_iter_nocache,
+	.copy_from_iter_full_nocache	= xxx_copy_from_iter_full_nocache,
+#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
+	.copy_from_iter_flushcache	= xxx_copy_from_iter_flushcache,
+#endif
+#ifdef CONFIG_ARCH_HAS_COPY_MC
+	.copy_mc_to_iter		= xxx_copy_mc_to_iter,
+#endif
+	.csum_and_copy_to_iter		= xxx_csum_and_copy_to_iter,
+	.csum_and_copy_from_iter	= xxx_csum_and_copy_from_iter,
+	.csum_and_copy_from_iter_full	= xxx_csum_and_copy_from_iter_full,
+
+	.zero				= xxx_zero,
+	.alignment			= xxx_alignment,
+	.gap_alignment			= xxx_gap_alignment,
+	.get_pages			= xxx_get_pages,
+	.get_pages_alloc		= xxx_get_pages_alloc,
+	.npages				= xxx_npages,
+	.dup_iter			= xxx_dup_iter,
+	.for_each_range			= xxx_for_each_range,
+};
+
+static const struct iov_iter_ops bvec_iter_ops = {
+	.type				= ITER_BVEC,
+	.copy_from_user_atomic		= xxx_copy_from_user_atomic,
+	.advance			= xxx_advance,
+	.revert				= xxx_revert,
+	.fault_in_readable		= xxx_fault_in_readable,
+	.single_seg_count		= xxx_single_seg_count,
+	.copy_page_to_iter		= xxx_copy_page_to_iter,
+	.copy_page_from_iter		= xxx_copy_page_from_iter,
+	.copy_to_iter			= xxx_copy_to_iter,
+	.copy_from_iter			= xxx_copy_from_iter,
+	.copy_from_iter_full		= xxx_copy_from_iter_full,
+	.copy_from_iter_nocache		= xxx_copy_from_iter_nocache,
+	.copy_from_iter_full_nocache	= xxx_copy_from_iter_full_nocache,
+#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
+	.copy_from_iter_flushcache	= xxx_copy_from_iter_flushcache,
+#endif
+#ifdef CONFIG_ARCH_HAS_COPY_MC
+	.copy_mc_to_iter		= xxx_copy_mc_to_iter,
+#endif
+	.csum_and_copy_to_iter		= xxx_csum_and_copy_to_iter,
+	.csum_and_copy_from_iter	= xxx_csum_and_copy_from_iter,
+	.csum_and_copy_from_iter_full	= xxx_csum_and_copy_from_iter_full,
+
+	.zero				= xxx_zero,
+	.alignment			= xxx_alignment,
+	.gap_alignment			= xxx_gap_alignment,
+	.get_pages			= xxx_get_pages,
+	.get_pages_alloc		= xxx_get_pages_alloc,
+	.npages				= xxx_npages,
+	.dup_iter			= xxx_dup_iter,
+	.for_each_range			= xxx_for_each_range,
+};
+
+static const struct iov_iter_ops pipe_iter_ops = {
+	.type				= ITER_PIPE,
+	.copy_from_user_atomic		= xxx_copy_from_user_atomic,
+	.advance			= xxx_advance,
+	.revert				= xxx_revert,
+	.fault_in_readable		= xxx_fault_in_readable,
+	.single_seg_count		= xxx_single_seg_count,
+	.copy_page_to_iter		= xxx_copy_page_to_iter,
+	.copy_page_from_iter		= xxx_copy_page_from_iter,
+	.copy_to_iter			= xxx_copy_to_iter,
+	.copy_from_iter			= xxx_copy_from_iter,
+	.copy_from_iter_full		= xxx_copy_from_iter_full,
+	.copy_from_iter_nocache		= xxx_copy_from_iter_nocache,
+	.copy_from_iter_full_nocache	= xxx_copy_from_iter_full_nocache,
+#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
+	.copy_from_iter_flushcache	= xxx_copy_from_iter_flushcache,
+#endif
+#ifdef CONFIG_ARCH_HAS_COPY_MC
+	.copy_mc_to_iter		= xxx_copy_mc_to_iter,
+#endif
+	.csum_and_copy_to_iter		= xxx_csum_and_copy_to_iter,
+	.csum_and_copy_from_iter	= xxx_csum_and_copy_from_iter,
+	.csum_and_copy_from_iter_full	= xxx_csum_and_copy_from_iter_full,
+
+	.zero				= xxx_zero,
+	.alignment			= xxx_alignment,
+	.gap_alignment			= xxx_gap_alignment,
+	.get_pages			= xxx_get_pages,
+	.get_pages_alloc		= xxx_get_pages_alloc,
+	.npages				= xxx_npages,
+	.dup_iter			= xxx_dup_iter,
+	.for_each_range			= xxx_for_each_range,
+};
+
+static const struct iov_iter_ops discard_iter_ops = {
+	.type				= ITER_DISCARD,
+	.copy_from_user_atomic		= xxx_copy_from_user_atomic,
+	.advance			= xxx_advance,
+	.revert				= xxx_revert,
+	.fault_in_readable		= xxx_fault_in_readable,
+	.single_seg_count		= xxx_single_seg_count,
+	.copy_page_to_iter		= xxx_copy_page_to_iter,
+	.copy_page_from_iter		= xxx_copy_page_from_iter,
+	.copy_to_iter			= xxx_copy_to_iter,
+	.copy_from_iter			= xxx_copy_from_iter,
+	.copy_from_iter_full		= xxx_copy_from_iter_full,
+	.copy_from_iter_nocache		= xxx_copy_from_iter_nocache,
+	.copy_from_iter_full_nocache	= xxx_copy_from_iter_full_nocache,
+#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
+	.copy_from_iter_flushcache	= xxx_copy_from_iter_flushcache,
+#endif
+#ifdef CONFIG_ARCH_HAS_COPY_MC
+	.copy_mc_to_iter		= xxx_copy_mc_to_iter,
+#endif
+	.csum_and_copy_to_iter		= xxx_csum_and_copy_to_iter,
+	.csum_and_copy_from_iter	= xxx_csum_and_copy_from_iter,
+	.csum_and_copy_from_iter_full	= xxx_csum_and_copy_from_iter_full,
+
+	.zero				= xxx_zero,
+	.alignment			= xxx_alignment,
+	.gap_alignment			= xxx_gap_alignment,
+	.get_pages			= xxx_get_pages,
+	.get_pages_alloc		= xxx_get_pages_alloc,
+	.npages				= xxx_npages,
+	.dup_iter			= xxx_dup_iter,
+	.for_each_range			= xxx_for_each_range,
+};