diff mbox series

[09/44] new iov_iter flavour - ITER_UBUF

Message ID 20220622041552.737754-9-viro@zeniv.linux.org.uk (mailing list archive)
State New, archived
Headers show
Series [01/44] 9p: handling Rerror without copy_from_iter_full() | expand

Commit Message

Al Viro June 22, 2022, 4:15 a.m. UTC
Equivalent of single-segment iovec.  Initialized by iov_iter_ubuf(),
checked for by iter_is_ubuf(), otherwise behaves like ITER_IOVEC
ones.

We are going to expose things like ->write_iter() et al. to those
in subsequent commits.

New predicate (user_backed_iter()) that is true for ITER_IOVEC and
ITER_UBUF; places like direct-IO handling should use that for
checking that pages we modify after getting them from iov_iter_get_pages()
would need to be dirtied.

DO NOT assume that replacing iter_is_iovec() with user_backed_iter()
will solve all problems - there's code that uses iter_is_iovec() to
decide how to poke around in iov_iter guts and for that the predicate
replacement obviously won't suffice.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 block/fops.c         |  6 +--
 fs/ceph/file.c       |  2 +-
 fs/cifs/file.c       |  2 +-
 fs/direct-io.c       |  2 +-
 fs/fuse/dev.c        |  4 +-
 fs/fuse/file.c       |  2 +-
 fs/gfs2/file.c       |  2 +-
 fs/iomap/direct-io.c |  2 +-
 fs/nfs/direct.c      |  2 +-
 include/linux/uio.h  | 26 ++++++++++++
 lib/iov_iter.c       | 94 ++++++++++++++++++++++++++++++++++----------
 mm/shmem.c           |  2 +-
 12 files changed, 113 insertions(+), 33 deletions(-)

Comments

Jeff Layton June 27, 2022, 6:47 p.m. UTC | #1
On Wed, 2022-06-22 at 05:15 +0100, Al Viro wrote:
> Equivalent of single-segment iovec.  Initialized by iov_iter_ubuf(),
> checked for by iter_is_ubuf(), otherwise behaves like ITER_IOVEC
> ones.
> 
> We are going to expose things like ->write_iter() et al. to those
> in subsequent commits.
> 
> New predicate (user_backed_iter()) that is true for ITER_IOVEC and
> ITER_UBUF; places like direct-IO handling should use that for
> checking that pages we modify after getting them from iov_iter_get_pages()
> would need to be dirtied.
> 
> DO NOT assume that replacing iter_is_iovec() with user_backed_iter()
> will solve all problems - there's code that uses iter_is_iovec() to
> decide how to poke around in iov_iter guts and for that the predicate
> replacement obviously won't suffice.
> 
> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
> ---
>  block/fops.c         |  6 +--
>  fs/ceph/file.c       |  2 +-
>  fs/cifs/file.c       |  2 +-
>  fs/direct-io.c       |  2 +-
>  fs/fuse/dev.c        |  4 +-
>  fs/fuse/file.c       |  2 +-
>  fs/gfs2/file.c       |  2 +-
>  fs/iomap/direct-io.c |  2 +-
>  fs/nfs/direct.c      |  2 +-
>  include/linux/uio.h  | 26 ++++++++++++
>  lib/iov_iter.c       | 94 ++++++++++++++++++++++++++++++++++----------
>  mm/shmem.c           |  2 +-
>  12 files changed, 113 insertions(+), 33 deletions(-)
> 
> diff --git a/block/fops.c b/block/fops.c
> index 6e86931ab847..3e68d69e0ee3 100644
> --- a/block/fops.c
> +++ b/block/fops.c
> @@ -69,7 +69,7 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
>  
>  	if (iov_iter_rw(iter) == READ) {
>  		bio_init(&bio, bdev, vecs, nr_pages, REQ_OP_READ);
> -		if (iter_is_iovec(iter))
> +		if (user_backed_iter(iter))
>  			should_dirty = true;
>  	} else {
>  		bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb));
> @@ -199,7 +199,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
>  	}
>  
>  	dio->size = 0;
> -	if (is_read && iter_is_iovec(iter))
> +	if (is_read && user_backed_iter(iter))
>  		dio->flags |= DIO_SHOULD_DIRTY;
>  
>  	blk_start_plug(&plug);
> @@ -331,7 +331,7 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
>  	dio->size = bio->bi_iter.bi_size;
>  
>  	if (is_read) {
> -		if (iter_is_iovec(iter)) {
> +		if (user_backed_iter(iter)) {
>  			dio->flags |= DIO_SHOULD_DIRTY;
>  			bio_set_pages_dirty(bio);
>  		}
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index 8c8226c0feac..e132adeeaf16 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -1262,7 +1262,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
>  	size_t count = iov_iter_count(iter);
>  	loff_t pos = iocb->ki_pos;
>  	bool write = iov_iter_rw(iter) == WRITE;
> -	bool should_dirty = !write && iter_is_iovec(iter);
> +	bool should_dirty = !write && user_backed_iter(iter);
>  
>  	if (write && ceph_snap(file_inode(file)) != CEPH_NOSNAP)
>  		return -EROFS;
> diff --git a/fs/cifs/file.c b/fs/cifs/file.c
> index 1618e0537d58..4b4129d9a90c 100644
> --- a/fs/cifs/file.c
> +++ b/fs/cifs/file.c
> @@ -4004,7 +4004,7 @@ static ssize_t __cifs_readv(
>  	if (!is_sync_kiocb(iocb))
>  		ctx->iocb = iocb;
>  
> -	if (iter_is_iovec(to))
> +	if (user_backed_iter(to))
>  		ctx->should_dirty = true;
>  
>  	if (direct) {
> diff --git a/fs/direct-io.c b/fs/direct-io.c
> index 39647eb56904..72237f49ad94 100644
> --- a/fs/direct-io.c
> +++ b/fs/direct-io.c
> @@ -1245,7 +1245,7 @@ ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
>  	spin_lock_init(&dio->bio_lock);
>  	dio->refcount = 1;
>  
> -	dio->should_dirty = iter_is_iovec(iter) && iov_iter_rw(iter) == READ;
> +	dio->should_dirty = user_backed_iter(iter) && iov_iter_rw(iter) == READ;
>  	sdio.iter = iter;
>  	sdio.final_block_in_request = end >> blkbits;
>  
> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
> index 0e537e580dc1..8d657c2cd6f7 100644
> --- a/fs/fuse/dev.c
> +++ b/fs/fuse/dev.c
> @@ -1356,7 +1356,7 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, struct iov_iter *to)
>  	if (!fud)
>  		return -EPERM;
>  
> -	if (!iter_is_iovec(to))
> +	if (!user_backed_iter(to))
>  		return -EINVAL;
>  
>  	fuse_copy_init(&cs, 1, to);
> @@ -1949,7 +1949,7 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from)
>  	if (!fud)
>  		return -EPERM;
>  
> -	if (!iter_is_iovec(from))
> +	if (!user_backed_iter(from))
>  		return -EINVAL;
>  
>  	fuse_copy_init(&cs, 0, from);
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index 00fa861aeead..c982e3afe3b4 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -1465,7 +1465,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
>  			inode_unlock(inode);
>  	}
>  
> -	io->should_dirty = !write && iter_is_iovec(iter);
> +	io->should_dirty = !write && user_backed_iter(iter);
>  	while (count) {
>  		ssize_t nres;
>  		fl_owner_t owner = current->files;
> diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
> index 2cceb193dcd8..48e6cc74fdc1 100644
> --- a/fs/gfs2/file.c
> +++ b/fs/gfs2/file.c
> @@ -780,7 +780,7 @@ static inline bool should_fault_in_pages(struct iov_iter *i,
>  
>  	if (!count)
>  		return false;
> -	if (!iter_is_iovec(i))
> +	if (!user_backed_iter(i))
>  		return false;
>  
>  	size = PAGE_SIZE;
> diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
> index 31c7f1035b20..d5c7d019653b 100644
> --- a/fs/iomap/direct-io.c
> +++ b/fs/iomap/direct-io.c
> @@ -533,7 +533,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
>  			iomi.flags |= IOMAP_NOWAIT;
>  		}
>  
> -		if (iter_is_iovec(iter))
> +		if (user_backed_iter(iter))
>  			dio->flags |= IOMAP_DIO_DIRTY;
>  	} else {
>  		iomi.flags |= IOMAP_WRITE;
> diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
> index 4eb2a8380a28..022e1ce63e62 100644
> --- a/fs/nfs/direct.c
> +++ b/fs/nfs/direct.c
> @@ -478,7 +478,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
>  	if (!is_sync_kiocb(iocb))
>  		dreq->iocb = iocb;
>  
> -	if (iter_is_iovec(iter))
> +	if (user_backed_iter(iter))
>  		dreq->flags = NFS_ODIRECT_SHOULD_DIRTY;
>  
>  	if (!swap)
> diff --git a/include/linux/uio.h b/include/linux/uio.h
> index 76d305f3d4c2..6ab4260c3d6c 100644
> --- a/include/linux/uio.h
> +++ b/include/linux/uio.h
> @@ -26,6 +26,7 @@ enum iter_type {
>  	ITER_PIPE,
>  	ITER_XARRAY,
>  	ITER_DISCARD,
> +	ITER_UBUF,
>  };
>  
>  struct iov_iter_state {
> @@ -38,6 +39,7 @@ struct iov_iter {
>  	u8 iter_type;
>  	bool nofault;
>  	bool data_source;
> +	bool user_backed;
>  	size_t iov_offset;
>  	size_t count;
>  	union {
> @@ -46,6 +48,7 @@ struct iov_iter {
>  		const struct bio_vec *bvec;
>  		struct xarray *xarray;
>  		struct pipe_inode_info *pipe;
> +		void __user *ubuf;
>  	};
>  	union {
>  		unsigned long nr_segs;
> @@ -70,6 +73,11 @@ static inline void iov_iter_save_state(struct iov_iter *iter,
>  	state->nr_segs = iter->nr_segs;
>  }
>  
> +static inline bool iter_is_ubuf(const struct iov_iter *i)
> +{
> +	return iov_iter_type(i) == ITER_UBUF;
> +}
> +
>  static inline bool iter_is_iovec(const struct iov_iter *i)
>  {
>  	return iov_iter_type(i) == ITER_IOVEC;
> @@ -105,6 +113,11 @@ static inline unsigned char iov_iter_rw(const struct iov_iter *i)
>  	return i->data_source ? WRITE : READ;
>  }
>  
> +static inline bool user_backed_iter(const struct iov_iter *i)
> +{
> +	return i->user_backed;
> +}
> +

nit: I wonder whether this new boolean is worth it over just checking
iter_is_iovec() || iter_is_ubuf(). Not a big deal though.

>  /*
>   * Total number of bytes covered by an iovec.
>   *
> @@ -320,4 +333,17 @@ ssize_t __import_iovec(int type, const struct iovec __user *uvec,
>  int import_single_range(int type, void __user *buf, size_t len,
>  		 struct iovec *iov, struct iov_iter *i);
>  
> +static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction,
> +			void __user *buf, size_t count)
> +{
> +	WARN_ON(direction & ~(READ | WRITE));
> +	*i = (struct iov_iter) {
> +		.iter_type = ITER_UBUF,
> +		.user_backed = true,
> +		.data_source = direction,
> +		.ubuf = buf,
> +		.count = count
> +	};
> +}
> +
>  #endif
> diff --git a/lib/iov_iter.c b/lib/iov_iter.c
> index 4c658a25e29c..8275b28e886b 100644
> --- a/lib/iov_iter.c
> +++ b/lib/iov_iter.c
> @@ -16,6 +16,16 @@
>  
>  #define PIPE_PARANOIA /* for now */
>  
> +/* covers ubuf and kbuf alike */
> +#define iterate_buf(i, n, base, len, off, __p, STEP) {		\
> +	size_t __maybe_unused off = 0;				\
> +	len = n;						\
> +	base = __p + i->iov_offset;				\
> +	len -= (STEP);						\
> +	i->iov_offset += len;					\
> +	n = len;						\
> +}
> +
>  /* covers iovec and kvec alike */
>  #define iterate_iovec(i, n, base, len, off, __p, STEP) {	\
>  	size_t off = 0;						\
> @@ -110,7 +120,12 @@ __out:								\
>  	if (unlikely(i->count < n))				\
>  		n = i->count;					\
>  	if (likely(n)) {					\
> -		if (likely(iter_is_iovec(i))) {			\
> +		if (likely(iter_is_ubuf(i))) {			\
> +			void __user *base;			\
> +			size_t len;				\
> +			iterate_buf(i, n, base, len, off,	\
> +						i->ubuf, (I)) 	\
> +		} else if (likely(iter_is_iovec(i))) {		\
>  			const struct iovec *iov = i->iov;	\
>  			void __user *base;			\
>  			size_t len;				\
> @@ -275,7 +290,11 @@ static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t by
>   */
>  size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t size)
>  {
> -	if (iter_is_iovec(i)) {
> +	if (iter_is_ubuf(i)) {
> +		size_t n = min(size, iov_iter_count(i));
> +		n -= fault_in_readable(i->ubuf + i->iov_offset, n);
> +		return size - n;
> +	} else if (iter_is_iovec(i)) {
>  		size_t count = min(size, iov_iter_count(i));
>  		const struct iovec *p;
>  		size_t skip;
> @@ -314,7 +333,11 @@ EXPORT_SYMBOL(fault_in_iov_iter_readable);
>   */
>  size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t size)
>  {
> -	if (iter_is_iovec(i)) {
> +	if (iter_is_ubuf(i)) {
> +		size_t n = min(size, iov_iter_count(i));
> +		n -= fault_in_safe_writeable(i->ubuf + i->iov_offset, n);
> +		return size - n;
> +	} else if (iter_is_iovec(i)) {
>  		size_t count = min(size, iov_iter_count(i));
>  		const struct iovec *p;
>  		size_t skip;
> @@ -345,6 +368,7 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction,
>  	*i = (struct iov_iter) {
>  		.iter_type = ITER_IOVEC,
>  		.nofault = false,
> +		.user_backed = true,
>  		.data_source = direction,
>  		.iov = iov,
>  		.nr_segs = nr_segs,
> @@ -494,7 +518,7 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
>  {
>  	if (unlikely(iov_iter_is_pipe(i)))
>  		return copy_pipe_to_iter(addr, bytes, i);
> -	if (iter_is_iovec(i))
> +	if (user_backed_iter(i))
>  		might_fault();
>  	iterate_and_advance(i, bytes, base, len, off,
>  		copyout(base, addr + off, len),
> @@ -576,7 +600,7 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
>  {
>  	if (unlikely(iov_iter_is_pipe(i)))
>  		return copy_mc_pipe_to_iter(addr, bytes, i);
> -	if (iter_is_iovec(i))
> +	if (user_backed_iter(i))
>  		might_fault();
>  	__iterate_and_advance(i, bytes, base, len, off,
>  		copyout_mc(base, addr + off, len),
> @@ -594,7 +618,7 @@ size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
>  		WARN_ON(1);
>  		return 0;
>  	}
> -	if (iter_is_iovec(i))
> +	if (user_backed_iter(i))
>  		might_fault();
>  	iterate_and_advance(i, bytes, base, len, off,
>  		copyin(addr + off, base, len),
> @@ -882,16 +906,16 @@ void iov_iter_advance(struct iov_iter *i, size_t size)
>  {
>  	if (unlikely(i->count < size))
>  		size = i->count;
> -	if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) {
> +	if (likely(iter_is_ubuf(i)) || unlikely(iov_iter_is_xarray(i))) {
> +		i->iov_offset += size;
> +		i->count -= size;
> +	} else if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) {
>  		/* iovec and kvec have identical layouts */
>  		iov_iter_iovec_advance(i, size);
>  	} else if (iov_iter_is_bvec(i)) {
>  		iov_iter_bvec_advance(i, size);
>  	} else if (iov_iter_is_pipe(i)) {
>  		pipe_advance(i, size);
> -	} else if (unlikely(iov_iter_is_xarray(i))) {
> -		i->iov_offset += size;
> -		i->count -= size;
>  	} else if (iov_iter_is_discard(i)) {
>  		i->count -= size;
>  	}
> @@ -938,7 +962,7 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll)
>  		return;
>  	}
>  	unroll -= i->iov_offset;
> -	if (iov_iter_is_xarray(i)) {
> +	if (iov_iter_is_xarray(i) || iter_is_ubuf(i)) {
>  		BUG(); /* We should never go beyond the start of the specified
>  			* range since we might then be straying into pages that
>  			* aren't pinned.
> @@ -1129,6 +1153,13 @@ static unsigned long iov_iter_alignment_bvec(const struct iov_iter *i)
>  
>  unsigned long iov_iter_alignment(const struct iov_iter *i)
>  {
> +	if (likely(iter_is_ubuf(i))) {
> +		size_t size = i->count;
> +		if (size)
> +			return ((unsigned long)i->ubuf + i->iov_offset) | size;
> +		return 0;
> +	}
> +
>  	/* iovec and kvec have identical layouts */
>  	if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
>  		return iov_iter_alignment_iovec(i);
> @@ -1159,6 +1190,9 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
>  	size_t size = i->count;
>  	unsigned k;
>  
> +	if (iter_is_ubuf(i))
> +		return 0;
> +
>  	if (WARN_ON(!iter_is_iovec(i)))
>  		return ~0U;
>  
> @@ -1287,7 +1321,19 @@ static ssize_t iter_xarray_get_pages(struct iov_iter *i,
>  	return actual;
>  }
>  
> -/* must be done on non-empty ITER_IOVEC one */
> +static unsigned long found_ubuf_segment(unsigned long addr,
> +					size_t len,
> +					size_t *size, size_t *start,
> +					unsigned maxpages)
> +{
> +	len += (*start = addr % PAGE_SIZE);
> +	if (len > maxpages * PAGE_SIZE)
> +		len = maxpages * PAGE_SIZE;
> +	*size = len;
> +	return addr & PAGE_MASK;
> +}
> +
> +/* must be done on non-empty ITER_UBUF or ITER_IOVEC one */
>  static unsigned long first_iovec_segment(const struct iov_iter *i,
>  					 size_t *size, size_t *start,
>  					 size_t maxsize, unsigned maxpages)
> @@ -1295,6 +1341,11 @@ static unsigned long first_iovec_segment(const struct iov_iter *i,
>  	size_t skip;
>  	long k;
>  
> +	if (iter_is_ubuf(i)) {
> +		unsigned long addr = (unsigned long)i->ubuf + i->iov_offset;
> +		return found_ubuf_segment(addr, maxsize, size, start, maxpages);
> +	}
> +
>  	for (k = 0, skip = i->iov_offset; k < i->nr_segs; k++, skip = 0) {
>  		unsigned long addr = (unsigned long)i->iov[k].iov_base + skip;
>  		size_t len = i->iov[k].iov_len - skip;
> @@ -1303,11 +1354,7 @@ static unsigned long first_iovec_segment(const struct iov_iter *i,
>  			continue;
>  		if (len > maxsize)
>  			len = maxsize;
> -		len += (*start = addr % PAGE_SIZE);
> -		if (len > maxpages * PAGE_SIZE)
> -			len = maxpages * PAGE_SIZE;
> -		*size = len;
> -		return addr & PAGE_MASK;
> +		return found_ubuf_segment(addr, len, size, start, maxpages);
>  	}
>  	BUG(); // if it had been empty, we wouldn't get called
>  }
> @@ -1344,7 +1391,7 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
>  	if (!maxsize)
>  		return 0;
>  
> -	if (likely(iter_is_iovec(i))) {
> +	if (likely(user_backed_iter(i))) {
>  		unsigned int gup_flags = 0;
>  		unsigned long addr;
>  
> @@ -1470,7 +1517,7 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
>  	if (!maxsize)
>  		return 0;
>  
> -	if (likely(iter_is_iovec(i))) {
> +	if (likely(user_backed_iter(i))) {
>  		unsigned int gup_flags = 0;
>  		unsigned long addr;
>  
> @@ -1624,6 +1671,11 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
>  {
>  	if (unlikely(!i->count))
>  		return 0;
> +	if (likely(iter_is_ubuf(i))) {
> +		unsigned offs = offset_in_page(i->ubuf + i->iov_offset);
> +		int npages = DIV_ROUND_UP(offs + i->count, PAGE_SIZE);
> +		return min(npages, maxpages);
> +	}
>  	/* iovec and kvec have identical layouts */
>  	if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
>  		return iov_npages(i, maxpages);
> @@ -1862,10 +1914,12 @@ EXPORT_SYMBOL(import_single_range);
>  void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state)
>  {
>  	if (WARN_ON_ONCE(!iov_iter_is_bvec(i) && !iter_is_iovec(i)) &&
> -			 !iov_iter_is_kvec(i))
> +			 !iov_iter_is_kvec(i) && !iter_is_ubuf(i))
>  		return;
>  	i->iov_offset = state->iov_offset;
>  	i->count = state->count;
> +	if (iter_is_ubuf(i))
> +		return;
>  	/*
>  	 * For the *vec iters, nr_segs + iov is constant - if we increment
>  	 * the vec, then we also decrement the nr_segs count. Hence we don't
> diff --git a/mm/shmem.c b/mm/shmem.c
> index a6f565308133..6b83f3971795 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -2603,7 +2603,7 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
>  			ret = copy_page_to_iter(page, offset, nr, to);
>  			put_page(page);
>  
> -		} else if (iter_is_iovec(to)) {
> +		} else if (!user_backed_iter(to)) {
>  			/*
>  			 * Copy to user tends to be so well optimized, but
>  			 * clear_user() not so much, that it is noticeably

The code looks reasonable but is there any real benefit here? It seems
like the only user of it so far is new_sync_{read,write}, and both seem
to just use it to avoid allocating a single iovec on the stack.
Christian Brauner June 28, 2022, 12:38 p.m. UTC | #2
On Wed, Jun 22, 2022 at 05:15:17AM +0100, Al Viro wrote:
> Equivalent of single-segment iovec.  Initialized by iov_iter_ubuf(),
> checked for by iter_is_ubuf(), otherwise behaves like ITER_IOVEC
> ones.
> 
> We are going to expose things like ->write_iter() et al. to those
> in subsequent commits.
> 
> New predicate (user_backed_iter()) that is true for ITER_IOVEC and
> ITER_UBUF; places like direct-IO handling should use that for
> checking that pages we modify after getting them from iov_iter_get_pages()
> would need to be dirtied.
> 
> DO NOT assume that replacing iter_is_iovec() with user_backed_iter()
> will solve all problems - there's code that uses iter_is_iovec() to
> decide how to poke around in iov_iter guts and for that the predicate
> replacement obviously won't suffice.
> 
> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
> ---
>  block/fops.c         |  6 +--
>  fs/ceph/file.c       |  2 +-
>  fs/cifs/file.c       |  2 +-
>  fs/direct-io.c       |  2 +-
>  fs/fuse/dev.c        |  4 +-
>  fs/fuse/file.c       |  2 +-
>  fs/gfs2/file.c       |  2 +-
>  fs/iomap/direct-io.c |  2 +-
>  fs/nfs/direct.c      |  2 +-
>  include/linux/uio.h  | 26 ++++++++++++
>  lib/iov_iter.c       | 94 ++++++++++++++++++++++++++++++++++----------
>  mm/shmem.c           |  2 +-
>  12 files changed, 113 insertions(+), 33 deletions(-)
> 
> diff --git a/block/fops.c b/block/fops.c
> index 6e86931ab847..3e68d69e0ee3 100644
> --- a/block/fops.c
> +++ b/block/fops.c
> @@ -69,7 +69,7 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
>  
>  	if (iov_iter_rw(iter) == READ) {
>  		bio_init(&bio, bdev, vecs, nr_pages, REQ_OP_READ);
> -		if (iter_is_iovec(iter))
> +		if (user_backed_iter(iter))
>  			should_dirty = true;
>  	} else {
>  		bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb));
> @@ -199,7 +199,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
>  	}
>  
>  	dio->size = 0;
> -	if (is_read && iter_is_iovec(iter))
> +	if (is_read && user_backed_iter(iter))
>  		dio->flags |= DIO_SHOULD_DIRTY;
>  
>  	blk_start_plug(&plug);
> @@ -331,7 +331,7 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
>  	dio->size = bio->bi_iter.bi_size;
>  
>  	if (is_read) {
> -		if (iter_is_iovec(iter)) {
> +		if (user_backed_iter(iter)) {
>  			dio->flags |= DIO_SHOULD_DIRTY;
>  			bio_set_pages_dirty(bio);
>  		}
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index 8c8226c0feac..e132adeeaf16 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -1262,7 +1262,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
>  	size_t count = iov_iter_count(iter);
>  	loff_t pos = iocb->ki_pos;
>  	bool write = iov_iter_rw(iter) == WRITE;
> -	bool should_dirty = !write && iter_is_iovec(iter);
> +	bool should_dirty = !write && user_backed_iter(iter);
>  
>  	if (write && ceph_snap(file_inode(file)) != CEPH_NOSNAP)
>  		return -EROFS;
> diff --git a/fs/cifs/file.c b/fs/cifs/file.c
> index 1618e0537d58..4b4129d9a90c 100644
> --- a/fs/cifs/file.c
> +++ b/fs/cifs/file.c
> @@ -4004,7 +4004,7 @@ static ssize_t __cifs_readv(
>  	if (!is_sync_kiocb(iocb))
>  		ctx->iocb = iocb;
>  
> -	if (iter_is_iovec(to))
> +	if (user_backed_iter(to))
>  		ctx->should_dirty = true;
>  
>  	if (direct) {
> diff --git a/fs/direct-io.c b/fs/direct-io.c
> index 39647eb56904..72237f49ad94 100644
> --- a/fs/direct-io.c
> +++ b/fs/direct-io.c
> @@ -1245,7 +1245,7 @@ ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
>  	spin_lock_init(&dio->bio_lock);
>  	dio->refcount = 1;
>  
> -	dio->should_dirty = iter_is_iovec(iter) && iov_iter_rw(iter) == READ;
> +	dio->should_dirty = user_backed_iter(iter) && iov_iter_rw(iter) == READ;
>  	sdio.iter = iter;
>  	sdio.final_block_in_request = end >> blkbits;
>  
> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
> index 0e537e580dc1..8d657c2cd6f7 100644
> --- a/fs/fuse/dev.c
> +++ b/fs/fuse/dev.c
> @@ -1356,7 +1356,7 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, struct iov_iter *to)
>  	if (!fud)
>  		return -EPERM;
>  
> -	if (!iter_is_iovec(to))
> +	if (!user_backed_iter(to))
>  		return -EINVAL;
>  
>  	fuse_copy_init(&cs, 1, to);
> @@ -1949,7 +1949,7 @@ static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from)
>  	if (!fud)
>  		return -EPERM;
>  
> -	if (!iter_is_iovec(from))
> +	if (!user_backed_iter(from))
>  		return -EINVAL;
>  
>  	fuse_copy_init(&cs, 0, from);
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index 00fa861aeead..c982e3afe3b4 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -1465,7 +1465,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
>  			inode_unlock(inode);
>  	}
>  
> -	io->should_dirty = !write && iter_is_iovec(iter);
> +	io->should_dirty = !write && user_backed_iter(iter);
>  	while (count) {
>  		ssize_t nres;
>  		fl_owner_t owner = current->files;
> diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
> index 2cceb193dcd8..48e6cc74fdc1 100644
> --- a/fs/gfs2/file.c
> +++ b/fs/gfs2/file.c
> @@ -780,7 +780,7 @@ static inline bool should_fault_in_pages(struct iov_iter *i,
>  
>  	if (!count)
>  		return false;
> -	if (!iter_is_iovec(i))
> +	if (!user_backed_iter(i))
>  		return false;
>  
>  	size = PAGE_SIZE;
> diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
> index 31c7f1035b20..d5c7d019653b 100644
> --- a/fs/iomap/direct-io.c
> +++ b/fs/iomap/direct-io.c
> @@ -533,7 +533,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
>  			iomi.flags |= IOMAP_NOWAIT;
>  		}
>  
> -		if (iter_is_iovec(iter))
> +		if (user_backed_iter(iter))
>  			dio->flags |= IOMAP_DIO_DIRTY;
>  	} else {
>  		iomi.flags |= IOMAP_WRITE;
> diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
> index 4eb2a8380a28..022e1ce63e62 100644
> --- a/fs/nfs/direct.c
> +++ b/fs/nfs/direct.c
> @@ -478,7 +478,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
>  	if (!is_sync_kiocb(iocb))
>  		dreq->iocb = iocb;
>  
> -	if (iter_is_iovec(iter))
> +	if (user_backed_iter(iter))
>  		dreq->flags = NFS_ODIRECT_SHOULD_DIRTY;
>  
>  	if (!swap)
> diff --git a/include/linux/uio.h b/include/linux/uio.h
> index 76d305f3d4c2..6ab4260c3d6c 100644
> --- a/include/linux/uio.h
> +++ b/include/linux/uio.h
> @@ -26,6 +26,7 @@ enum iter_type {
>  	ITER_PIPE,
>  	ITER_XARRAY,
>  	ITER_DISCARD,
> +	ITER_UBUF,
>  };
>  
>  struct iov_iter_state {
> @@ -38,6 +39,7 @@ struct iov_iter {
>  	u8 iter_type;
>  	bool nofault;
>  	bool data_source;
> +	bool user_backed;
>  	size_t iov_offset;
>  	size_t count;
>  	union {
> @@ -46,6 +48,7 @@ struct iov_iter {
>  		const struct bio_vec *bvec;
>  		struct xarray *xarray;
>  		struct pipe_inode_info *pipe;
> +		void __user *ubuf;
>  	};
>  	union {
>  		unsigned long nr_segs;
> @@ -70,6 +73,11 @@ static inline void iov_iter_save_state(struct iov_iter *iter,
>  	state->nr_segs = iter->nr_segs;
>  }
>  
> +static inline bool iter_is_ubuf(const struct iov_iter *i)
> +{
> +	return iov_iter_type(i) == ITER_UBUF;
> +}
> +
>  static inline bool iter_is_iovec(const struct iov_iter *i)
>  {
>  	return iov_iter_type(i) == ITER_IOVEC;
> @@ -105,6 +113,11 @@ static inline unsigned char iov_iter_rw(const struct iov_iter *i)
>  	return i->data_source ? WRITE : READ;
>  }
>  
> +static inline bool user_backed_iter(const struct iov_iter *i)
> +{
> +	return i->user_backed;
> +}
> +
>  /*
>   * Total number of bytes covered by an iovec.
>   *
> @@ -320,4 +333,17 @@ ssize_t __import_iovec(int type, const struct iovec __user *uvec,
>  int import_single_range(int type, void __user *buf, size_t len,
>  		 struct iovec *iov, struct iov_iter *i);
>  
> +static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction,
> +			void __user *buf, size_t count)
> +{
> +	WARN_ON(direction & ~(READ | WRITE));
> +	*i = (struct iov_iter) {
> +		.iter_type = ITER_UBUF,
> +		.user_backed = true,
> +		.data_source = direction,
> +		.ubuf = buf,
> +		.count = count
> +	};
> +}
> +
>  #endif
> diff --git a/lib/iov_iter.c b/lib/iov_iter.c
> index 4c658a25e29c..8275b28e886b 100644
> --- a/lib/iov_iter.c
> +++ b/lib/iov_iter.c
> @@ -16,6 +16,16 @@
>  
>  #define PIPE_PARANOIA /* for now */
>  
> +/* covers ubuf and kbuf alike */
> +#define iterate_buf(i, n, base, len, off, __p, STEP) {		\
> +	size_t __maybe_unused off = 0;				\
> +	len = n;						\
> +	base = __p + i->iov_offset;				\
> +	len -= (STEP);						\
> +	i->iov_offset += len;					\
> +	n = len;						\
> +}
> +
>  /* covers iovec and kvec alike */
>  #define iterate_iovec(i, n, base, len, off, __p, STEP) {	\
>  	size_t off = 0;						\
> @@ -110,7 +120,12 @@ __out:								\
>  	if (unlikely(i->count < n))				\
>  		n = i->count;					\
>  	if (likely(n)) {					\
> -		if (likely(iter_is_iovec(i))) {			\
> +		if (likely(iter_is_ubuf(i))) {			\
> +			void __user *base;			\
> +			size_t len;				\
> +			iterate_buf(i, n, base, len, off,	\
> +						i->ubuf, (I)) 	\
> +		} else if (likely(iter_is_iovec(i))) {		\
>  			const struct iovec *iov = i->iov;	\
>  			void __user *base;			\
>  			size_t len;				\
> @@ -275,7 +290,11 @@ static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t by
>   */
>  size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t size)
>  {
> -	if (iter_is_iovec(i)) {
> +	if (iter_is_ubuf(i)) {
> +		size_t n = min(size, iov_iter_count(i));
> +		n -= fault_in_readable(i->ubuf + i->iov_offset, n);
> +		return size - n;
> +	} else if (iter_is_iovec(i)) {
>  		size_t count = min(size, iov_iter_count(i));
>  		const struct iovec *p;
>  		size_t skip;
> @@ -314,7 +333,11 @@ EXPORT_SYMBOL(fault_in_iov_iter_readable);
>   */
>  size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t size)
>  {
> -	if (iter_is_iovec(i)) {
> +	if (iter_is_ubuf(i)) {
> +		size_t n = min(size, iov_iter_count(i));
> +		n -= fault_in_safe_writeable(i->ubuf + i->iov_offset, n);
> +		return size - n;
> +	} else if (iter_is_iovec(i)) {
>  		size_t count = min(size, iov_iter_count(i));
>  		const struct iovec *p;
>  		size_t skip;
> @@ -345,6 +368,7 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction,
>  	*i = (struct iov_iter) {
>  		.iter_type = ITER_IOVEC,
>  		.nofault = false,
> +		.user_backed = true,
>  		.data_source = direction,
>  		.iov = iov,
>  		.nr_segs = nr_segs,
> @@ -494,7 +518,7 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
>  {
>  	if (unlikely(iov_iter_is_pipe(i)))
>  		return copy_pipe_to_iter(addr, bytes, i);
> -	if (iter_is_iovec(i))
> +	if (user_backed_iter(i))
>  		might_fault();
>  	iterate_and_advance(i, bytes, base, len, off,
>  		copyout(base, addr + off, len),
> @@ -576,7 +600,7 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
>  {
>  	if (unlikely(iov_iter_is_pipe(i)))
>  		return copy_mc_pipe_to_iter(addr, bytes, i);
> -	if (iter_is_iovec(i))
> +	if (user_backed_iter(i))
>  		might_fault();
>  	__iterate_and_advance(i, bytes, base, len, off,
>  		copyout_mc(base, addr + off, len),
> @@ -594,7 +618,7 @@ size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
>  		WARN_ON(1);
>  		return 0;
>  	}
> -	if (iter_is_iovec(i))
> +	if (user_backed_iter(i))
>  		might_fault();
>  	iterate_and_advance(i, bytes, base, len, off,
>  		copyin(addr + off, base, len),
> @@ -882,16 +906,16 @@ void iov_iter_advance(struct iov_iter *i, size_t size)
>  {
>  	if (unlikely(i->count < size))
>  		size = i->count;
> -	if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) {
> +	if (likely(iter_is_ubuf(i)) || unlikely(iov_iter_is_xarray(i))) {
> +		i->iov_offset += size;
> +		i->count -= size;
> +	} else if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) {
>  		/* iovec and kvec have identical layouts */
>  		iov_iter_iovec_advance(i, size);
>  	} else if (iov_iter_is_bvec(i)) {
>  		iov_iter_bvec_advance(i, size);
>  	} else if (iov_iter_is_pipe(i)) {
>  		pipe_advance(i, size);
> -	} else if (unlikely(iov_iter_is_xarray(i))) {
> -		i->iov_offset += size;
> -		i->count -= size;
>  	} else if (iov_iter_is_discard(i)) {
>  		i->count -= size;
>  	}
> @@ -938,7 +962,7 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll)
>  		return;
>  	}
>  	unroll -= i->iov_offset;
> -	if (iov_iter_is_xarray(i)) {
> +	if (iov_iter_is_xarray(i) || iter_is_ubuf(i)) {
>  		BUG(); /* We should never go beyond the start of the specified
>  			* range since we might then be straying into pages that
>  			* aren't pinned.
> @@ -1129,6 +1153,13 @@ static unsigned long iov_iter_alignment_bvec(const struct iov_iter *i)
>  
>  unsigned long iov_iter_alignment(const struct iov_iter *i)
>  {
> +	if (likely(iter_is_ubuf(i))) {
> +		size_t size = i->count;
> +		if (size)
> +			return ((unsigned long)i->ubuf + i->iov_offset) | size;
> +		return 0;
> +	}
> +
>  	/* iovec and kvec have identical layouts */
>  	if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
>  		return iov_iter_alignment_iovec(i);
> @@ -1159,6 +1190,9 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
>  	size_t size = i->count;
>  	unsigned k;
>  
> +	if (iter_is_ubuf(i))
> +		return 0;
> +
>  	if (WARN_ON(!iter_is_iovec(i)))
>  		return ~0U;
>  
> @@ -1287,7 +1321,19 @@ static ssize_t iter_xarray_get_pages(struct iov_iter *i,
>  	return actual;
>  }
>  
> -/* must be done on non-empty ITER_IOVEC one */
> +static unsigned long found_ubuf_segment(unsigned long addr,
> +					size_t len,
> +					size_t *size, size_t *start,
> +					unsigned maxpages)
> +{
> +	len += (*start = addr % PAGE_SIZE);

Ugh, I know you just copy-pasted this but can we rewrite this to:

	*start = addr % PAGE_SIZE;
	len += *start;

I think that's easier to read.
Al Viro June 28, 2022, 6:41 p.m. UTC | #3
On Mon, Jun 27, 2022 at 02:47:03PM -0400, Jeff Layton wrote:
 
> The code looks reasonable but is there any real benefit here? It seems
> like the only user of it so far is new_sync_{read,write}, and both seem
> to just use it to avoid allocating a single iovec on the stack.

Not really - for one thing, it's less overhead in data-copying primitives,
for another... Jens had plans for it as well.  It's not as simple as "just
use it whenever you are asked for a single-segment iovec", but...
Al Viro June 28, 2022, 6:44 p.m. UTC | #4
On Tue, Jun 28, 2022 at 02:38:55PM +0200, Christian Brauner wrote:

> > -/* must be done on non-empty ITER_IOVEC one */
> > +static unsigned long found_ubuf_segment(unsigned long addr,
> > +					size_t len,
> > +					size_t *size, size_t *start,
> > +					unsigned maxpages)
> > +{
> > +	len += (*start = addr % PAGE_SIZE);
> 
> Ugh, I know you just copy-pasted this but can we rewrite this to:
> 
> 	*start = addr % PAGE_SIZE;
> 	len += *start;
> 
> I think that's easier to read.

Dealt with later in the series (around the unification and cleanups
of iov_iter_get_pages/iov_iter_get_pages_alloc).  We could do that
first, but I'd rather not mix that massage in here.
Alexander Gordeev July 28, 2022, 9:55 a.m. UTC | #5
On Wed, Jun 22, 2022 at 05:15:17AM +0100, Al Viro wrote:
> Equivalent of single-segment iovec.  Initialized by iov_iter_ubuf(),
> checked for by iter_is_ubuf(), otherwise behaves like ITER_IOVEC
> ones.
> 
> We are going to expose the things like ->write_iter() et.al. to those
> in subsequent commits.
> 
> New predicate (user_backed_iter()) that is true for ITER_IOVEC and
> ITER_UBUF; places like direct-IO handling should use that for
> checking that pages we modify after getting them from iov_iter_get_pages()
> would need to be dirtied.
> 
> DO NOT assume that replacing iter_is_iovec() with user_backed_iter()
> will solve all problems - there's code that uses iter_is_iovec() to
> decide how to poke around in iov_iter guts and for that the predicate
> replacement obviously won't suffice.
> 
> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
> Link: https://lore.kernel.org/r/20220622041552.737754-9-viro@zeniv.linux.org.uk

Hi Al,

This change causes the sendfile09 LTP testcase to fail in linux-next
(up to next-20220727) on s390. In fact, not this change exactly,
but rather 92d4d18eecb9 ("new iov_iter flavour - ITER_UBUF") -
which differs from what is posted here.

AFAICT page_cache_pipe_buf_confirm() encounters a page that is
!PageUptodate() and has no page->mapping, and returns -ENODATA.

I am going to narrow the testcase and get more details, but please
let me know if I am missing something.

Thanks!
Al Viro July 29, 2022, 5:21 p.m. UTC | #6
On Thu, Jul 28, 2022 at 11:55:10AM +0200, Alexander Gordeev wrote:
> On Wed, Jun 22, 2022 at 05:15:17AM +0100, Al Viro wrote:
> > Equivalent of single-segment iovec.  Initialized by iov_iter_ubuf(),
> > checked for by iter_is_ubuf(), otherwise behaves like ITER_IOVEC
> > ones.
> > 
> > We are going to expose the things like ->write_iter() et.al. to those
> > in subsequent commits.
> > 
> > New predicate (user_backed_iter()) that is true for ITER_IOVEC and
> > ITER_UBUF; places like direct-IO handling should use that for
> > checking that pages we modify after getting them from iov_iter_get_pages()
> > would need to be dirtied.
> > 
> > DO NOT assume that replacing iter_is_iovec() with user_backed_iter()
> > will solve all problems - there's code that uses iter_is_iovec() to
> > decide how to poke around in iov_iter guts and for that the predicate
> > replacement obviously won't suffice.
> > 
> > Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
> > Link: https://lore.kernel.org/r/20220622041552.737754-9-viro@zeniv.linux.org.uk
> 
> Hi Al,
> 
> This changes causes sendfile09 LTP testcase fail in linux-next
> (up to next-20220727) on s390. In fact, not this change exactly,
> but rather 92d4d18eecb9 ("new iov_iter flavour - ITER_UBUF") -
> which differs from what is posted here.
> 
> AFAICT page_cache_pipe_buf_confirm() encounters !PageUptodate()
> and !page->mapping page and returns -ENODATA.
> 
> I am going to narrow the testcase and get more details, but please
> let me know if I am missing something.

Grrr....

-               } else if (iter_is_iovec(to)) {
+               } else if (!user_backed_iter(to)) {

in mm/shmem.c.  Spot the typo...

Could you check if replacing that line with
		} else if (user_backed_iter(to)) {

fixes the breakage?
Alexander Gordeev July 29, 2022, 9:12 p.m. UTC | #7
On Fri, Jul 29, 2022 at 06:21:23PM +0100, Al Viro wrote:
> > Hi Al,
> > 
> > This changes causes sendfile09 LTP testcase fail in linux-next
> > (up to next-20220727) on s390. In fact, not this change exactly,
> > but rather 92d4d18eecb9 ("new iov_iter flavour - ITER_UBUF") -
> > which differs from what is posted here.
> > 
> > AFAICT page_cache_pipe_buf_confirm() encounters !PageUptodate()
> > and !page->mapping page and returns -ENODATA.
> > 
> > I am going to narrow the testcase and get more details, but please
> > let me know if I am missing something.
> 
> Grrr....
> 
> -               } else if (iter_is_iovec(to)) {
> +               } else if (!user_backed_iter(to)) {
> 
> in mm/shmem.c.  Spot the typo...
> 
> Could you check if replacing that line with
> 		} else if (user_backed_iter(to)) {
> 
> fixes the breakage?

Yes, it does! So just to be sure - this is the fix:

diff --git a/mm/shmem.c b/mm/shmem.c
index 8baf26eda989..5783f11351bb 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2626,7 +2626,7 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 			ret = copy_page_to_iter(page, offset, nr, to);
 			put_page(page);
 
-		} else if (!user_backed_iter(to)) {
+		} else if (user_backed_iter(to)) {
 			/*
 			 * Copy to user tends to be so well optimized, but
 			 * clear_user() not so much, that it is noticeably

Thanks!
Al Viro July 30, 2022, 12:03 a.m. UTC | #8
On Fri, Jul 29, 2022 at 11:12:45PM +0200, Alexander Gordeev wrote:
> On Fri, Jul 29, 2022 at 06:21:23PM +0100, Al Viro wrote:
> > > Hi Al,
> > > 
> > > This changes causes sendfile09 LTP testcase fail in linux-next
> > > (up to next-20220727) on s390. In fact, not this change exactly,
> > > but rather 92d4d18eecb9 ("new iov_iter flavour - ITER_UBUF") -
> > > which differs from what is posted here.
> > > 
> > > AFAICT page_cache_pipe_buf_confirm() encounters !PageUptodate()
> > > and !page->mapping page and returns -ENODATA.
> > > 
> > > I am going to narrow the testcase and get more details, but please
> > > let me know if I am missing something.
> > 
> > Grrr....
> > 
> > -               } else if (iter_is_iovec(to)) {
> > +               } else if (!user_backed_iter(to)) {
> > 
> > in mm/shmem.c.  Spot the typo...
> > 
> > Could you check if replacing that line with
> > 		} else if (user_backed_iter(to)) {
> > 
> > fixes the breakage?
> 
> Yes, it does! So just to be sure - this is the fix:

FWIW, there'd been another braino, caught by a test from Hugh Dickins;
this one is in "ITER_PIPE: allocate buffers as we go in copy-to-pipe primitives".

Incremental follows; folded and pushed out.

diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 642841ce7595..939078ffbfb5 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -469,7 +469,7 @@ static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
 		struct page *page = append_pipe(i, n, &off);
 		chunk = min_t(size_t, n, PAGE_SIZE - off);
 		if (!page)
-			break;
+			return bytes - n;
 		memcpy_to_page(page, off, addr, chunk);
 		addr += chunk;
 	}
@@ -774,7 +774,7 @@ static size_t pipe_zero(size_t bytes, struct iov_iter *i)
 		char *p;
 
 		if (!page)
-			break;
+			return bytes - n;
 		chunk = min_t(size_t, n, PAGE_SIZE - off);
 		p = kmap_local_page(page);
 		memset(p + off, 0, chunk);
diff --git a/mm/shmem.c b/mm/shmem.c
index 6b83f3971795..6c8a84a1fbbb 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2603,7 +2603,7 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 			ret = copy_page_to_iter(page, offset, nr, to);
 			put_page(page);
 
-		} else if (!user_backed_iter(to)) {
+		} else if (user_backed_iter(to)) {
 			/*
 			 * Copy to user tends to be so well optimized, but
 			 * clear_user() not so much, that it is noticeably
David Howells Aug. 1, 2022, 12:42 p.m. UTC | #9
You need to modify dup_iter() also.  That will go through the:

		return new->iov = kmemdup(new->iov,
				   new->nr_segs * sizeof(struct iovec),
				   flags);

case with ubuf-class iterators, which will clobber new->ubuf.

David
Al Viro Aug. 1, 2022, 9:14 p.m. UTC | #10
On Mon, Aug 01, 2022 at 01:42:04PM +0100, David Howells wrote:
> You need to modify dup_iter() also.  That will go through the:
> 
> 		return new->iov = kmemdup(new->iov,
> 				   new->nr_segs * sizeof(struct iovec),
> 				   flags);
> 
> case with a ubuf-class iterators, which will clobber new->ubuf.
> 
> David

Fixed, folded and pushed out.  Incremental:

diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 939078ffbfb5..46ec07886d7b 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1659,17 +1659,16 @@ const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
 		WARN_ON(1);
 		return NULL;
 	}
-	if (unlikely(iov_iter_is_discard(new) || iov_iter_is_xarray(new)))
-		return NULL;
 	if (iov_iter_is_bvec(new))
 		return new->bvec = kmemdup(new->bvec,
 				    new->nr_segs * sizeof(struct bio_vec),
 				    flags);
-	else
+	else if (iov_iter_is_kvec(new) || iter_is_iovec(new))
 		/* iovec and kvec have identical layout */
 		return new->iov = kmemdup(new->iov,
 				   new->nr_segs * sizeof(struct iovec),
 				   flags);
+	return NULL;
 }
 EXPORT_SYMBOL(dup_iter);
David Howells Aug. 1, 2022, 10:54 p.m. UTC | #11
Al Viro <viro@zeniv.linux.org.uk> wrote:

>  	if (iov_iter_is_bvec(new))
>  		return new->bvec = kmemdup(new->bvec,
>  				    new->nr_segs * sizeof(struct bio_vec),
>  				    flags);
> -	else
> +	else if (iov_iter_is_kvec(new) || iter_is_iovec(new))

The else is redundant.

David
diff mbox series

Patch

diff --git a/block/fops.c b/block/fops.c
index 6e86931ab847..3e68d69e0ee3 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -69,7 +69,7 @@  static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
 
 	if (iov_iter_rw(iter) == READ) {
 		bio_init(&bio, bdev, vecs, nr_pages, REQ_OP_READ);
-		if (iter_is_iovec(iter))
+		if (user_backed_iter(iter))
 			should_dirty = true;
 	} else {
 		bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb));
@@ -199,7 +199,7 @@  static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 	}
 
 	dio->size = 0;
-	if (is_read && iter_is_iovec(iter))
+	if (is_read && user_backed_iter(iter))
 		dio->flags |= DIO_SHOULD_DIRTY;
 
 	blk_start_plug(&plug);
@@ -331,7 +331,7 @@  static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
 	dio->size = bio->bi_iter.bi_size;
 
 	if (is_read) {
-		if (iter_is_iovec(iter)) {
+		if (user_backed_iter(iter)) {
 			dio->flags |= DIO_SHOULD_DIRTY;
 			bio_set_pages_dirty(bio);
 		}
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 8c8226c0feac..e132adeeaf16 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1262,7 +1262,7 @@  ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
 	size_t count = iov_iter_count(iter);
 	loff_t pos = iocb->ki_pos;
 	bool write = iov_iter_rw(iter) == WRITE;
-	bool should_dirty = !write && iter_is_iovec(iter);
+	bool should_dirty = !write && user_backed_iter(iter);
 
 	if (write && ceph_snap(file_inode(file)) != CEPH_NOSNAP)
 		return -EROFS;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 1618e0537d58..4b4129d9a90c 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -4004,7 +4004,7 @@  static ssize_t __cifs_readv(
 	if (!is_sync_kiocb(iocb))
 		ctx->iocb = iocb;
 
-	if (iter_is_iovec(to))
+	if (user_backed_iter(to))
 		ctx->should_dirty = true;
 
 	if (direct) {
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 39647eb56904..72237f49ad94 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1245,7 +1245,7 @@  ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
 	spin_lock_init(&dio->bio_lock);
 	dio->refcount = 1;
 
-	dio->should_dirty = iter_is_iovec(iter) && iov_iter_rw(iter) == READ;
+	dio->should_dirty = user_backed_iter(iter) && iov_iter_rw(iter) == READ;
 	sdio.iter = iter;
 	sdio.final_block_in_request = end >> blkbits;
 
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 0e537e580dc1..8d657c2cd6f7 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1356,7 +1356,7 @@  static ssize_t fuse_dev_read(struct kiocb *iocb, struct iov_iter *to)
 	if (!fud)
 		return -EPERM;
 
-	if (!iter_is_iovec(to))
+	if (!user_backed_iter(to))
 		return -EINVAL;
 
 	fuse_copy_init(&cs, 1, to);
@@ -1949,7 +1949,7 @@  static ssize_t fuse_dev_write(struct kiocb *iocb, struct iov_iter *from)
 	if (!fud)
 		return -EPERM;
 
-	if (!iter_is_iovec(from))
+	if (!user_backed_iter(from))
 		return -EINVAL;
 
 	fuse_copy_init(&cs, 0, from);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 00fa861aeead..c982e3afe3b4 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1465,7 +1465,7 @@  ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
 			inode_unlock(inode);
 	}
 
-	io->should_dirty = !write && iter_is_iovec(iter);
+	io->should_dirty = !write && user_backed_iter(iter);
 	while (count) {
 		ssize_t nres;
 		fl_owner_t owner = current->files;
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 2cceb193dcd8..48e6cc74fdc1 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -780,7 +780,7 @@  static inline bool should_fault_in_pages(struct iov_iter *i,
 
 	if (!count)
 		return false;
-	if (!iter_is_iovec(i))
+	if (!user_backed_iter(i))
 		return false;
 
 	size = PAGE_SIZE;
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 31c7f1035b20..d5c7d019653b 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -533,7 +533,7 @@  __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 			iomi.flags |= IOMAP_NOWAIT;
 		}
 
-		if (iter_is_iovec(iter))
+		if (user_backed_iter(iter))
 			dio->flags |= IOMAP_DIO_DIRTY;
 	} else {
 		iomi.flags |= IOMAP_WRITE;
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 4eb2a8380a28..022e1ce63e62 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -478,7 +478,7 @@  ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
 	if (!is_sync_kiocb(iocb))
 		dreq->iocb = iocb;
 
-	if (iter_is_iovec(iter))
+	if (user_backed_iter(iter))
 		dreq->flags = NFS_ODIRECT_SHOULD_DIRTY;
 
 	if (!swap)
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 76d305f3d4c2..6ab4260c3d6c 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -26,6 +26,7 @@  enum iter_type {
 	ITER_PIPE,
 	ITER_XARRAY,
 	ITER_DISCARD,
+	ITER_UBUF,
 };
 
 struct iov_iter_state {
@@ -38,6 +39,7 @@  struct iov_iter {
 	u8 iter_type;
 	bool nofault;
 	bool data_source;
+	bool user_backed;
 	size_t iov_offset;
 	size_t count;
 	union {
@@ -46,6 +48,7 @@  struct iov_iter {
 		const struct bio_vec *bvec;
 		struct xarray *xarray;
 		struct pipe_inode_info *pipe;
+		void __user *ubuf;
 	};
 	union {
 		unsigned long nr_segs;
@@ -70,6 +73,11 @@  static inline void iov_iter_save_state(struct iov_iter *iter,
 	state->nr_segs = iter->nr_segs;
 }
 
+static inline bool iter_is_ubuf(const struct iov_iter *i)
+{
+	return iov_iter_type(i) == ITER_UBUF;
+}
+
 static inline bool iter_is_iovec(const struct iov_iter *i)
 {
 	return iov_iter_type(i) == ITER_IOVEC;
@@ -105,6 +113,11 @@  static inline unsigned char iov_iter_rw(const struct iov_iter *i)
 	return i->data_source ? WRITE : READ;
 }
 
+static inline bool user_backed_iter(const struct iov_iter *i)
+{
+	return i->user_backed;
+}
+
 /*
  * Total number of bytes covered by an iovec.
  *
@@ -320,4 +333,17 @@  ssize_t __import_iovec(int type, const struct iovec __user *uvec,
 int import_single_range(int type, void __user *buf, size_t len,
 		 struct iovec *iov, struct iov_iter *i);
 
+static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction,
+			void __user *buf, size_t count)
+{
+	WARN_ON(direction & ~(READ | WRITE));
+	*i = (struct iov_iter) {
+		.iter_type = ITER_UBUF,
+		.user_backed = true,
+		.data_source = direction,
+		.ubuf = buf,
+		.count = count
+	};
+}
+
 #endif
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 4c658a25e29c..8275b28e886b 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -16,6 +16,16 @@ 
 
 #define PIPE_PARANOIA /* for now */
 
+/* covers ubuf and kbuf alike */
+#define iterate_buf(i, n, base, len, off, __p, STEP) {		\
+	size_t __maybe_unused off = 0;				\
+	len = n;						\
+	base = __p + i->iov_offset;				\
+	len -= (STEP);						\
+	i->iov_offset += len;					\
+	n = len;						\
+}
+
 /* covers iovec and kvec alike */
 #define iterate_iovec(i, n, base, len, off, __p, STEP) {	\
 	size_t off = 0;						\
@@ -110,7 +120,12 @@  __out:								\
 	if (unlikely(i->count < n))				\
 		n = i->count;					\
 	if (likely(n)) {					\
-		if (likely(iter_is_iovec(i))) {			\
+		if (likely(iter_is_ubuf(i))) {			\
+			void __user *base;			\
+			size_t len;				\
+			iterate_buf(i, n, base, len, off,	\
+						i->ubuf, (I)) 	\
+		} else if (likely(iter_is_iovec(i))) {		\
 			const struct iovec *iov = i->iov;	\
 			void __user *base;			\
 			size_t len;				\
@@ -275,7 +290,11 @@  static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t by
  */
 size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t size)
 {
-	if (iter_is_iovec(i)) {
+	if (iter_is_ubuf(i)) {
+		size_t n = min(size, iov_iter_count(i));
+		n -= fault_in_readable(i->ubuf + i->iov_offset, n);
+		return size - n;
+	} else if (iter_is_iovec(i)) {
 		size_t count = min(size, iov_iter_count(i));
 		const struct iovec *p;
 		size_t skip;
@@ -314,7 +333,11 @@  EXPORT_SYMBOL(fault_in_iov_iter_readable);
  */
 size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t size)
 {
-	if (iter_is_iovec(i)) {
+	if (iter_is_ubuf(i)) {
+		size_t n = min(size, iov_iter_count(i));
+		n -= fault_in_safe_writeable(i->ubuf + i->iov_offset, n);
+		return size - n;
+	} else if (iter_is_iovec(i)) {
 		size_t count = min(size, iov_iter_count(i));
 		const struct iovec *p;
 		size_t skip;
@@ -345,6 +368,7 @@  void iov_iter_init(struct iov_iter *i, unsigned int direction,
 	*i = (struct iov_iter) {
 		.iter_type = ITER_IOVEC,
 		.nofault = false,
+		.user_backed = true,
 		.data_source = direction,
 		.iov = iov,
 		.nr_segs = nr_segs,
@@ -494,7 +518,7 @@  size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
 {
 	if (unlikely(iov_iter_is_pipe(i)))
 		return copy_pipe_to_iter(addr, bytes, i);
-	if (iter_is_iovec(i))
+	if (user_backed_iter(i))
 		might_fault();
 	iterate_and_advance(i, bytes, base, len, off,
 		copyout(base, addr + off, len),
@@ -576,7 +600,7 @@  size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
 {
 	if (unlikely(iov_iter_is_pipe(i)))
 		return copy_mc_pipe_to_iter(addr, bytes, i);
-	if (iter_is_iovec(i))
+	if (user_backed_iter(i))
 		might_fault();
 	__iterate_and_advance(i, bytes, base, len, off,
 		copyout_mc(base, addr + off, len),
@@ -594,7 +618,7 @@  size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
 		WARN_ON(1);
 		return 0;
 	}
-	if (iter_is_iovec(i))
+	if (user_backed_iter(i))
 		might_fault();
 	iterate_and_advance(i, bytes, base, len, off,
 		copyin(addr + off, base, len),
@@ -882,16 +906,16 @@  void iov_iter_advance(struct iov_iter *i, size_t size)
 {
 	if (unlikely(i->count < size))
 		size = i->count;
-	if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) {
+	if (likely(iter_is_ubuf(i)) || unlikely(iov_iter_is_xarray(i))) {
+		i->iov_offset += size;
+		i->count -= size;
+	} else if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) {
 		/* iovec and kvec have identical layouts */
 		iov_iter_iovec_advance(i, size);
 	} else if (iov_iter_is_bvec(i)) {
 		iov_iter_bvec_advance(i, size);
 	} else if (iov_iter_is_pipe(i)) {
 		pipe_advance(i, size);
-	} else if (unlikely(iov_iter_is_xarray(i))) {
-		i->iov_offset += size;
-		i->count -= size;
 	} else if (iov_iter_is_discard(i)) {
 		i->count -= size;
 	}
@@ -938,7 +962,7 @@  void iov_iter_revert(struct iov_iter *i, size_t unroll)
 		return;
 	}
 	unroll -= i->iov_offset;
-	if (iov_iter_is_xarray(i)) {
+	if (iov_iter_is_xarray(i) || iter_is_ubuf(i)) {
 		BUG(); /* We should never go beyond the start of the specified
 			* range since we might then be straying into pages that
 			* aren't pinned.
@@ -1129,6 +1153,13 @@  static unsigned long iov_iter_alignment_bvec(const struct iov_iter *i)
 
 unsigned long iov_iter_alignment(const struct iov_iter *i)
 {
+	if (likely(iter_is_ubuf(i))) {
+		size_t size = i->count;
+		if (size)
+			return ((unsigned long)i->ubuf + i->iov_offset) | size;
+		return 0;
+	}
+
 	/* iovec and kvec have identical layouts */
 	if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
 		return iov_iter_alignment_iovec(i);
@@ -1159,6 +1190,9 @@  unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
 	size_t size = i->count;
 	unsigned k;
 
+	if (iter_is_ubuf(i))
+		return 0;
+
 	if (WARN_ON(!iter_is_iovec(i)))
 		return ~0U;
 
@@ -1287,7 +1321,19 @@  static ssize_t iter_xarray_get_pages(struct iov_iter *i,
 	return actual;
 }
 
-/* must be done on non-empty ITER_IOVEC one */
+static unsigned long found_ubuf_segment(unsigned long addr,
+					size_t len,
+					size_t *size, size_t *start,
+					unsigned maxpages)
+{
+	len += (*start = addr % PAGE_SIZE);
+	if (len > maxpages * PAGE_SIZE)
+		len = maxpages * PAGE_SIZE;
+	*size = len;
+	return addr & PAGE_MASK;
+}
+
+/* must be done on non-empty ITER_UBUF or ITER_IOVEC one */
 static unsigned long first_iovec_segment(const struct iov_iter *i,
 					 size_t *size, size_t *start,
 					 size_t maxsize, unsigned maxpages)
@@ -1295,6 +1341,11 @@  static unsigned long first_iovec_segment(const struct iov_iter *i,
 	size_t skip;
 	long k;
 
+	if (iter_is_ubuf(i)) {
+		unsigned long addr = (unsigned long)i->ubuf + i->iov_offset;
+		return found_ubuf_segment(addr, maxsize, size, start, maxpages);
+	}
+
 	for (k = 0, skip = i->iov_offset; k < i->nr_segs; k++, skip = 0) {
 		unsigned long addr = (unsigned long)i->iov[k].iov_base + skip;
 		size_t len = i->iov[k].iov_len - skip;
@@ -1303,11 +1354,7 @@  static unsigned long first_iovec_segment(const struct iov_iter *i,
 			continue;
 		if (len > maxsize)
 			len = maxsize;
-		len += (*start = addr % PAGE_SIZE);
-		if (len > maxpages * PAGE_SIZE)
-			len = maxpages * PAGE_SIZE;
-		*size = len;
-		return addr & PAGE_MASK;
+		return found_ubuf_segment(addr, len, size, start, maxpages);
 	}
 	BUG(); // if it had been empty, we wouldn't get called
 }
@@ -1344,7 +1391,7 @@  ssize_t iov_iter_get_pages(struct iov_iter *i,
 	if (!maxsize)
 		return 0;
 
-	if (likely(iter_is_iovec(i))) {
+	if (likely(user_backed_iter(i))) {
 		unsigned int gup_flags = 0;
 		unsigned long addr;
 
@@ -1470,7 +1517,7 @@  ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
 	if (!maxsize)
 		return 0;
 
-	if (likely(iter_is_iovec(i))) {
+	if (likely(user_backed_iter(i))) {
 		unsigned int gup_flags = 0;
 		unsigned long addr;
 
@@ -1624,6 +1671,11 @@  int iov_iter_npages(const struct iov_iter *i, int maxpages)
 {
 	if (unlikely(!i->count))
 		return 0;
+	if (likely(iter_is_ubuf(i))) {
+		unsigned offs = offset_in_page(i->ubuf + i->iov_offset);
+		int npages = DIV_ROUND_UP(offs + i->count, PAGE_SIZE);
+		return min(npages, maxpages);
+	}
 	/* iovec and kvec have identical layouts */
 	if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
 		return iov_npages(i, maxpages);
@@ -1862,10 +1914,12 @@  EXPORT_SYMBOL(import_single_range);
 void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state)
 {
 	if (WARN_ON_ONCE(!iov_iter_is_bvec(i) && !iter_is_iovec(i)) &&
-			 !iov_iter_is_kvec(i))
+			 !iov_iter_is_kvec(i) && !iter_is_ubuf(i))
 		return;
 	i->iov_offset = state->iov_offset;
 	i->count = state->count;
+	if (iter_is_ubuf(i))
+		return;
 	/*
 	 * For the *vec iters, nr_segs + iov is constant - if we increment
 	 * the vec, then we also decrement the nr_segs count. Hence we don't
diff --git a/mm/shmem.c b/mm/shmem.c
index a6f565308133..6b83f3971795 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2603,7 +2603,7 @@  static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 			ret = copy_page_to_iter(page, offset, nr, to);
 			put_page(page);
 
-		} else if (iter_is_iovec(to)) {
+		} else if (!user_backed_iter(to)) {
 			/*
 			 * Copy to user tends to be so well optimized, but
 			 * clear_user() not so much, that it is noticeably