diff mbox series

[v6,03/34] iov_iter: Pass I/O direction into iov_iter_get_pages*()

Message ID 167391050409.2311931.7103784292954267373.stgit@warthog.procyon.org.uk (mailing list archive)
State New, archived
Headers show
Series iov_iter: Improve page extraction (ref, pin or just list) | expand

Commit Message

David Howells Jan. 16, 2023, 11:08 p.m. UTC
Define FOLL_SOURCE_BUF and FOLL_DEST_BUF to indicate to get_user_pages*()
and iov_iter_get_pages*() how the buffer is intended to be used in an I/O
operation.  Don't use READ and WRITE as a read I/O writes to memory and
vice versa - which causes confusion.

The direction is checked against the iterator's data_source.

Signed-off-by: David Howells <dhowells@redhat.com>
---

 block/bio.c             |    6 ++++++
 block/blk-map.c         |    2 ++
 crypto/af_alg.c         |    9 ++++++---
 crypto/algif_hash.c     |    3 ++-
 drivers/vhost/scsi.c    |    9 ++++++---
 fs/ceph/addr.c          |    2 +-
 fs/ceph/file.c          |   14 ++++++++------
 fs/cifs/file.c          |    8 ++++----
 fs/cifs/misc.c          |    3 ++-
 fs/direct-io.c          |    6 ++++--
 fs/fuse/dev.c           |    3 ++-
 fs/fuse/file.c          |    8 ++++----
 fs/nfs/direct.c         |   10 ++++++----
 fs/splice.c             |    3 ++-
 include/crypto/if_alg.h |    3 ++-
 include/linux/bio.h     |   18 ++++++++++++++++--
 include/linux/mm.h      |   10 ++++++++++
 lib/iov_iter.c          |   14 +++++++-------
 net/9p/trans_virtio.c   |   12 ++++++++----
 net/core/datagram.c     |    5 +++--
 net/core/skmsg.c        |    4 ++--
 net/rds/message.c       |    4 ++--
 net/tls/tls_sw.c        |    5 ++---
 23 files changed, 107 insertions(+), 54 deletions(-)

Comments

Christoph Hellwig Jan. 17, 2023, 7:57 a.m. UTC | #1
On Mon, Jan 16, 2023 at 11:08:24PM +0000, David Howells wrote:
> Define FOLL_SOURCE_BUF and FOLL_DEST_BUF to indicate to get_user_pages*()
> and iov_iter_get_pages*() how the buffer is intended to be used in an I/O
> operation.  Don't use READ and WRITE as a read I/O writes to memory and
> vice versa - which causes confusion.
> 
> The direction is checked against the iterator's data_source.

Why can't we use the existing FOLL_WRITE?

> 
> Signed-off-by: David Howells <dhowells@redhat.com>
> ---
> 
>  block/bio.c             |    6 ++++++
>  block/blk-map.c         |    2 ++
>  crypto/af_alg.c         |    9 ++++++---
>  crypto/algif_hash.c     |    3 ++-
>  drivers/vhost/scsi.c    |    9 ++++++---
>  fs/ceph/addr.c          |    2 +-
>  fs/ceph/file.c          |   14 ++++++++------
>  fs/cifs/file.c          |    8 ++++----
>  fs/cifs/misc.c          |    3 ++-
>  fs/direct-io.c          |    6 ++++--
>  fs/fuse/dev.c           |    3 ++-
>  fs/fuse/file.c          |    8 ++++----
>  fs/nfs/direct.c         |   10 ++++++----
>  fs/splice.c             |    3 ++-
>  include/crypto/if_alg.h |    3 ++-
>  include/linux/bio.h     |   18 ++++++++++++++++--
>  include/linux/mm.h      |   10 ++++++++++
>  lib/iov_iter.c          |   14 +++++++-------
>  net/9p/trans_virtio.c   |   12 ++++++++----
>  net/core/datagram.c     |    5 +++--
>  net/core/skmsg.c        |    4 ++--
>  net/rds/message.c       |    4 ++--
>  net/tls/tls_sw.c        |    5 ++---
>  23 files changed, 107 insertions(+), 54 deletions(-)
> 
> diff --git a/block/bio.c b/block/bio.c
> index 5f96fcae3f75..867cf4db87ea 100644
> --- a/block/bio.c
> +++ b/block/bio.c
> @@ -1242,6 +1242,8 @@ static int bio_iov_add_zone_append_page(struct bio *bio, struct page *page,
>   * pages will have to be released using put_page() when done.
>   * For multi-segment *iter, this function only adds pages from the
>   * next non-empty segment of the iov iterator.
> + *
> + * The I/O direction is determined from the bio operation type.
>   */
>  static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
>  {
> @@ -1263,6 +1265,8 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
>  	BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2);
>  	pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);
>  
> +	gup_flags |= bio_is_write(bio) ? FOLL_SOURCE_BUF : FOLL_DEST_BUF;
> +
>  	if (bio->bi_bdev && blk_queue_pci_p2pdma(bio->bi_bdev->bd_disk->queue))
>  		gup_flags |= FOLL_PCI_P2PDMA;
>  
> @@ -1332,6 +1336,8 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
>   * fit into the bio, or are requested in @iter, whatever is smaller. If
>   * MM encounters an error pinning the requested pages, it stops. Error
>   * is returned only if 0 pages could be pinned.
> + *
> + * The bio operation indicates the data direction.
>   */
>  int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
>  {
> diff --git a/block/blk-map.c b/block/blk-map.c
> index 08cbb7ff3b19..c30be529fb55 100644
> --- a/block/blk-map.c
> +++ b/block/blk-map.c
> @@ -279,6 +279,8 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
>  	if (bio == NULL)
>  		return -ENOMEM;
>  
> +	gup_flags |= bio_is_write(bio) ? FOLL_SOURCE_BUF : FOLL_DEST_BUF;
> +
>  	if (blk_queue_pci_p2pdma(rq->q))
>  		gup_flags |= FOLL_PCI_P2PDMA;
>  
> diff --git a/crypto/af_alg.c b/crypto/af_alg.c
> index 0a4fa2a429e2..7a68db157fae 100644
> --- a/crypto/af_alg.c
> +++ b/crypto/af_alg.c
> @@ -531,13 +531,15 @@ static const struct net_proto_family alg_family = {
>  	.owner	=	THIS_MODULE,
>  };
>  
> -int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len)
> +int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len,
> +		   unsigned int gup_flags)
>  {
>  	size_t off;
>  	ssize_t n;
>  	int npages, i;
>  
> -	n = iov_iter_get_pages2(iter, sgl->pages, len, ALG_MAX_PAGES, &off);
> +	n = iov_iter_get_pages(iter, sgl->pages, len, ALG_MAX_PAGES, &off,
> +			       gup_flags);
>  	if (n < 0)
>  		return n;
>  
> @@ -1310,7 +1312,8 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags,
>  		list_add_tail(&rsgl->list, &areq->rsgl_list);
>  
>  		/* make one iovec available as scatterlist */
> -		err = af_alg_make_sg(&rsgl->sgl, &msg->msg_iter, seglen);
> +		err = af_alg_make_sg(&rsgl->sgl, &msg->msg_iter, seglen,
> +				     FOLL_DEST_BUF);
>  		if (err < 0) {
>  			rsgl->sg_num_bytes = 0;
>  			return err;
> diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c
> index 1d017ec5c63c..fe3d2258145f 100644
> --- a/crypto/algif_hash.c
> +++ b/crypto/algif_hash.c
> @@ -91,7 +91,8 @@ static int hash_sendmsg(struct socket *sock, struct msghdr *msg,
>  		if (len > limit)
>  			len = limit;
>  
> -		len = af_alg_make_sg(&ctx->sgl, &msg->msg_iter, len);
> +		len = af_alg_make_sg(&ctx->sgl, &msg->msg_iter, len,
> +				     FOLL_SOURCE_BUF);
>  		if (len < 0) {
>  			err = copied ? 0 : len;
>  			goto unlock;
> diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
> index dca6346d75b3..5d10837d19ec 100644
> --- a/drivers/vhost/scsi.c
> +++ b/drivers/vhost/scsi.c
> @@ -646,10 +646,13 @@ vhost_scsi_map_to_sgl(struct vhost_scsi_cmd *cmd,
>  	struct scatterlist *sg = sgl;
>  	ssize_t bytes;
>  	size_t offset;
> -	unsigned int npages = 0;
> +	unsigned int npages = 0, gup_flags = 0;
>  
> -	bytes = iov_iter_get_pages2(iter, pages, LONG_MAX,
> -				VHOST_SCSI_PREALLOC_UPAGES, &offset);
> +	gup_flags |= write ? FOLL_SOURCE_BUF : FOLL_DEST_BUF;
> +
> +	bytes = iov_iter_get_pages(iter, pages, LONG_MAX,
> +				   VHOST_SCSI_PREALLOC_UPAGES, &offset,
> +				   gup_flags);
>  	/* No pages were pinned */
>  	if (bytes <= 0)
>  		return bytes < 0 ? bytes : -EFAULT;
> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
> index 8c74871e37c9..cfc3353e5604 100644
> --- a/fs/ceph/addr.c
> +++ b/fs/ceph/addr.c
> @@ -328,7 +328,7 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
>  
>  	dout("%s: pos=%llu orig_len=%zu len=%llu\n", __func__, subreq->start, subreq->len, len);
>  	iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages, subreq->start, len);
> -	err = iov_iter_get_pages_alloc2(&iter, &pages, len, &page_off);
> +	err = iov_iter_get_pages_alloc(&iter, &pages, len, &page_off, FOLL_DEST_BUF);
>  	if (err < 0) {
>  		dout("%s: iov_ter_get_pages_alloc returned %d\n", __func__, err);
>  		goto out;
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index 27c72a2f6af5..ffd36eeea186 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -81,7 +81,7 @@ static __le32 ceph_flags_sys2wire(u32 flags)
>  #define ITER_GET_BVECS_PAGES	64
>  
>  static ssize_t __iter_get_bvecs(struct iov_iter *iter, size_t maxsize,
> -				struct bio_vec *bvecs)
> +				struct bio_vec *bvecs, bool write)
>  {
>  	size_t size = 0;
>  	int bvec_idx = 0;
> @@ -95,8 +95,9 @@ static ssize_t __iter_get_bvecs(struct iov_iter *iter, size_t maxsize,
>  		size_t start;
>  		int idx = 0;
>  
> -		bytes = iov_iter_get_pages2(iter, pages, maxsize - size,
> -					   ITER_GET_BVECS_PAGES, &start);
> +		bytes = iov_iter_get_pages(iter, pages, maxsize - size,
> +					   ITER_GET_BVECS_PAGES, &start,
> +					   write ? FOLL_SOURCE_BUF : FOLL_DEST_BUF);
>  		if (bytes < 0)
>  			return size ?: bytes;
>  
> @@ -127,7 +128,8 @@ static ssize_t __iter_get_bvecs(struct iov_iter *iter, size_t maxsize,
>   * Return the number of bytes in the created bio_vec array, or an error.
>   */
>  static ssize_t iter_get_bvecs_alloc(struct iov_iter *iter, size_t maxsize,
> -				    struct bio_vec **bvecs, int *num_bvecs)
> +				    struct bio_vec **bvecs, int *num_bvecs,
> +				    bool write)
>  {
>  	struct bio_vec *bv;
>  	size_t orig_count = iov_iter_count(iter);
> @@ -146,7 +148,7 @@ static ssize_t iter_get_bvecs_alloc(struct iov_iter *iter, size_t maxsize,
>  	if (!bv)
>  		return -ENOMEM;
>  
> -	bytes = __iter_get_bvecs(iter, maxsize, bv);
> +	bytes = __iter_get_bvecs(iter, maxsize, bv, write);
>  	if (bytes < 0) {
>  		/*
>  		 * No pages were pinned -- just free the array.
> @@ -1334,7 +1336,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
>  			break;
>  		}
>  
> -		len = iter_get_bvecs_alloc(iter, size, &bvecs, &num_pages);
> +		len = iter_get_bvecs_alloc(iter, size, &bvecs, &num_pages, write);
>  		if (len < 0) {
>  			ceph_osdc_put_request(req);
>  			ret = len;
> diff --git a/fs/cifs/file.c b/fs/cifs/file.c
> index 22dfc1f8b4f1..d100b9cb8682 100644
> --- a/fs/cifs/file.c
> +++ b/fs/cifs/file.c
> @@ -3290,8 +3290,8 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
>  		if (ctx->direct_io) {
>  			ssize_t result;
>  
> -			result = iov_iter_get_pages_alloc2(
> -				from, &pagevec, cur_len, &start);
> +			result = iov_iter_get_pages_alloc(
> +				from, &pagevec, cur_len, &start, FOLL_SOURCE_BUF);
>  			if (result < 0) {
>  				cifs_dbg(VFS,
>  					 "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
> @@ -4031,9 +4031,9 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
>  		if (ctx->direct_io) {
>  			ssize_t result;
>  
> -			result = iov_iter_get_pages_alloc2(
> +			result = iov_iter_get_pages_alloc(
>  					&direct_iov, &pagevec,
> -					cur_len, &start);
> +					cur_len, &start, FOLL_DEST_BUF);
>  			if (result < 0) {
>  				cifs_dbg(VFS,
>  					 "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
> diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
> index 4d3c586785a5..9655cf359ab9 100644
> --- a/fs/cifs/misc.c
> +++ b/fs/cifs/misc.c
> @@ -1030,7 +1030,8 @@ setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw)
>  	saved_len = count;
>  
>  	while (count && npages < max_pages) {
> -		rc = iov_iter_get_pages2(iter, pages, count, max_pages, &start);
> +		rc = iov_iter_get_pages(iter, pages, count, max_pages, &start,
> +					rw == WRITE ? FOLL_SOURCE_BUF : FOLL_DEST_BUF);
>  		if (rc < 0) {
>  			cifs_dbg(VFS, "Couldn't get user pages (rc=%zd)\n", rc);
>  			break;
> diff --git a/fs/direct-io.c b/fs/direct-io.c
> index cf196f2a211e..b1e26a706e31 100644
> --- a/fs/direct-io.c
> +++ b/fs/direct-io.c
> @@ -169,8 +169,10 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
>  	const enum req_op dio_op = dio->opf & REQ_OP_MASK;
>  	ssize_t ret;
>  
> -	ret = iov_iter_get_pages2(sdio->iter, dio->pages, LONG_MAX, DIO_PAGES,
> -				&sdio->from);
> +	ret = iov_iter_get_pages(sdio->iter, dio->pages, LONG_MAX, DIO_PAGES,
> +				 &sdio->from,
> +				 op_is_write(dio_op) ?
> +				 FOLL_SOURCE_BUF : FOLL_DEST_BUF);
>  
>  	if (ret < 0 && sdio->blocks_available && dio_op == REQ_OP_WRITE) {
>  		struct page *page = ZERO_PAGE(0);
> diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
> index e8b60ce72c9a..e3d8443e24a6 100644
> --- a/fs/fuse/dev.c
> +++ b/fs/fuse/dev.c
> @@ -730,7 +730,8 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
>  		}
>  	} else {
>  		size_t off;
> -		err = iov_iter_get_pages2(cs->iter, &page, PAGE_SIZE, 1, &off);
> +		err = iov_iter_get_pages(cs->iter, &page, PAGE_SIZE, 1, &off,
> +					 cs->write ? FOLL_SOURCE_BUF : FOLL_DEST_BUF);
>  		if (err < 0)
>  			return err;
>  		BUG_ON(!err);
> diff --git a/fs/fuse/file.c b/fs/fuse/file.c
> index d68b45f8b3ae..68c196437306 100644
> --- a/fs/fuse/file.c
> +++ b/fs/fuse/file.c
> @@ -1414,10 +1414,10 @@ static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii,
>  	while (nbytes < *nbytesp && ap->num_pages < max_pages) {
>  		unsigned npages;
>  		size_t start;
> -		ret = iov_iter_get_pages2(ii, &ap->pages[ap->num_pages],
> -					*nbytesp - nbytes,
> -					max_pages - ap->num_pages,
> -					&start);
> +		ret = iov_iter_get_pages(ii, &ap->pages[ap->num_pages],
> +					 *nbytesp - nbytes,
> +					 max_pages - ap->num_pages,
> +					 &start, write ? FOLL_SOURCE_BUF : FOLL_DEST_BUF);
>  		if (ret < 0)
>  			break;
>  
> diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
> index d865945f2a63..42af84685f20 100644
> --- a/fs/nfs/direct.c
> +++ b/fs/nfs/direct.c
> @@ -332,8 +332,9 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
>  		size_t pgbase;
>  		unsigned npages, i;
>  
> -		result = iov_iter_get_pages_alloc2(iter, &pagevec,
> -						  rsize, &pgbase);
> +		result = iov_iter_get_pages_alloc(iter, &pagevec,
> +						  rsize, &pgbase,
> +						  FOLL_DEST_BUF);
>  		if (result < 0)
>  			break;
>  	
> @@ -791,8 +792,9 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
>  		size_t pgbase;
>  		unsigned npages, i;
>  
> -		result = iov_iter_get_pages_alloc2(iter, &pagevec,
> -						  wsize, &pgbase);
> +		result = iov_iter_get_pages_alloc(iter, &pagevec,
> +						  wsize, &pgbase,
> +						  FOLL_SOURCE_BUF);
>  		if (result < 0)
>  			break;
>  
> diff --git a/fs/splice.c b/fs/splice.c
> index 5969b7a1d353..19c5b5adc548 100644
> --- a/fs/splice.c
> +++ b/fs/splice.c
> @@ -1165,7 +1165,8 @@ static int iter_to_pipe(struct iov_iter *from,
>  		size_t start;
>  		int i, n;
>  
> -		left = iov_iter_get_pages2(from, pages, ~0UL, 16, &start);
> +		left = iov_iter_get_pages(from, pages, ~0UL, 16, &start,
> +					  FOLL_SOURCE_BUF);
>  		if (left <= 0) {
>  			ret = left;
>  			break;
> diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h
> index a5db86670bdf..12058ab6cad9 100644
> --- a/include/crypto/if_alg.h
> +++ b/include/crypto/if_alg.h
> @@ -165,7 +165,8 @@ int af_alg_release(struct socket *sock);
>  void af_alg_release_parent(struct sock *sk);
>  int af_alg_accept(struct sock *sk, struct socket *newsock, bool kern);
>  
> -int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len);
> +int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len,
> +		   unsigned int gup_flags);
>  void af_alg_free_sg(struct af_alg_sgl *sgl);
>  
>  static inline struct alg_sock *alg_sk(struct sock *sk)
> diff --git a/include/linux/bio.h b/include/linux/bio.h
> index 22078a28d7cb..3f7ba7fe48ac 100644
> --- a/include/linux/bio.h
> +++ b/include/linux/bio.h
> @@ -40,11 +40,25 @@ static inline unsigned int bio_max_segs(unsigned int nr_segs)
>  #define bio_sectors(bio)	bvec_iter_sectors((bio)->bi_iter)
>  #define bio_end_sector(bio)	bvec_iter_end_sector((bio)->bi_iter)
>  
> +/**
> + * bio_is_write - Query if the I/O direction is towards the disk
> + * @bio: The bio to query
> + *
> + * Return true if this is some sort of write operation - ie. the data is going
> + * towards the disk.
> + */
> +static inline bool bio_is_write(const struct bio *bio)
> +{
> +	return op_is_write(bio_op(bio));
> +}
> +
>  /*
>   * Return the data direction, READ or WRITE.
>   */
> -#define bio_data_dir(bio) \
> -	(op_is_write(bio_op(bio)) ? WRITE : READ)
> +static inline int bio_data_dir(const struct bio *bio)
> +{
> +	return bio_is_write(bio) ? WRITE : READ;
> +}
>  
>  /*
>   * Check whether this bio carries any data or not. A NULL bio is allowed.
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index f3f196e4d66d..3af4ca8b1fe7 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -3090,6 +3090,10 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
>  #define FOLL_PCI_P2PDMA	0x100000 /* allow returning PCI P2PDMA pages */
>  #define FOLL_INTERRUPTIBLE  0x200000 /* allow interrupts from generic signals */
>  
> +#define FOLL_SOURCE_BUF	0		/* Memory will be read from by I/O */
> +#define FOLL_DEST_BUF	FOLL_WRITE	/* Memory will be written to by I/O */
> +#define FOLL_BUF_MASK	FOLL_WRITE
> +
>  /*
>   * FOLL_PIN and FOLL_LONGTERM may be used in various combinations with each
>   * other. Here is what they mean, and how to use them:
> @@ -3143,6 +3147,12 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
>   * releasing pages: get_user_pages*() pages must be released via put_page(),
>   * while pin_user_pages*() pages must be released via unpin_user_page().
>   *
> + * FOLL_SOURCE_BUF and FOLL_DEST_BUF are indicators to get_user_pages*() and
> + * iov_iter_*_pages*() as to how the pages obtained are going to be used.
> + * FOLL_SOURCE_BUF indicates that I/O op is going to transfer from memory to
> + * device; FOLL_DEST_BUF that the op is going to transfer from device to
> + * memory.
> + *
>   * Please see Documentation/core-api/pin_user_pages.rst for more information.
>   */
>  
> diff --git a/lib/iov_iter.c b/lib/iov_iter.c
> index 68497d9c1452..f53583836009 100644
> --- a/lib/iov_iter.c
> +++ b/lib/iov_iter.c
> @@ -1429,11 +1429,6 @@ static struct page *first_bvec_segment(const struct iov_iter *i,
>  	return page;
>  }
>  
> -static unsigned char iov_iter_rw(const struct iov_iter *i)
> -{
> -	return i->data_source ? WRITE : READ;
> -}
> -
>  static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i,
>  		   struct page ***pages, size_t maxsize,
>  		   unsigned int maxpages, size_t *start,
> @@ -1448,12 +1443,17 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i,
>  	if (maxsize > MAX_RW_COUNT)
>  		maxsize = MAX_RW_COUNT;
>  
> +	if (WARN_ON_ONCE((gup_flags & FOLL_BUF_MASK) == FOLL_SOURCE_BUF &&
> +			 i->data_source == ITER_DEST))
> +		return -EIO;
> +	if (WARN_ON_ONCE((gup_flags & FOLL_BUF_MASK) == FOLL_DEST_BUF &&
> +			 i->data_source == ITER_SOURCE))
> +		return -EIO;
> +
>  	if (likely(user_backed_iter(i))) {
>  		unsigned long addr;
>  		int res;
>  
> -		if (iov_iter_rw(i) != WRITE)
> -			gup_flags |= FOLL_WRITE;
>  		if (i->nofault)
>  			gup_flags |= FOLL_NOFAULT;
>  
> diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
> index 3c27ffb781e3..eb28b54fe5f6 100644
> --- a/net/9p/trans_virtio.c
> +++ b/net/9p/trans_virtio.c
> @@ -310,7 +310,8 @@ static int p9_get_mapped_pages(struct virtio_chan *chan,
>  			       struct iov_iter *data,
>  			       int count,
>  			       size_t *offs,
> -			       int *need_drop)
> +			       int *need_drop,
> +			       unsigned int gup_flags)
>  {
>  	int nr_pages;
>  	int err;
> @@ -330,7 +331,8 @@ static int p9_get_mapped_pages(struct virtio_chan *chan,
>  			if (err == -ERESTARTSYS)
>  				return err;
>  		}
> -		n = iov_iter_get_pages_alloc2(data, pages, count, offs);
> +		n = iov_iter_get_pages_alloc(data, pages, count, offs,
> +					     gup_flags);
>  		if (n < 0)
>  			return n;
>  		*need_drop = 1;
> @@ -437,7 +439,8 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
>  	if (uodata) {
>  		__le32 sz;
>  		int n = p9_get_mapped_pages(chan, &out_pages, uodata,
> -					    outlen, &offs, &need_drop);
> +					    outlen, &offs, &need_drop,
> +					    FOLL_DEST_BUF);
>  		if (n < 0) {
>  			err = n;
>  			goto err_out;
> @@ -456,7 +459,8 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
>  		memcpy(&req->tc.sdata[0], &sz, sizeof(sz));
>  	} else if (uidata) {
>  		int n = p9_get_mapped_pages(chan, &in_pages, uidata,
> -					    inlen, &offs, &need_drop);
> +					    inlen, &offs, &need_drop,
> +					    FOLL_SOURCE_BUF);
>  		if (n < 0) {
>  			err = n;
>  			goto err_out;
> diff --git a/net/core/datagram.c b/net/core/datagram.c
> index e4ff2db40c98..9f0914b781ad 100644
> --- a/net/core/datagram.c
> +++ b/net/core/datagram.c
> @@ -632,8 +632,9 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
>  		if (frag == MAX_SKB_FRAGS)
>  			return -EMSGSIZE;
>  
> -		copied = iov_iter_get_pages2(from, pages, length,
> -					    MAX_SKB_FRAGS - frag, &start);
> +		copied = iov_iter_get_pages(from, pages, length,
> +					    MAX_SKB_FRAGS - frag, &start,
> +					    FOLL_SOURCE_BUF);
>  		if (copied < 0)
>  			return -EFAULT;
>  
> diff --git a/net/core/skmsg.c b/net/core/skmsg.c
> index 53d0251788aa..f63a13690712 100644
> --- a/net/core/skmsg.c
> +++ b/net/core/skmsg.c
> @@ -324,8 +324,8 @@ int sk_msg_zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
>  			goto out;
>  		}
>  
> -		copied = iov_iter_get_pages2(from, pages, bytes, maxpages,
> -					    &offset);
> +		copied = iov_iter_get_pages(from, pages, bytes, maxpages,
> +					    &offset, FOLL_SOURCE_BUF);
>  		if (copied <= 0) {
>  			ret = -EFAULT;
>  			goto out;
> diff --git a/net/rds/message.c b/net/rds/message.c
> index b47e4f0a1639..fcfd406b97af 100644
> --- a/net/rds/message.c
> +++ b/net/rds/message.c
> @@ -390,8 +390,8 @@ static int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter *
>  		size_t start;
>  		ssize_t copied;
>  
> -		copied = iov_iter_get_pages2(from, &pages, PAGE_SIZE,
> -					    1, &start);
> +		copied = iov_iter_get_pages(from, &pages, PAGE_SIZE,
> +					    1, &start, FOLL_SOURCE_BUF);
>  		if (copied < 0) {
>  			struct mmpin *mmp;
>  			int i;
> diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
> index 9ed978634125..59acaeb24f54 100644
> --- a/net/tls/tls_sw.c
> +++ b/net/tls/tls_sw.c
> @@ -1354,9 +1354,8 @@ static int tls_setup_from_iter(struct iov_iter *from,
>  			rc = -EFAULT;
>  			goto out;
>  		}
> -		copied = iov_iter_get_pages2(from, pages,
> -					    length,
> -					    maxpages, &offset);
> +		copied = iov_iter_get_pages(from, pages, length,
> +					    maxpages, &offset, FOLL_DEST_BUF);
>  		if (copied <= 0) {
>  			rc = -EFAULT;
>  			goto out;
> 
> 
---end quoted text---
David Hildenbrand Jan. 17, 2023, 8:07 a.m. UTC | #2
On 17.01.23 08:57, Christoph Hellwig wrote:
> On Mon, Jan 16, 2023 at 11:08:24PM +0000, David Howells wrote:
>> Define FOLL_SOURCE_BUF and FOLL_DEST_BUF to indicate to get_user_pages*()
>> and iov_iter_get_pages*() how the buffer is intended to be used in an I/O
>> operation.  Don't use READ and WRITE as a read I/O writes to memory and
>> vice versa - which causes confusion.
>>
>> The direction is checked against the iterator's data_source.
> 
> Why can't we use the existing FOLL_WRITE?

Agreed. From what I understand, David considers that confusing when 
considering the I/O side of things.

I recall that there is

DMA_BIDIRECTIONAL -> FOLL_WRITE
DMA_TO_DEVICE -> !FOLL_WRITE
DMA_FROM_DEVICE -> FOLL_WRITE

that used different defines for a different API. Such terminology would 
be easier to get ... but then, again, not sure if we really need 
acronyms here.

We're pinning pages and FOLL_WRITE defines how we (pinning the page) are 
going to access these pages: R/O or R/W. So the read vs. write is never 
from the POV of the device (DMA read will write to the page).
Christoph Hellwig Jan. 17, 2023, 8:09 a.m. UTC | #3
On Tue, Jan 17, 2023 at 09:07:48AM +0100, David Hildenbrand wrote:
> Agreed. What I understand, David considers that confusing when considering
> the I/O side of things.
> 
> I recall that there is
> 
> DMA_BIDIRECTIONAL -> FOLL_WRITE
> DMA_TO_DEVICE -> !FOLL_WRITE
> DMA_FROM_DEVICE -> FOLL_WRITE
> 
> that used different defines for a different API. Such terminology would be
> easier to get ... but then, again, not sure if we really need acronyms here.
> 
> We're pinning pages and FOLL_WRITE defines how we (pinning the page) are
> going to access these pages: R/O or R/W. So the read vs. write is never from
> the POV of the device (DMA read will write to the page).

Yes.  Maybe the name could be a little more verbose, FOLL_MEM_WRITE or
FOLL_WRITE_TO_MEM.  But I'd really prefer any renaming to be split from
logic changes.
David Howells Jan. 17, 2023, 8:44 a.m. UTC | #4
Christoph Hellwig <hch@infradead.org> wrote:

> On Mon, Jan 16, 2023 at 11:08:24PM +0000, David Howells wrote:
> > Define FOLL_SOURCE_BUF and FOLL_DEST_BUF to indicate to get_user_pages*()
> > and iov_iter_get_pages*() how the buffer is intended to be used in an I/O
> > operation.  Don't use READ and WRITE as a read I/O writes to memory and
> > vice versa - which causes confusion.
> > 
> > The direction is checked against the iterator's data_source.
> 
> Why can't we use the existing FOLL_WRITE?

Because FOLL_WRITE doesn't mean the same as WRITE:

 (1) It looks like it should really be FOLL_CHECK_PTES_WRITABLE.  It's not
     defined as being anything to do with the I/O.

 (2) The reason Al added ITER_SOURCE and ITER_DEST is that the use of READ and
     WRITE with the iterators is confusing and kind of inverted - and the same
     would apply with using FOLL_WRITE:

	if (rw == READ)
		gup_flags |= FOLL_WRITE;

So my thought is to make how you are using the buffer described by the
iterator explicit: "I'm using it as a source buffer" or "I'm using it as a
destination buffer".

Also, I don't want it to be FOLL_WRITE or 0.  I want it to be written
explicitly in both cases.  If you're going to insist on using FOLL_WRITE, then
there should be a FOLL_READ to go with it, even if it's #defined to 0.

David
Christoph Hellwig Jan. 17, 2023, 8:46 a.m. UTC | #5
On Tue, Jan 17, 2023 at 08:44:16AM +0000, David Howells wrote:
> Also, I don't want it to be FOLL_WRITE or 0.  I want it to be written
> explicitly in both cases.  If you're going to insist on using FOLL_WRITE, then
> there should be a FOLL_READ to go with it, even if it's #defined to 0.

Well, that's not how FOLL_* works.  And another new flag that is defined
to 0 but only used by some I/O callers is really confusing.
David Hildenbrand Jan. 17, 2023, 8:47 a.m. UTC | #6
On 17.01.23 09:44, David Howells wrote:
> Christoph Hellwig <hch@infradead.org> wrote:
> 
>> On Mon, Jan 16, 2023 at 11:08:24PM +0000, David Howells wrote:
>>> Define FOLL_SOURCE_BUF and FOLL_DEST_BUF to indicate to get_user_pages*()
>>> and iov_iter_get_pages*() how the buffer is intended to be used in an I/O
>>> operation.  Don't use READ and WRITE as a read I/O writes to memory and
>>> vice versa - which causes confusion.
>>>
>>> The direction is checked against the iterator's data_source.
>>
>> Why can't we use the existing FOLL_WRITE?
> 
> Because FOLL_WRITE doesn't mean the same as WRITE:
> 
>   (1) It looks like it should really be FOLL_CHECK_PTES_WRITABLE.  It's not
>       defined as being anything to do with the I/O.

Especially combined with FOLL_FORCE, this is not true.
Al Viro Jan. 18, 2023, 11:03 p.m. UTC | #7
On Mon, Jan 16, 2023 at 11:57:08PM -0800, Christoph Hellwig wrote:
> On Mon, Jan 16, 2023 at 11:08:24PM +0000, David Howells wrote:
> > Define FOLL_SOURCE_BUF and FOLL_DEST_BUF to indicate to get_user_pages*()
> > and iov_iter_get_pages*() how the buffer is intended to be used in an I/O
> > operation.  Don't use READ and WRITE as a read I/O writes to memory and
> > vice versa - which causes confusion.
> > 
> > The direction is checked against the iterator's data_source.
> 
> Why can't we use the existing FOLL_WRITE?

	I'm really not fond of passing FOLL_... stuff into iov_iter
primitives.  That space contains things like FOLL_PIN, which makes
no sense whatsoever for non-user-backed iterators; having the
callers pass it in makes them automatically dependent upon the
iov_iter flavour.
Al Viro Jan. 19, 2023, 12:15 a.m. UTC | #8
On Wed, Jan 18, 2023 at 11:03:52PM +0000, Al Viro wrote:
> On Mon, Jan 16, 2023 at 11:57:08PM -0800, Christoph Hellwig wrote:
> > On Mon, Jan 16, 2023 at 11:08:24PM +0000, David Howells wrote:
> > > Define FOLL_SOURCE_BUF and FOLL_DEST_BUF to indicate to get_user_pages*()
> > > and iov_iter_get_pages*() how the buffer is intended to be used in an I/O
> > > operation.  Don't use READ and WRITE as a read I/O writes to memory and
> > > vice versa - which causes confusion.
> > > 
> > > The direction is checked against the iterator's data_source.
> > 
> > Why can't we use the existing FOLL_WRITE?
> 
> 	I'm really not fond of passing FOLL_... stuff into iov_iter
> primitives.  That space contains things like FOLL_PIN, which makes
> no sense whatsoever for non-user-backed iterators; having the
> callers pass it in makes them automatically dependent upon the
> iov_iter flavour.

Actually, looking at that thing...  Currently we use it only for
FOLL_PCI_P2PDMA.  It alters behaviour of get_user_pages_fast(), but...
it is completely ignored for ITER_BVEC or ITER_PIPE.  So how the
hell is it supposed to work?

And ITER_BVEC *can* get there.  blkdev_direct_IO() can get anything
->write_iter() can get, and io_uring will feed stuff to it.  For
that matter, ->read_iter() can lead to it as well, so
generic_file_splice_read() can end up passing ITER_PIPE to that
sucker.

Could somebody give a braindump on that thing?  It looks like we
have pages that should not be DMA'd to/from unless driver takes
some precautions and we want to make sure they won't be fed to
drivers that don't take such.  With checks done in a very odd
place...
Al Viro Jan. 19, 2023, 2:11 a.m. UTC | #9
On Thu, Jan 19, 2023 at 12:15:44AM +0000, Al Viro wrote:
> On Wed, Jan 18, 2023 at 11:03:52PM +0000, Al Viro wrote:
> > On Mon, Jan 16, 2023 at 11:57:08PM -0800, Christoph Hellwig wrote:
> > > On Mon, Jan 16, 2023 at 11:08:24PM +0000, David Howells wrote:
> > > > Define FOLL_SOURCE_BUF and FOLL_DEST_BUF to indicate to get_user_pages*()
> > > > and iov_iter_get_pages*() how the buffer is intended to be used in an I/O
> > > > operation.  Don't use READ and WRITE as a read I/O writes to memory and
> > > > vice versa - which causes confusion.
> > > > 
> > > > The direction is checked against the iterator's data_source.
> > > 
> > > Why can't we use the existing FOLL_WRITE?
> > 
> > 	I'm really not fond of passing FOLL_... stuff into iov_iter
> > primitives.  That space contains things like FOLL_PIN, which makes
> > no sense whatsoever for non-user-backed iterators; having the
> > callers pass it in makes them automatically dependent upon the
> > iov_iter flavour.
> 
> Actually, looking at that thing...  Currently we use it only for
> FOLL_PCI_P2PDMA.  It alters behaviour of get_user_pages_fast(), but...
> it is completely ignored for ITER_BVEC or ITER_PIPE.  So how the
> hell is it supposed to work?
> 
> And ITER_BVEC *can* get there.  blkdev_direct_IO() can get anything
> ->write_iter() can get, and io_uring will feed stuff to it.  For
> that matter, ->read_iter() can lead to it as well, so
> generic_file_splice_read() can end up passing ITER_PIPE to that
> sucker.
> 
> Could somebody give a braindump on that thing?  It looks like we
> have pages that should not be DMA'd to/from unless driver takes
> some precautions and we want to make sure they won't be fed to
> drivers that don't take such.  With checks done in a very odd
> place...

PS: Documentation/driver-api/pci/p2pdma.rst seems to imply that those
pages should not be possible to mmap, so either that needs to be
updated, or... how the hell could we run into those in g-u-p,
anyway?  Really confused...
Christoph Hellwig Jan. 19, 2023, 5:47 a.m. UTC | #10
On Thu, Jan 19, 2023 at 12:15:44AM +0000, Al Viro wrote:
> Actually, looking at that thing...  Currently we use it only for
> FOLL_PCI_P2PDMA.  It alters behaviour of get_user_pages_fast(), but...
> it is completely ignored for ITER_BVEC or ITER_PIPE.  So how the
> hell is it supposed to work?

It broadens the acceptance criteria for UBUF/IOVEC types.  It doesn't
change behavior for already accepted memory for those or any others.

> Could somebody give a braindump on that thing?  It looks like we
> have pages that should not be DMA'd to/from unless driver takes
> some precautions and we want to make sure they won't be fed to
> drivers that don't take such.  With checks done in a very odd
> place...

Yes, normal gup excludes P2P pages.  This flag allows it to get them.
Christoph Hellwig Jan. 19, 2023, 5:47 a.m. UTC | #11
On Thu, Jan 19, 2023 at 02:11:19AM +0000, Al Viro wrote:
> PS: Documentation/driver-api/pci/p2pdma.rst seems to imply that those
> pages should not be possible to mmap, so either that needs to be
> updated, or... how the hell could we run into those in g-u-p,
> anyway?  Really confused...

Yes, that needs an update.  That limitation was from before the
mmap support was added.
diff mbox series

Patch

diff --git a/block/bio.c b/block/bio.c
index 5f96fcae3f75..867cf4db87ea 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1242,6 +1242,8 @@  static int bio_iov_add_zone_append_page(struct bio *bio, struct page *page,
  * pages will have to be released using put_page() when done.
  * For multi-segment *iter, this function only adds pages from the
  * next non-empty segment of the iov iterator.
+ *
+ * The I/O direction is determined from the bio operation type.
  */
 static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 {
@@ -1263,6 +1265,8 @@  static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 	BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2);
 	pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);
 
+	gup_flags |= bio_is_write(bio) ? FOLL_SOURCE_BUF : FOLL_DEST_BUF;
+
 	if (bio->bi_bdev && blk_queue_pci_p2pdma(bio->bi_bdev->bd_disk->queue))
 		gup_flags |= FOLL_PCI_P2PDMA;
 
@@ -1332,6 +1336,8 @@  static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
  * fit into the bio, or are requested in @iter, whatever is smaller. If
  * MM encounters an error pinning the requested pages, it stops. Error
  * is returned only if 0 pages could be pinned.
+ *
+ * The bio operation indicates the data direction.
  */
 int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 {
diff --git a/block/blk-map.c b/block/blk-map.c
index 08cbb7ff3b19..c30be529fb55 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -279,6 +279,8 @@  static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
 	if (bio == NULL)
 		return -ENOMEM;
 
+	gup_flags |= bio_is_write(bio) ? FOLL_SOURCE_BUF : FOLL_DEST_BUF;
+
 	if (blk_queue_pci_p2pdma(rq->q))
 		gup_flags |= FOLL_PCI_P2PDMA;
 
diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index 0a4fa2a429e2..7a68db157fae 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -531,13 +531,15 @@  static const struct net_proto_family alg_family = {
 	.owner	=	THIS_MODULE,
 };
 
-int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len)
+int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len,
+		   unsigned int gup_flags)
 {
 	size_t off;
 	ssize_t n;
 	int npages, i;
 
-	n = iov_iter_get_pages2(iter, sgl->pages, len, ALG_MAX_PAGES, &off);
+	n = iov_iter_get_pages(iter, sgl->pages, len, ALG_MAX_PAGES, &off,
+			       gup_flags);
 	if (n < 0)
 		return n;
 
@@ -1310,7 +1312,8 @@  int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags,
 		list_add_tail(&rsgl->list, &areq->rsgl_list);
 
 		/* make one iovec available as scatterlist */
-		err = af_alg_make_sg(&rsgl->sgl, &msg->msg_iter, seglen);
+		err = af_alg_make_sg(&rsgl->sgl, &msg->msg_iter, seglen,
+				     FOLL_DEST_BUF);
 		if (err < 0) {
 			rsgl->sg_num_bytes = 0;
 			return err;
diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c
index 1d017ec5c63c..fe3d2258145f 100644
--- a/crypto/algif_hash.c
+++ b/crypto/algif_hash.c
@@ -91,7 +91,8 @@  static int hash_sendmsg(struct socket *sock, struct msghdr *msg,
 		if (len > limit)
 			len = limit;
 
-		len = af_alg_make_sg(&ctx->sgl, &msg->msg_iter, len);
+		len = af_alg_make_sg(&ctx->sgl, &msg->msg_iter, len,
+				     FOLL_SOURCE_BUF);
 		if (len < 0) {
 			err = copied ? 0 : len;
 			goto unlock;
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index dca6346d75b3..5d10837d19ec 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -646,10 +646,13 @@  vhost_scsi_map_to_sgl(struct vhost_scsi_cmd *cmd,
 	struct scatterlist *sg = sgl;
 	ssize_t bytes;
 	size_t offset;
-	unsigned int npages = 0;
+	unsigned int npages = 0, gup_flags = 0;
 
-	bytes = iov_iter_get_pages2(iter, pages, LONG_MAX,
-				VHOST_SCSI_PREALLOC_UPAGES, &offset);
+	gup_flags |= write ? FOLL_SOURCE_BUF : FOLL_DEST_BUF;
+
+	bytes = iov_iter_get_pages(iter, pages, LONG_MAX,
+				   VHOST_SCSI_PREALLOC_UPAGES, &offset,
+				   gup_flags);
 	/* No pages were pinned */
 	if (bytes <= 0)
 		return bytes < 0 ? bytes : -EFAULT;
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 8c74871e37c9..cfc3353e5604 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -328,7 +328,7 @@  static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
 
 	dout("%s: pos=%llu orig_len=%zu len=%llu\n", __func__, subreq->start, subreq->len, len);
 	iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages, subreq->start, len);
-	err = iov_iter_get_pages_alloc2(&iter, &pages, len, &page_off);
+	err = iov_iter_get_pages_alloc(&iter, &pages, len, &page_off, FOLL_DEST_BUF);
 	if (err < 0) {
 		dout("%s: iov_ter_get_pages_alloc returned %d\n", __func__, err);
 		goto out;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 27c72a2f6af5..ffd36eeea186 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -81,7 +81,7 @@  static __le32 ceph_flags_sys2wire(u32 flags)
 #define ITER_GET_BVECS_PAGES	64
 
 static ssize_t __iter_get_bvecs(struct iov_iter *iter, size_t maxsize,
-				struct bio_vec *bvecs)
+				struct bio_vec *bvecs, bool write)
 {
 	size_t size = 0;
 	int bvec_idx = 0;
@@ -95,8 +95,9 @@  static ssize_t __iter_get_bvecs(struct iov_iter *iter, size_t maxsize,
 		size_t start;
 		int idx = 0;
 
-		bytes = iov_iter_get_pages2(iter, pages, maxsize - size,
-					   ITER_GET_BVECS_PAGES, &start);
+		bytes = iov_iter_get_pages(iter, pages, maxsize - size,
+					   ITER_GET_BVECS_PAGES, &start,
+					   write ? FOLL_SOURCE_BUF : FOLL_DEST_BUF);
 		if (bytes < 0)
 			return size ?: bytes;
 
@@ -127,7 +128,8 @@  static ssize_t __iter_get_bvecs(struct iov_iter *iter, size_t maxsize,
  * Return the number of bytes in the created bio_vec array, or an error.
  */
 static ssize_t iter_get_bvecs_alloc(struct iov_iter *iter, size_t maxsize,
-				    struct bio_vec **bvecs, int *num_bvecs)
+				    struct bio_vec **bvecs, int *num_bvecs,
+				    bool write)
 {
 	struct bio_vec *bv;
 	size_t orig_count = iov_iter_count(iter);
@@ -146,7 +148,7 @@  static ssize_t iter_get_bvecs_alloc(struct iov_iter *iter, size_t maxsize,
 	if (!bv)
 		return -ENOMEM;
 
-	bytes = __iter_get_bvecs(iter, maxsize, bv);
+	bytes = __iter_get_bvecs(iter, maxsize, bv, write);
 	if (bytes < 0) {
 		/*
 		 * No pages were pinned -- just free the array.
@@ -1334,7 +1336,7 @@  ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
 			break;
 		}
 
-		len = iter_get_bvecs_alloc(iter, size, &bvecs, &num_pages);
+		len = iter_get_bvecs_alloc(iter, size, &bvecs, &num_pages, write);
 		if (len < 0) {
 			ceph_osdc_put_request(req);
 			ret = len;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 22dfc1f8b4f1..d100b9cb8682 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3290,8 +3290,8 @@  cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
 		if (ctx->direct_io) {
 			ssize_t result;
 
-			result = iov_iter_get_pages_alloc2(
-				from, &pagevec, cur_len, &start);
+			result = iov_iter_get_pages_alloc(
+				from, &pagevec, cur_len, &start, FOLL_SOURCE_BUF);
 			if (result < 0) {
 				cifs_dbg(VFS,
 					 "direct_writev couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
@@ -4031,9 +4031,9 @@  cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
 		if (ctx->direct_io) {
 			ssize_t result;
 
-			result = iov_iter_get_pages_alloc2(
+			result = iov_iter_get_pages_alloc(
 					&direct_iov, &pagevec,
-					cur_len, &start);
+					cur_len, &start, FOLL_DEST_BUF);
 			if (result < 0) {
 				cifs_dbg(VFS,
 					 "Couldn't get user pages (rc=%zd) iter type %d iov_offset %zd count %zd\n",
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 4d3c586785a5..9655cf359ab9 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -1030,7 +1030,8 @@  setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw)
 	saved_len = count;
 
 	while (count && npages < max_pages) {
-		rc = iov_iter_get_pages2(iter, pages, count, max_pages, &start);
+		rc = iov_iter_get_pages(iter, pages, count, max_pages, &start,
+					rw == WRITE ? FOLL_SOURCE_BUF : FOLL_DEST_BUF);
 		if (rc < 0) {
 			cifs_dbg(VFS, "Couldn't get user pages (rc=%zd)\n", rc);
 			break;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index cf196f2a211e..b1e26a706e31 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -169,8 +169,10 @@  static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
 	const enum req_op dio_op = dio->opf & REQ_OP_MASK;
 	ssize_t ret;
 
-	ret = iov_iter_get_pages2(sdio->iter, dio->pages, LONG_MAX, DIO_PAGES,
-				&sdio->from);
+	ret = iov_iter_get_pages(sdio->iter, dio->pages, LONG_MAX, DIO_PAGES,
+				 &sdio->from,
+				 op_is_write(dio_op) ?
+				 FOLL_SOURCE_BUF : FOLL_DEST_BUF);
 
 	if (ret < 0 && sdio->blocks_available && dio_op == REQ_OP_WRITE) {
 		struct page *page = ZERO_PAGE(0);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index e8b60ce72c9a..e3d8443e24a6 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -730,7 +730,8 @@  static int fuse_copy_fill(struct fuse_copy_state *cs)
 		}
 	} else {
 		size_t off;
-		err = iov_iter_get_pages2(cs->iter, &page, PAGE_SIZE, 1, &off);
+		err = iov_iter_get_pages(cs->iter, &page, PAGE_SIZE, 1, &off,
+					 cs->write ? FOLL_SOURCE_BUF : FOLL_DEST_BUF);
 		if (err < 0)
 			return err;
 		BUG_ON(!err);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index d68b45f8b3ae..68c196437306 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1414,10 +1414,10 @@  static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii,
 	while (nbytes < *nbytesp && ap->num_pages < max_pages) {
 		unsigned npages;
 		size_t start;
-		ret = iov_iter_get_pages2(ii, &ap->pages[ap->num_pages],
-					*nbytesp - nbytes,
-					max_pages - ap->num_pages,
-					&start);
+		ret = iov_iter_get_pages(ii, &ap->pages[ap->num_pages],
+					 *nbytesp - nbytes,
+					 max_pages - ap->num_pages,
+					 &start, write ? FOLL_SOURCE_BUF : FOLL_DEST_BUF);
 		if (ret < 0)
 			break;
 
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index d865945f2a63..42af84685f20 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -332,8 +332,9 @@  static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
 		size_t pgbase;
 		unsigned npages, i;
 
-		result = iov_iter_get_pages_alloc2(iter, &pagevec,
-						  rsize, &pgbase);
+		result = iov_iter_get_pages_alloc(iter, &pagevec,
+						  rsize, &pgbase,
+						  FOLL_DEST_BUF);
 		if (result < 0)
 			break;
 	
@@ -791,8 +792,9 @@  static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 		size_t pgbase;
 		unsigned npages, i;
 
-		result = iov_iter_get_pages_alloc2(iter, &pagevec,
-						  wsize, &pgbase);
+		result = iov_iter_get_pages_alloc(iter, &pagevec,
+						  wsize, &pgbase,
+						  FOLL_SOURCE_BUF);
 		if (result < 0)
 			break;
 
diff --git a/fs/splice.c b/fs/splice.c
index 5969b7a1d353..19c5b5adc548 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1165,7 +1165,8 @@  static int iter_to_pipe(struct iov_iter *from,
 		size_t start;
 		int i, n;
 
-		left = iov_iter_get_pages2(from, pages, ~0UL, 16, &start);
+		left = iov_iter_get_pages(from, pages, ~0UL, 16, &start,
+					  FOLL_SOURCE_BUF);
 		if (left <= 0) {
 			ret = left;
 			break;
diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h
index a5db86670bdf..12058ab6cad9 100644
--- a/include/crypto/if_alg.h
+++ b/include/crypto/if_alg.h
@@ -165,7 +165,8 @@  int af_alg_release(struct socket *sock);
 void af_alg_release_parent(struct sock *sk);
 int af_alg_accept(struct sock *sk, struct socket *newsock, bool kern);
 
-int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len);
+int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len,
+		   unsigned int gup_flags);
 void af_alg_free_sg(struct af_alg_sgl *sgl);
 
 static inline struct alg_sock *alg_sk(struct sock *sk)
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 22078a28d7cb..3f7ba7fe48ac 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -40,11 +40,25 @@  static inline unsigned int bio_max_segs(unsigned int nr_segs)
 #define bio_sectors(bio)	bvec_iter_sectors((bio)->bi_iter)
 #define bio_end_sector(bio)	bvec_iter_end_sector((bio)->bi_iter)
 
+/**
+ * bio_is_write - Query if the I/O direction is towards the disk
+ * @bio: The bio to query
+ *
+ * Return true if this is some sort of write operation - ie. the data is going
+ * towards the disk.
+ */
+static inline bool bio_is_write(const struct bio *bio)
+{
+	return op_is_write(bio_op(bio));
+}
+
 /*
  * Return the data direction, READ or WRITE.
  */
-#define bio_data_dir(bio) \
-	(op_is_write(bio_op(bio)) ? WRITE : READ)
+static inline int bio_data_dir(const struct bio *bio)
+{
+	return bio_is_write(bio) ? WRITE : READ;
+}
 
 /*
  * Check whether this bio carries any data or not. A NULL bio is allowed.
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f3f196e4d66d..3af4ca8b1fe7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3090,6 +3090,10 @@  struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
 #define FOLL_PCI_P2PDMA	0x100000 /* allow returning PCI P2PDMA pages */
 #define FOLL_INTERRUPTIBLE  0x200000 /* allow interrupts from generic signals */
 
+#define FOLL_SOURCE_BUF	0		/* Memory will be read from by I/O */
+#define FOLL_DEST_BUF	FOLL_WRITE	/* Memory will be written to by I/O */
+#define FOLL_BUF_MASK	FOLL_WRITE
+
 /*
  * FOLL_PIN and FOLL_LONGTERM may be used in various combinations with each
  * other. Here is what they mean, and how to use them:
@@ -3143,6 +3147,12 @@  struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
  * releasing pages: get_user_pages*() pages must be released via put_page(),
  * while pin_user_pages*() pages must be released via unpin_user_page().
  *
+ * FOLL_SOURCE_BUF and FOLL_DEST_BUF are indicators to get_user_pages*() and
+ * iov_iter_*_pages*() as to how the pages obtained are going to be used.
+ * FOLL_SOURCE_BUF indicates that the I/O op is going to transfer from memory
+ * to device; FOLL_DEST_BUF that the op is going to transfer from device to
+ * memory.
+ *
  * Please see Documentation/core-api/pin_user_pages.rst for more information.
  */
 
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 68497d9c1452..f53583836009 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1429,11 +1429,6 @@  static struct page *first_bvec_segment(const struct iov_iter *i,
 	return page;
 }
 
-static unsigned char iov_iter_rw(const struct iov_iter *i)
-{
-	return i->data_source ? WRITE : READ;
-}
-
 static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i,
 		   struct page ***pages, size_t maxsize,
 		   unsigned int maxpages, size_t *start,
@@ -1448,12 +1443,17 @@  static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i,
 	if (maxsize > MAX_RW_COUNT)
 		maxsize = MAX_RW_COUNT;
 
+	if (WARN_ON_ONCE((gup_flags & FOLL_BUF_MASK) == FOLL_SOURCE_BUF &&
+			 i->data_source == ITER_DEST))
+		return -EIO;
+	if (WARN_ON_ONCE((gup_flags & FOLL_BUF_MASK) == FOLL_DEST_BUF &&
+			 i->data_source == ITER_SOURCE))
+		return -EIO;
+
 	if (likely(user_backed_iter(i))) {
 		unsigned long addr;
 		int res;
 
-		if (iov_iter_rw(i) != WRITE)
-			gup_flags |= FOLL_WRITE;
 		if (i->nofault)
 			gup_flags |= FOLL_NOFAULT;
 
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 3c27ffb781e3..eb28b54fe5f6 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -310,7 +310,8 @@  static int p9_get_mapped_pages(struct virtio_chan *chan,
 			       struct iov_iter *data,
 			       int count,
 			       size_t *offs,
-			       int *need_drop)
+			       int *need_drop,
+			       unsigned int gup_flags)
 {
 	int nr_pages;
 	int err;
@@ -330,7 +331,8 @@  static int p9_get_mapped_pages(struct virtio_chan *chan,
 			if (err == -ERESTARTSYS)
 				return err;
 		}
-		n = iov_iter_get_pages_alloc2(data, pages, count, offs);
+		n = iov_iter_get_pages_alloc(data, pages, count, offs,
+					     gup_flags);
 		if (n < 0)
 			return n;
 		*need_drop = 1;
@@ -437,7 +439,8 @@  p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
 	if (uodata) {
 		__le32 sz;
 		int n = p9_get_mapped_pages(chan, &out_pages, uodata,
-					    outlen, &offs, &need_drop);
+					    outlen, &offs, &need_drop,
+					    FOLL_DEST_BUF);
 		if (n < 0) {
 			err = n;
 			goto err_out;
@@ -456,7 +459,8 @@  p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
 		memcpy(&req->tc.sdata[0], &sz, sizeof(sz));
 	} else if (uidata) {
 		int n = p9_get_mapped_pages(chan, &in_pages, uidata,
-					    inlen, &offs, &need_drop);
+					    inlen, &offs, &need_drop,
+					    FOLL_SOURCE_BUF);
 		if (n < 0) {
 			err = n;
 			goto err_out;
diff --git a/net/core/datagram.c b/net/core/datagram.c
index e4ff2db40c98..9f0914b781ad 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -632,8 +632,9 @@  int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk,
 		if (frag == MAX_SKB_FRAGS)
 			return -EMSGSIZE;
 
-		copied = iov_iter_get_pages2(from, pages, length,
-					    MAX_SKB_FRAGS - frag, &start);
+		copied = iov_iter_get_pages(from, pages, length,
+					    MAX_SKB_FRAGS - frag, &start,
+					    FOLL_SOURCE_BUF);
 		if (copied < 0)
 			return -EFAULT;
 
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 53d0251788aa..f63a13690712 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -324,8 +324,8 @@  int sk_msg_zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
 			goto out;
 		}
 
-		copied = iov_iter_get_pages2(from, pages, bytes, maxpages,
-					    &offset);
+		copied = iov_iter_get_pages(from, pages, bytes, maxpages,
+					    &offset, FOLL_SOURCE_BUF);
 		if (copied <= 0) {
 			ret = -EFAULT;
 			goto out;
diff --git a/net/rds/message.c b/net/rds/message.c
index b47e4f0a1639..fcfd406b97af 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -390,8 +390,8 @@  static int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter *
 		size_t start;
 		ssize_t copied;
 
-		copied = iov_iter_get_pages2(from, &pages, PAGE_SIZE,
-					    1, &start);
+		copied = iov_iter_get_pages(from, &pages, PAGE_SIZE,
+					    1, &start, FOLL_SOURCE_BUF);
 		if (copied < 0) {
 			struct mmpin *mmp;
 			int i;
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 9ed978634125..59acaeb24f54 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1354,9 +1354,8 @@  static int tls_setup_from_iter(struct iov_iter *from,
 			rc = -EFAULT;
 			goto out;
 		}
-		copied = iov_iter_get_pages2(from, pages,
-					    length,
-					    maxpages, &offset);
+		copied = iov_iter_get_pages(from, pages, length,
+					    maxpages, &offset, FOLL_DEST_BUF);
 		if (copied <= 0) {
 			rc = -EFAULT;
 			goto out;