diff mbox series

[8/8] block: never take page references for ITER_BVEC

Message ID 20190502233332.28720-9-hch@lst.de (mailing list archive)
State New, archived
Headers show
Series [1/8] block: move the BIO_NO_PAGE_REF check into bio_release_pages | expand

Commit Message

Christoph Hellwig May 2, 2019, 11:33 p.m. UTC
If we pass pages through an iov_iter we always already have a reference
in the caller.  Thus remove the ITER_BVEC_FLAG_NO_REF and don't take
reference to pages by default for bvec backed iov_iters.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/bio.c          | 14 +-------------
 drivers/block/loop.c | 16 ++++------------
 fs/io_uring.c        |  3 ---
 include/linux/uio.h  |  8 --------
 4 files changed, 5 insertions(+), 36 deletions(-)

Comments

Johannes Thumshirn May 4, 2019, 5:41 p.m. UTC | #1
Looks good,
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Ming Lei May 6, 2019, 8:19 a.m. UTC | #2
On Thu, May 02, 2019 at 07:33:32PM -0400, Christoph Hellwig wrote:
> If we pass pages through an iov_iter we always already have a reference
> in the caller.  Thus remove the ITER_BVEC_FLAG_NO_REF and don't take
> reference to pages by default for bvec backed iov_iters.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  block/bio.c          | 14 +-------------
>  drivers/block/loop.c | 16 ++++------------
>  fs/io_uring.c        |  3 ---
>  include/linux/uio.h  |  8 --------
>  4 files changed, 5 insertions(+), 36 deletions(-)
> 
> diff --git a/block/bio.c b/block/bio.c
> index 3938e179a530..e999d530d863 100644
> --- a/block/bio.c
> +++ b/block/bio.c
> @@ -846,15 +846,6 @@ int bio_add_page(struct bio *bio, struct page *page,
>  }
>  EXPORT_SYMBOL(bio_add_page);
>  
> -static void bio_get_pages(struct bio *bio)
> -{
> -	struct bvec_iter_all iter_all;
> -	struct bio_vec *bvec;
> -
> -	bio_for_each_segment_all(bvec, bio, iter_all)
> -		get_page(bvec->bv_page);
> -}
> -
>  void bio_release_pages(struct bio *bio)
>  {
>  	struct bvec_iter_all iter_all;
> @@ -967,11 +958,8 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
>  			ret = __bio_iov_iter_get_pages(bio, iter);
>  	} while (!ret && iov_iter_count(iter) && !bio_full(bio));
>  
> -	if (iov_iter_bvec_no_ref(iter))
> +	if (is_bvec)
>  		bio_set_flag(bio, BIO_NO_PAGE_REF);
> -	else if (is_bvec)
> -		bio_get_pages(bio);
> -
>  	return bio->bi_vcnt ? 0 : ret;
>  }
>  
> diff --git a/drivers/block/loop.c b/drivers/block/loop.c
> index 102d79575895..c20710e617c2 100644
> --- a/drivers/block/loop.c
> +++ b/drivers/block/loop.c
> @@ -264,20 +264,12 @@ lo_do_transfer(struct loop_device *lo, int cmd,
>  	return ret;
>  }
>  
> -static inline void loop_iov_iter_bvec(struct iov_iter *i,
> -		unsigned int direction, const struct bio_vec *bvec,
> -		unsigned long nr_segs, size_t count)
> -{
> -	iov_iter_bvec(i, direction, bvec, nr_segs, count);
> -	i->type |= ITER_BVEC_FLAG_NO_REF;
> -}
> -
>  static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos)
>  {
>  	struct iov_iter i;
>  	ssize_t bw;
>  
> -	loop_iov_iter_bvec(&i, WRITE, bvec, 1, bvec->bv_len);
> +	iov_iter_bvec(&i, WRITE, bvec, 1, bvec->bv_len);
>  
>  	file_start_write(file);
>  	bw = vfs_iter_write(file, &i, ppos, 0);
> @@ -355,7 +347,7 @@ static int lo_read_simple(struct loop_device *lo, struct request *rq,
>  	ssize_t len;
>  
>  	rq_for_each_segment(bvec, rq, iter) {
> -		loop_iov_iter_bvec(&i, READ, &bvec, 1, bvec.bv_len);
> +		iov_iter_bvec(&i, READ, &bvec, 1, bvec.bv_len);
>  		len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0);
>  		if (len < 0)
>  			return len;
> @@ -396,7 +388,7 @@ static int lo_read_transfer(struct loop_device *lo, struct request *rq,
>  		b.bv_offset = 0;
>  		b.bv_len = bvec.bv_len;
>  
> -		loop_iov_iter_bvec(&i, READ, &b, 1, b.bv_len);
> +		iov_iter_bvec(&i, READ, &b, 1, b.bv_len);
>  		len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0);
>  		if (len < 0) {
>  			ret = len;
> @@ -563,7 +555,7 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
>  	}
>  	atomic_set(&cmd->ref, 2);
>  
> -	loop_iov_iter_bvec(&iter, rw, bvec, nr_bvec, blk_rq_bytes(rq));
> +	iov_iter_bvec(&iter, rw, bvec, nr_bvec, blk_rq_bytes(rq));
>  	iter.iov_offset = offset;
>  
>  	cmd->iocb.ki_pos = pos;
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index f65f85d89217..f7eb63a5b3db 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -853,9 +853,6 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw,
>  	iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len);
>  	if (offset)
>  		iov_iter_advance(iter, offset);
> -
> -	/* don't drop a reference to these pages */
> -	iter->type |= ITER_BVEC_FLAG_NO_REF;
>  	return 0;
>  }
>  
> diff --git a/include/linux/uio.h b/include/linux/uio.h
> index f184af1999a8..bace8fd40d0c 100644
> --- a/include/linux/uio.h
> +++ b/include/linux/uio.h
> @@ -23,9 +23,6 @@ struct kvec {
>  };
>  
>  enum iter_type {
> -	/* set if ITER_BVEC doesn't hold a bv_page ref */
> -	ITER_BVEC_FLAG_NO_REF = 2,
> -
>  	/* iter types */
>  	ITER_IOVEC = 4,
>  	ITER_KVEC = 8,
> @@ -93,11 +90,6 @@ static inline unsigned char iov_iter_rw(const struct iov_iter *i)
>  	return i->type & (READ | WRITE);
>  }
>  
> -static inline bool iov_iter_bvec_no_ref(const struct iov_iter *i)
> -{
> -	return (i->type & ITER_BVEC_FLAG_NO_REF) != 0;
> -}
> -
>  /*
>   * Total number of bytes covered by an iovec.
>   *

I remember that this way is the initial version of Jens' patch, however
kernel bug is triggered:

https://lore.kernel.org/linux-block/20190226034613.GA676@sol.localdomain/

Or maybe I miss some recent changes, could you explain it a bit?

Thanks,
Ming
Christoph Hellwig May 6, 2019, 1:30 p.m. UTC | #3
On Mon, May 06, 2019 at 04:19:54PM +0800, Ming Lei wrote:
> I remember that this way is the initial version of Jens' patch, however
> kernel bug is triggered:
> 
> https://lore.kernel.org/linux-block/20190226034613.GA676@sol.localdomain/
> 
> Or maybe I miss some recent changes, could you explain it a bit?

I'm not even sure how the original would crash..  When I run the
rest locally it gets and EBUSY from ioctl LOOP_SET_FD, so I'm
not sure what it tends up testing in the end.
Christoph Hellwig May 7, 2019, 6:09 a.m. UTC | #4
On Mon, May 06, 2019 at 03:30:27PM +0200, Christoph Hellwig wrote:
> On Mon, May 06, 2019 at 04:19:54PM +0800, Ming Lei wrote:
> > I remember that this way is the initial version of Jens' patch, however
> > kernel bug is triggered:
> > 
> > https://lore.kernel.org/linux-block/20190226034613.GA676@sol.localdomain/
> > 
> > Or maybe I miss some recent changes, could you explain it a bit?
> 
> I'm not even sure how the original would crash..  When I run the
> rest locally it gets and EBUSY from ioctl LOOP_SET_FD, so I'm
> not sure what it tends up testing in the end.

I also did another run with KASAN enabled and a clean environment,
this gives no EBUSY and still works fine.  I still don't understand
what the original problem might have been here, though.
diff mbox series

Patch

diff --git a/block/bio.c b/block/bio.c
index 3938e179a530..e999d530d863 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -846,15 +846,6 @@  int bio_add_page(struct bio *bio, struct page *page,
 }
 EXPORT_SYMBOL(bio_add_page);
 
-static void bio_get_pages(struct bio *bio)
-{
-	struct bvec_iter_all iter_all;
-	struct bio_vec *bvec;
-
-	bio_for_each_segment_all(bvec, bio, iter_all)
-		get_page(bvec->bv_page);
-}
-
 void bio_release_pages(struct bio *bio)
 {
 	struct bvec_iter_all iter_all;
@@ -967,11 +958,8 @@  int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 			ret = __bio_iov_iter_get_pages(bio, iter);
 	} while (!ret && iov_iter_count(iter) && !bio_full(bio));
 
-	if (iov_iter_bvec_no_ref(iter))
+	if (is_bvec)
 		bio_set_flag(bio, BIO_NO_PAGE_REF);
-	else if (is_bvec)
-		bio_get_pages(bio);
-
 	return bio->bi_vcnt ? 0 : ret;
 }
 
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 102d79575895..c20710e617c2 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -264,20 +264,12 @@  lo_do_transfer(struct loop_device *lo, int cmd,
 	return ret;
 }
 
-static inline void loop_iov_iter_bvec(struct iov_iter *i,
-		unsigned int direction, const struct bio_vec *bvec,
-		unsigned long nr_segs, size_t count)
-{
-	iov_iter_bvec(i, direction, bvec, nr_segs, count);
-	i->type |= ITER_BVEC_FLAG_NO_REF;
-}
-
 static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos)
 {
 	struct iov_iter i;
 	ssize_t bw;
 
-	loop_iov_iter_bvec(&i, WRITE, bvec, 1, bvec->bv_len);
+	iov_iter_bvec(&i, WRITE, bvec, 1, bvec->bv_len);
 
 	file_start_write(file);
 	bw = vfs_iter_write(file, &i, ppos, 0);
@@ -355,7 +347,7 @@  static int lo_read_simple(struct loop_device *lo, struct request *rq,
 	ssize_t len;
 
 	rq_for_each_segment(bvec, rq, iter) {
-		loop_iov_iter_bvec(&i, READ, &bvec, 1, bvec.bv_len);
+		iov_iter_bvec(&i, READ, &bvec, 1, bvec.bv_len);
 		len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0);
 		if (len < 0)
 			return len;
@@ -396,7 +388,7 @@  static int lo_read_transfer(struct loop_device *lo, struct request *rq,
 		b.bv_offset = 0;
 		b.bv_len = bvec.bv_len;
 
-		loop_iov_iter_bvec(&i, READ, &b, 1, b.bv_len);
+		iov_iter_bvec(&i, READ, &b, 1, b.bv_len);
 		len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0);
 		if (len < 0) {
 			ret = len;
@@ -563,7 +555,7 @@  static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
 	}
 	atomic_set(&cmd->ref, 2);
 
-	loop_iov_iter_bvec(&iter, rw, bvec, nr_bvec, blk_rq_bytes(rq));
+	iov_iter_bvec(&iter, rw, bvec, nr_bvec, blk_rq_bytes(rq));
 	iter.iov_offset = offset;
 
 	cmd->iocb.ki_pos = pos;
diff --git a/fs/io_uring.c b/fs/io_uring.c
index f65f85d89217..f7eb63a5b3db 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -853,9 +853,6 @@  static int io_import_fixed(struct io_ring_ctx *ctx, int rw,
 	iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len);
 	if (offset)
 		iov_iter_advance(iter, offset);
-
-	/* don't drop a reference to these pages */
-	iter->type |= ITER_BVEC_FLAG_NO_REF;
 	return 0;
 }
 
diff --git a/include/linux/uio.h b/include/linux/uio.h
index f184af1999a8..bace8fd40d0c 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -23,9 +23,6 @@  struct kvec {
 };
 
 enum iter_type {
-	/* set if ITER_BVEC doesn't hold a bv_page ref */
-	ITER_BVEC_FLAG_NO_REF = 2,
-
 	/* iter types */
 	ITER_IOVEC = 4,
 	ITER_KVEC = 8,
@@ -93,11 +90,6 @@  static inline unsigned char iov_iter_rw(const struct iov_iter *i)
 	return i->type & (READ | WRITE);
 }
 
-static inline bool iov_iter_bvec_no_ref(const struct iov_iter *i)
-{
-	return (i->type & ITER_BVEC_FLAG_NO_REF) != 0;
-}
-
 /*
  * Total number of bytes covered by an iovec.
  *