diff mbox series

[v8,07/10] block: Switch to pinning pages.

Message ID 20230123173007.325544-8-dhowells@redhat.com (mailing list archive)
State New, archived
Headers show
Series iov_iter: Improve page extraction (pin or just list) | expand

Commit Message

David Howells Jan. 23, 2023, 5:30 p.m. UTC
Add BIO_PAGE_PINNED to indicate that the pages in a bio are pinned
(FOLL_PIN) and that the pin will need removing.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Al Viro <viro@zeniv.linux.org.uk>
cc: Jens Axboe <axboe@kernel.dk>
cc: Jan Kara <jack@suse.cz>
cc: Christoph Hellwig <hch@lst.de>
cc: Matthew Wilcox <willy@infradead.org>
cc: Logan Gunthorpe <logang@deltatee.com>
cc: linux-block@vger.kernel.org
---

Notes:
    ver #8)
     - Move the infrastructure to clean up pinned pages to this patch [hch].
     - Put BIO_PAGE_PINNED before BIO_PAGE_REFFED as the latter should
       probably be removed at some point.  FOLL_PIN can then be renumbered
       first.

 block/bio.c               |  7 ++++---
 block/blk.h               | 28 ++++++++++++++++++++++++++++
 include/linux/bio.h       |  3 ++-
 include/linux/blk_types.h |  1 +
 4 files changed, 35 insertions(+), 4 deletions(-)

Comments

Christoph Hellwig Jan. 23, 2023, 6:23 p.m. UTC | #1
On Mon, Jan 23, 2023 at 05:30:04PM +0000, David Howells wrote:
> Add BIO_PAGE_PINNED to indicate that the pages in a bio are pinned
> (FOLL_PIN) and that the pin will need removing.

Looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>
David Hildenbrand Jan. 24, 2023, 2:32 p.m. UTC | #2
On 23.01.23 18:30, David Howells wrote:
> Add BIO_PAGE_PINNED to indicate that the pages in a bio are pinned
> (FOLL_PIN) and that the pin will need removing.
> 
> Signed-off-by: David Howells <dhowells@redhat.com>
> cc: Al Viro <viro@zeniv.linux.org.uk>
> cc: Jens Axboe <axboe@kernel.dk>
> cc: Jan Kara <jack@suse.cz>
> cc: Christoph Hellwig <hch@lst.de>
> cc: Matthew Wilcox <willy@infradead.org>
> cc: Logan Gunthorpe <logang@deltatee.com>
> cc: linux-block@vger.kernel.org
> ---
> 
> Notes:
>      ver #8)
>       - Move the infrastructure to clean up pinned pages to this patch [hch].
>       - Put BIO_PAGE_PINNED before BIO_PAGE_REFFED as the latter should
>         probably be removed at some point.  FOLL_PIN can then be renumbered
>         first.
> 
>   block/bio.c               |  7 ++++---
>   block/blk.h               | 28 ++++++++++++++++++++++++++++
>   include/linux/bio.h       |  3 ++-
>   include/linux/blk_types.h |  1 +
>   4 files changed, 35 insertions(+), 4 deletions(-)
> 
> diff --git a/block/bio.c b/block/bio.c
> index 40c2b01906da..6f98bcfc0c92 100644
> --- a/block/bio.c
> +++ b/block/bio.c
> @@ -1170,13 +1170,14 @@ bool bio_add_folio(struct bio *bio, struct folio *folio, size_t len,
>   
>   void __bio_release_pages(struct bio *bio, bool mark_dirty)
>   {
> +	unsigned int gup_flags = bio_to_gup_flags(bio);
>   	struct bvec_iter_all iter_all;
>   	struct bio_vec *bvec;
>   
>   	bio_for_each_segment_all(bvec, bio, iter_all) {
>   		if (mark_dirty && !PageCompound(bvec->bv_page))
>   			set_page_dirty_lock(bvec->bv_page);
> -		put_page(bvec->bv_page);
> +		page_put_unpin(bvec->bv_page, gup_flags);
>   	}
>   }
>   EXPORT_SYMBOL_GPL(__bio_release_pages);
> @@ -1496,8 +1497,8 @@ void bio_set_pages_dirty(struct bio *bio)
>    * the BIO and re-dirty the pages in process context.
>    *
>    * It is expected that bio_check_pages_dirty() will wholly own the BIO from
> - * here on.  It will run one put_page() against each page and will run one
> - * bio_put() against the BIO.
> + * here on.  It will run one page_put_unpin() against each page and will run
> + * one bio_put() against the BIO.
>    */
>   
>   static void bio_dirty_fn(struct work_struct *work);
> diff --git a/block/blk.h b/block/blk.h
> index 4c3b3325219a..294044d696e0 100644
> --- a/block/blk.h
> +++ b/block/blk.h
> @@ -425,6 +425,34 @@ int bio_add_hw_page(struct request_queue *q, struct bio *bio,
>   		struct page *page, unsigned int len, unsigned int offset,
>   		unsigned int max_sectors, bool *same_page);
>   
> +/*
> + * Set the cleanup mode for a bio from an iterator and the extraction flags.
> + */
> +static inline void bio_set_cleanup_mode(struct bio *bio, struct iov_iter *iter)
> +{
> +	unsigned int cleanup_mode = iov_iter_extract_mode(iter);
> +
> +	if (cleanup_mode & FOLL_GET)
> +		bio_set_flag(bio, BIO_PAGE_REFFED);
> +	if (cleanup_mode & FOLL_PIN)
> +		bio_set_flag(bio, BIO_PAGE_PINNED);

Can FOLL_GET ever happen?

IOW, can't this even be

if (user_backed_iter(iter))
	bio_set_flag(bio, BIO_PAGE_PINNED);
David Howells Jan. 24, 2023, 2:47 p.m. UTC | #3
David Hildenbrand <david@redhat.com> wrote:

> > +static inline void bio_set_cleanup_mode(struct bio *bio, struct iov_iter *iter)
> > +{
> > +	unsigned int cleanup_mode = iov_iter_extract_mode(iter);
> > +
> > +	if (cleanup_mode & FOLL_GET)
> > +		bio_set_flag(bio, BIO_PAGE_REFFED);
> > +	if (cleanup_mode & FOLL_PIN)
> > +		bio_set_flag(bio, BIO_PAGE_PINNED);
> 
> Can FOLL_GET ever happen?

Yes - unless patches 8 and 9 are merged.  I had them as one, but Christoph
split them up.

David
Christoph Hellwig Jan. 24, 2023, 2:53 p.m. UTC | #4
On Tue, Jan 24, 2023 at 02:47:51PM +0000, David Howells wrote:
> > > +static inline void bio_set_cleanup_mode(struct bio *bio, struct iov_iter *iter)
> > > +{
> > > +	unsigned int cleanup_mode = iov_iter_extract_mode(iter);
> > > +
> > > +	if (cleanup_mode & FOLL_GET)
> > > +		bio_set_flag(bio, BIO_PAGE_REFFED);
> > > +	if (cleanup_mode & FOLL_PIN)
> > > +		bio_set_flag(bio, BIO_PAGE_PINNED);
> > 
> > Can FOLL_GET ever happen?
> 
> Yes - unless patches 8 and 9 are merged.  I had them as one, but Christoph
> split them up.

It can't.  Per your latest branch:

#define iov_iter_extract_mode(iter) (user_backed_iter(iter) ? FOLL_PIN : 0)
David Howells Jan. 24, 2023, 3:03 p.m. UTC | #5
Christoph Hellwig <hch@infradead.org> wrote:

> It can't.  Per your latest branch:

Yes it can.  Patch 6:

--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -282,6 +282,7 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
 	if (blk_queue_pci_p2pdma(rq->q))
 		extract_flags |= ITER_ALLOW_P2PDMA;
 
+	bio_set_flag(bio, BIO_PAGE_REFFED);
 	while (iov_iter_count(iter)) {
 		struct page **pages, *stack_pages[UIO_FASTIOV];
 		ssize_t bytes;

Patch 7:

+static inline unsigned int bio_to_gup_flags(struct bio *bio)
+{
+	return (bio_flagged(bio, BIO_PAGE_REFFED) ? FOLL_GET : 0) |
+		(bio_flagged(bio, BIO_PAGE_PINNED) ? FOLL_PIN : 0);
+}
+
+/*
+ * Clean up a page appropriately, where the page may be pinned, may have a
+ * ref taken on it or neither.
+ */
+static inline void bio_release_page(struct bio *bio, struct page *page)
+{
+	page_put_unpin(page, bio_to_gup_flags(bio));
+}

At patch 8, you can get either FOLL_GET, FOLL_PIN or 0, depending on the path
you go through.

David
Christoph Hellwig Jan. 24, 2023, 4:44 p.m. UTC | #6
On Tue, Jan 24, 2023 at 03:03:53PM +0000, David Howells wrote:
> Christoph Hellwig <hch@infradead.org> wrote:
> 
> > It can't.  Per your latest branch:
> 
> Yes it can.  Patch 6:

This never involves the cleanup mode as input.  And as I pointed out
in the other mail, there is no need for the FOLL_ flags on the
cleanup side.  You can just check the bio flag in bio_release_pages
and call either put_page or unpin_user_page on that.  The direct
callers of bio_release_page never mix the pin and get cases anyway.
Let me find some time to code this up if it's easier to understand
that way.
David Hildenbrand Jan. 24, 2023, 4:46 p.m. UTC | #7
On 24.01.23 17:44, Christoph Hellwig wrote:
> On Tue, Jan 24, 2023 at 03:03:53PM +0000, David Howells wrote:
>> Christoph Hellwig <hch@infradead.org> wrote:
>>
>>> It can't.  Per your latest branch:
>>
>> Yes it can.  Patch 6:
> 
> This never involves the cleanup mode as input.  And as I pointed out
> in the other mail, there is no need for the FOLL_ flags on the
> cleanup side.  You can just check the bio flag in bio_release_pages
> and call either put_page or unpin_user_page on that.  The direct
> callers of bio_release_page never mix the pin and get cases anyway.
> Let me find some time to code this up if it's easier to understand
> that way.

In case this series gets resent (which I assume), it would be great to 
CC linux-mm on the whole thing.
Christoph Hellwig Jan. 24, 2023, 4:59 p.m. UTC | #8
This is the incremental patch.  Doesn't do the FOLL_PIN to bool
conversion for the extra helper yet, and needs to be folded into the
original patches still.


diff --git a/block/bio.c b/block/bio.c
index 6dc54bf3ed27d4..bd8433f3644fd7 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1170,14 +1170,20 @@ bool bio_add_folio(struct bio *bio, struct folio *folio, size_t len,
 
 void __bio_release_pages(struct bio *bio, bool mark_dirty)
 {
-	unsigned int gup_flags = bio_to_gup_flags(bio);
+	bool pinned = bio_flagged(bio, BIO_PAGE_PINNED);
+	bool reffed = bio_flagged(bio, BIO_PAGE_REFFED);
 	struct bvec_iter_all iter_all;
 	struct bio_vec *bvec;
 
 	bio_for_each_segment_all(bvec, bio, iter_all) {
 		if (mark_dirty && !PageCompound(bvec->bv_page))
 			set_page_dirty_lock(bvec->bv_page);
-		page_put_unpin(bvec->bv_page, gup_flags);
+
+		if (pinned)
+			unpin_user_page(bvec->bv_page);
+		/* this can go away once direct-io.c is converted: */
+		else if (reffed)
+			put_page(bvec->bv_page);
 	}
 }
 EXPORT_SYMBOL_GPL(__bio_release_pages);
diff --git a/block/blk.h b/block/blk.h
index 294044d696e09f..a16d4425d2751c 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -430,27 +430,19 @@ int bio_add_hw_page(struct request_queue *q, struct bio *bio,
  */
 static inline void bio_set_cleanup_mode(struct bio *bio, struct iov_iter *iter)
 {
-	unsigned int cleanup_mode = iov_iter_extract_mode(iter);
-
-	if (cleanup_mode & FOLL_GET)
-		bio_set_flag(bio, BIO_PAGE_REFFED);
-	if (cleanup_mode & FOLL_PIN)
+	if (iov_iter_extract_mode(iter) & FOLL_PIN)
 		bio_set_flag(bio, BIO_PAGE_PINNED);
 }
 
-static inline unsigned int bio_to_gup_flags(struct bio *bio)
-{
-	return (bio_flagged(bio, BIO_PAGE_REFFED) ? FOLL_GET : 0) |
-		(bio_flagged(bio, BIO_PAGE_PINNED) ? FOLL_PIN : 0);
-}
-
 /*
  * Clean up a page appropriately, where the page may be pinned, may have a
  * ref taken on it or neither.
  */
 static inline void bio_release_page(struct bio *bio, struct page *page)
 {
-	page_put_unpin(page, bio_to_gup_flags(bio));
+	WARN_ON_ONCE(bio_flagged(bio, BIO_PAGE_REFFED));
+	if (bio_flagged(bio, BIO_PAGE_PINNED))
+		unpin_user_page(page);
 }
 
 struct request_queue *blk_alloc_queue(int node_id);
David Howells Jan. 24, 2023, 6:37 p.m. UTC | #9
Christoph Hellwig <hch@infradead.org> wrote:

> +	WARN_ON_ONCE(bio_flagged(bio, BIO_PAGE_REFFED));

It's still set by fs/direct-io.c.

David
David Howells Jan. 24, 2023, 6:38 p.m. UTC | #10
David Hildenbrand <david@redhat.com> wrote:

> In case this series gets resent (which I assume), it would be great to CC
> linux-mm on the whole thing.

v9 is already posted, but I hadn't added linux-mm to it.  I dropped all the
bits that touched the mm side of things.

David
Christoph Hellwig Jan. 24, 2023, 6:55 p.m. UTC | #11
On Tue, Jan 24, 2023 at 06:37:17PM +0000, David Howells wrote:
> Christoph Hellwig <hch@infradead.org> wrote:
> 
> > +	WARN_ON_ONCE(bio_flagged(bio, BIO_PAGE_REFFED));
> 
> It's still set by fs/direct-io.c.

But that should never end up in bio_release_page, only
bio_release_pages.
diff mbox series

Patch

diff --git a/block/bio.c b/block/bio.c
index 40c2b01906da..6f98bcfc0c92 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1170,13 +1170,14 @@  bool bio_add_folio(struct bio *bio, struct folio *folio, size_t len,
 
 void __bio_release_pages(struct bio *bio, bool mark_dirty)
 {
+	unsigned int gup_flags = bio_to_gup_flags(bio);
 	struct bvec_iter_all iter_all;
 	struct bio_vec *bvec;
 
 	bio_for_each_segment_all(bvec, bio, iter_all) {
 		if (mark_dirty && !PageCompound(bvec->bv_page))
 			set_page_dirty_lock(bvec->bv_page);
-		put_page(bvec->bv_page);
+		page_put_unpin(bvec->bv_page, gup_flags);
 	}
 }
 EXPORT_SYMBOL_GPL(__bio_release_pages);
@@ -1496,8 +1497,8 @@  void bio_set_pages_dirty(struct bio *bio)
  * the BIO and re-dirty the pages in process context.
  *
  * It is expected that bio_check_pages_dirty() will wholly own the BIO from
- * here on.  It will run one put_page() against each page and will run one
- * bio_put() against the BIO.
+ * here on.  It will run one page_put_unpin() against each page and will run
+ * one bio_put() against the BIO.
  */
 
 static void bio_dirty_fn(struct work_struct *work);
diff --git a/block/blk.h b/block/blk.h
index 4c3b3325219a..294044d696e0 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -425,6 +425,34 @@  int bio_add_hw_page(struct request_queue *q, struct bio *bio,
 		struct page *page, unsigned int len, unsigned int offset,
 		unsigned int max_sectors, bool *same_page);
 
+/*
+ * Set the cleanup mode for a bio from an iterator and the extraction flags.
+ */
+static inline void bio_set_cleanup_mode(struct bio *bio, struct iov_iter *iter)
+{
+	unsigned int cleanup_mode = iov_iter_extract_mode(iter);
+
+	if (cleanup_mode & FOLL_GET)
+		bio_set_flag(bio, BIO_PAGE_REFFED);
+	if (cleanup_mode & FOLL_PIN)
+		bio_set_flag(bio, BIO_PAGE_PINNED);
+}
+
+static inline unsigned int bio_to_gup_flags(struct bio *bio)
+{
+	return (bio_flagged(bio, BIO_PAGE_REFFED) ? FOLL_GET : 0) |
+		(bio_flagged(bio, BIO_PAGE_PINNED) ? FOLL_PIN : 0);
+}
+
+/*
+ * Clean up a page appropriately, where the page may be pinned, may have a
+ * ref taken on it or neither.
+ */
+static inline void bio_release_page(struct bio *bio, struct page *page)
+{
+	page_put_unpin(page, bio_to_gup_flags(bio));
+}
+
 struct request_queue *blk_alloc_queue(int node_id);
 
 int disk_scan_partitions(struct gendisk *disk, fmode_t mode, void *owner);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 805957c99147..b2c09997d79c 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -484,7 +484,8 @@  void zero_fill_bio(struct bio *bio);
 
 static inline void bio_release_pages(struct bio *bio, bool mark_dirty)
 {
-	if (bio_flagged(bio, BIO_PAGE_REFFED))
+	if (bio_flagged(bio, BIO_PAGE_REFFED) ||
+	    bio_flagged(bio, BIO_PAGE_PINNED))
 		__bio_release_pages(bio, mark_dirty);
 }
 
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 7daa261f4f98..a0e339ff3d09 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -318,6 +318,7 @@  struct bio {
  * bio flags
  */
 enum {
+	BIO_PAGE_PINNED,	/* Unpin pages in bio_release_pages() */
 	BIO_PAGE_REFFED,	/* put pages in bio_release_pages() */
 	BIO_CLONED,		/* doesn't own data */
 	BIO_BOUNCED,		/* bio is a bounce bio */