diff mbox

[1/6] Btrfs: subpagesize-blocksize: Get rid of whole page reads.

Message ID 1394634033-2528-2-git-send-email-chandan@linux.vnet.ibm.com (mailing list archive)
State Deferred
Headers show

Commit Message

Chandan Rajendra March 12, 2014, 2:20 p.m. UTC
bio_vec->{bv_offset, bv_len} cannot be relied upon by the end bio functions
to track the file offset range operated on by the bio. Hence this patch adds
two new members to 'struct btrfs_io_bio' to track the file offset range.

This patch also brings back check_page_locked() to reliably unlock pages in
readpage's end bio function.

Signed-off-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
---
 fs/btrfs/extent_io.c | 122 +++++++++++++++++++++++++++++++++------------------
 fs/btrfs/volumes.h   |   3 ++
 2 files changed, 82 insertions(+), 43 deletions(-)

Comments

Liu Bo March 19, 2014, 7:37 a.m. UTC | #1
On Wed, Mar 12, 2014 at 07:50:28PM +0530, Chandan Rajendra wrote:
> bio_vec->{bv_offset, bv_len} cannot be relied upon by the end bio functions
> to track the file offset range operated on by the bio. Hence this patch adds
> two new members to 'struct btrfs_io_bio' to track the file offset range.
> 
> This patch also brings back check_page_locked() to reliably unlock pages in
> readpage's end bio function.
> 
> Signed-off-by: Chandan Rajendra <chandan@linux.vnet.ibm.com>
> ---
>  fs/btrfs/extent_io.c | 122 +++++++++++++++++++++++++++++++++------------------
>  fs/btrfs/volumes.h   |   3 ++
>  2 files changed, 82 insertions(+), 43 deletions(-)
> 
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index fbe501d..5a65aee 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -1943,15 +1943,31 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
>   * helper function to set a given page up to date if all the
>   * extents in the tree for that page are up to date
>   */
> -static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
> +static void check_page_uptodate(struct extent_io_tree *tree, struct page *page,
> +				struct extent_state *cached)
>  {
>  	u64 start = page_offset(page);
>  	u64 end = start + PAGE_CACHE_SIZE - 1;
> -	if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
> +	if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, cached))
>  		SetPageUptodate(page);
>  }
>  
>  /*
> + * helper function to unlock a page if all the extents in the tree
> + * for that page are unlocked
> + */
> +static void check_page_locked(struct extent_io_tree *tree, struct page *page)
> +{
> +	u64 start = page_offset(page);
> +	u64 end = start + PAGE_CACHE_SIZE - 1;
> +
> +	if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL)) {
> +		unlock_page(page);
> +	}
> +}
> +
> +
> +/*
>   * When IO fails, either with EIO or csum verification fails, we
>   * try other mirrors that might have a good copy of the data.  This
>   * io_failure_record is used to record state as we go through all the
> @@ -2414,16 +2430,33 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
>  	bio_put(bio);
>  }
>  
> -static void
> -endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
> -			      int uptodate)
> +static void unlock_extent_and_page(struct address_space *mapping,
> +				   struct extent_io_tree *tree,
> +				   struct btrfs_io_bio *io_bio)
>  {
> -	struct extent_state *cached = NULL;
> -	u64 end = start + len - 1;
> +	pgoff_t index;
> +	u64 offset, len;
> +	/*
> +	 * This btrfs_io_bio may span multiple pages.
> +	 * We need to unlock the pages convered by them
> +	 * if we got endio callback for all the blocks in the page.
> +	 * btrfs_io_bio also contain "contigous blocks of the file"
> +	 * look at submit_extent_page for more details.
> +	 */
>  
> -	if (uptodate && tree->track_uptodate)
> -		set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC);
> -	unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
> +	offset = io_bio->start_offset;
> +	len    = io_bio->len;
> +	unlock_extent(tree, offset, offset + len - 1);
> +
> +	index = offset >> PAGE_CACHE_SHIFT;
> +	while (offset < io_bio->start_offset + len) {
> +		struct page *page;
> +		page = find_get_page(mapping, index);
> +		check_page_locked(tree, page);
> +		page_cache_release(page);
> +		index++;
> +		offset += PAGE_CACHE_SIZE;
> +	}
>  }
>  
>  /*
> @@ -2443,13 +2476,13 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
>  	struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
>  	struct bio_vec *bvec = bio->bi_io_vec;
>  	struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
> +	struct address_space *mapping = bio->bi_io_vec->bv_page->mapping;
>  	struct extent_io_tree *tree;
> +	struct extent_state *cached = NULL;
>  	u64 offset = 0;
>  	u64 start;
>  	u64 end;
>  	u64 len;
> -	u64 extent_start = 0;
> -	u64 extent_len = 0;
>  	int mirror;
>  	int ret;
>  
> @@ -2482,8 +2515,8 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
>  					bvec->bv_offset, bvec->bv_len);
>  		}
>  
> -		start = page_offset(page);
> -		end = start + bvec->bv_offset + bvec->bv_len - 1;
> +		start = page_offset(page) + bvec->bv_offset;
> +		end = start + bvec->bv_len - 1;
>  		len = bvec->bv_len;
>  
>  		if (++bvec <= bvec_end)
> @@ -2540,40 +2573,24 @@ readpage_ok:
>  			offset = i_size & (PAGE_CACHE_SIZE-1);
>  			if (page->index == end_index && offset)
>  				zero_user_segment(page, offset, PAGE_CACHE_SIZE);
> -			SetPageUptodate(page);
> +			if (tree->track_uptodate)
> +				set_extent_uptodate(tree, start, end, &cached,
> +						    GFP_ATOMIC);
>  		} else {
>  			ClearPageUptodate(page);
>  			SetPageError(page);
>  		}
> -		unlock_page(page);
> -		offset += len;
>  
> -		if (unlikely(!uptodate)) {
> -			if (extent_len) {
> -				endio_readpage_release_extent(tree,
> -							      extent_start,
> -							      extent_len, 1);
> -				extent_start = 0;
> -				extent_len = 0;
> -			}
> -			endio_readpage_release_extent(tree, start,
> -						      end - start + 1, 0);
> -		} else if (!extent_len) {
> -			extent_start = start;
> -			extent_len = end + 1 - start;
> -		} else if (extent_start + extent_len == start) {
> -			extent_len += end + 1 - start;
> -		} else {
> -			endio_readpage_release_extent(tree, extent_start,
> -						      extent_len, uptodate);
> -			extent_start = start;
> -			extent_len = end + 1 - start;
> -		}
> +		offset += len;
> +		/*
> +		 * Check whether the page in the bvec can be marked uptodate
> +		 */
> +		check_page_uptodate(tree, page, cached);
>  	} while (bvec <= bvec_end);
> -
> -	if (extent_len)
> -		endio_readpage_release_extent(tree, extent_start, extent_len,
> -					      uptodate);
> +	/*
> +	 * Unlock the btrfs_bio and associated page
> +	 */
> +	unlock_extent_and_page(mapping, tree, io_bio);
>  	if (io_bio->end_io)
>  		io_bio->end_io(io_bio, err);
>  	bio_put(bio);
> @@ -2700,6 +2717,18 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
>  		else
>  			contig = bio_end_sector(bio) == sector;
>  
> +		if (contig) {
> +			/*
> +			 * Check whether we are contig if file offsets.
> +			 * We should mostly be for readpage/readpages
> +			 * We need to do this because we use btrfs_io_bio
> +			 * start_offset and len to unlock in endio routines.
> +			 */
> +			if ((page_offset(page) + offset) !=
> +					(btrfs_io_bio(bio)->start_offset +
> +					 btrfs_io_bio(bio)->len))
> +				contig = 0;
> +		}
>  		if (prev_bio_flags != bio_flags || !contig ||
>  		    merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) ||
>  		    bio_add_page(bio, page, page_size, offset) < page_size) {
> @@ -2709,6 +2738,11 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
>  				return ret;
>  			bio = NULL;
>  		} else {
> +			/*
> +			 * update btrfs_io_bio len. So that we can unlock
> +			 * correctly in end_io callback.
> +			 */
> +			btrfs_io_bio(bio)->len += page_size;
>  			return 0;
>  		}
>  	}
> @@ -2724,6 +2758,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
>  	bio_add_page(bio, page, page_size, offset);
>  	bio->bi_end_io = end_io_func;
>  	bio->bi_private = tree;
> +	btrfs_io_bio(bio)->start_offset = page_offset(page) + offset;
> +	btrfs_io_bio(bio)->len = page_size;
>  
>  	if (bio_ret)
>  		*bio_ret = bio;
> @@ -2914,7 +2950,7 @@ static int __do_readpage(struct extent_io_tree *tree,
>  		/* the get_extent function already copied into the page */
>  		if (test_range_bit(tree, cur, cur_end,
>  				   EXTENT_UPTODATE, 1, NULL)) {
> -			check_page_uptodate(tree, page);
> +			check_page_uptodate(tree, page, NULL);
>  			if (!parent_locked)
>  				unlock_extent(tree, cur, cur + iosize - 1);
>  			cur = cur + iosize;
> diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
> index 80754f9..2e16f4d 100644
> --- a/fs/btrfs/volumes.h
> +++ b/fs/btrfs/volumes.h
> @@ -174,6 +174,9 @@ struct btrfs_io_bio {
>  	u8 *csum_allocated;
>  	btrfs_io_bio_end_io_t *end_io;
>  	struct bio bio;
> +	/* Track file offset range operated on by the bio.*/
> +	u64 start_offset;
> +	u64 len;

These should be put in front of "struct bio bio",
otherwise, it might lead to errors, according to bioset_create()'s comments,

----------------------------------------------------------------------
"Note that the bio must be embedded at the END of that structure always,    
or things will break badly."
----------------------------------------------------------------------

-liubo
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Chandan Rajendra March 19, 2014, 3:50 p.m. UTC | #2
> These should be put in front of "struct bio bio",
> otherwise, it might lead to errors, according to bioset_create()'s comments,
> 
> ----------------------------------------------------------------------
> "Note that the bio must be embedded at the END of that structure always,    
> or things will break badly."
> ----------------------------------------------------------------------
> 

Thank you for pointing that out. I will fix it.

Thanks,
chandan

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index fbe501d..5a65aee 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1943,15 +1943,31 @@  int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
  * helper function to set a given page up to date if all the
  * extents in the tree for that page are up to date
  */
-static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
+static void check_page_uptodate(struct extent_io_tree *tree, struct page *page,
+				struct extent_state *cached)
 {
 	u64 start = page_offset(page);
 	u64 end = start + PAGE_CACHE_SIZE - 1;
-	if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
+	if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, cached))
 		SetPageUptodate(page);
 }
 
 /*
+ * helper function to unlock a page if all the extents in the tree
+ * for that page are unlocked
+ */
+static void check_page_locked(struct extent_io_tree *tree, struct page *page)
+{
+	u64 start = page_offset(page);
+	u64 end = start + PAGE_CACHE_SIZE - 1;
+
+	if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL)) {
+		unlock_page(page);
+	}
+}
+
+
+/*
  * When IO fails, either with EIO or csum verification fails, we
  * try other mirrors that might have a good copy of the data.  This
  * io_failure_record is used to record state as we go through all the
@@ -2414,16 +2430,33 @@  static void end_bio_extent_writepage(struct bio *bio, int err)
 	bio_put(bio);
 }
 
-static void
-endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
-			      int uptodate)
+static void unlock_extent_and_page(struct address_space *mapping,
+				   struct extent_io_tree *tree,
+				   struct btrfs_io_bio *io_bio)
 {
-	struct extent_state *cached = NULL;
-	u64 end = start + len - 1;
+	pgoff_t index;
+	u64 offset, len;
+	/*
+	 * This btrfs_io_bio may span multiple pages.
+	 * We need to unlock the pages convered by them
+	 * if we got endio callback for all the blocks in the page.
+	 * btrfs_io_bio also contain "contigous blocks of the file"
+	 * look at submit_extent_page for more details.
+	 */
 
-	if (uptodate && tree->track_uptodate)
-		set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC);
-	unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
+	offset = io_bio->start_offset;
+	len    = io_bio->len;
+	unlock_extent(tree, offset, offset + len - 1);
+
+	index = offset >> PAGE_CACHE_SHIFT;
+	while (offset < io_bio->start_offset + len) {
+		struct page *page;
+		page = find_get_page(mapping, index);
+		check_page_locked(tree, page);
+		page_cache_release(page);
+		index++;
+		offset += PAGE_CACHE_SIZE;
+	}
 }
 
 /*
@@ -2443,13 +2476,13 @@  static void end_bio_extent_readpage(struct bio *bio, int err)
 	struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
 	struct bio_vec *bvec = bio->bi_io_vec;
 	struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
+	struct address_space *mapping = bio->bi_io_vec->bv_page->mapping;
 	struct extent_io_tree *tree;
+	struct extent_state *cached = NULL;
 	u64 offset = 0;
 	u64 start;
 	u64 end;
 	u64 len;
-	u64 extent_start = 0;
-	u64 extent_len = 0;
 	int mirror;
 	int ret;
 
@@ -2482,8 +2515,8 @@  static void end_bio_extent_readpage(struct bio *bio, int err)
 					bvec->bv_offset, bvec->bv_len);
 		}
 
-		start = page_offset(page);
-		end = start + bvec->bv_offset + bvec->bv_len - 1;
+		start = page_offset(page) + bvec->bv_offset;
+		end = start + bvec->bv_len - 1;
 		len = bvec->bv_len;
 
 		if (++bvec <= bvec_end)
@@ -2540,40 +2573,24 @@  readpage_ok:
 			offset = i_size & (PAGE_CACHE_SIZE-1);
 			if (page->index == end_index && offset)
 				zero_user_segment(page, offset, PAGE_CACHE_SIZE);
-			SetPageUptodate(page);
+			if (tree->track_uptodate)
+				set_extent_uptodate(tree, start, end, &cached,
+						    GFP_ATOMIC);
 		} else {
 			ClearPageUptodate(page);
 			SetPageError(page);
 		}
-		unlock_page(page);
-		offset += len;
 
-		if (unlikely(!uptodate)) {
-			if (extent_len) {
-				endio_readpage_release_extent(tree,
-							      extent_start,
-							      extent_len, 1);
-				extent_start = 0;
-				extent_len = 0;
-			}
-			endio_readpage_release_extent(tree, start,
-						      end - start + 1, 0);
-		} else if (!extent_len) {
-			extent_start = start;
-			extent_len = end + 1 - start;
-		} else if (extent_start + extent_len == start) {
-			extent_len += end + 1 - start;
-		} else {
-			endio_readpage_release_extent(tree, extent_start,
-						      extent_len, uptodate);
-			extent_start = start;
-			extent_len = end + 1 - start;
-		}
+		offset += len;
+		/*
+		 * Check whether the page in the bvec can be marked uptodate
+		 */
+		check_page_uptodate(tree, page, cached);
 	} while (bvec <= bvec_end);
-
-	if (extent_len)
-		endio_readpage_release_extent(tree, extent_start, extent_len,
-					      uptodate);
+	/*
+	 * Unlock the btrfs_bio and associated page
+	 */
+	unlock_extent_and_page(mapping, tree, io_bio);
 	if (io_bio->end_io)
 		io_bio->end_io(io_bio, err);
 	bio_put(bio);
@@ -2700,6 +2717,18 @@  static int submit_extent_page(int rw, struct extent_io_tree *tree,
 		else
 			contig = bio_end_sector(bio) == sector;
 
+		if (contig) {
+			/*
+			 * Check whether we are contig if file offsets.
+			 * We should mostly be for readpage/readpages
+			 * We need to do this because we use btrfs_io_bio
+			 * start_offset and len to unlock in endio routines.
+			 */
+			if ((page_offset(page) + offset) !=
+					(btrfs_io_bio(bio)->start_offset +
+					 btrfs_io_bio(bio)->len))
+				contig = 0;
+		}
 		if (prev_bio_flags != bio_flags || !contig ||
 		    merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) ||
 		    bio_add_page(bio, page, page_size, offset) < page_size) {
@@ -2709,6 +2738,11 @@  static int submit_extent_page(int rw, struct extent_io_tree *tree,
 				return ret;
 			bio = NULL;
 		} else {
+			/*
+			 * update btrfs_io_bio len. So that we can unlock
+			 * correctly in end_io callback.
+			 */
+			btrfs_io_bio(bio)->len += page_size;
 			return 0;
 		}
 	}
@@ -2724,6 +2758,8 @@  static int submit_extent_page(int rw, struct extent_io_tree *tree,
 	bio_add_page(bio, page, page_size, offset);
 	bio->bi_end_io = end_io_func;
 	bio->bi_private = tree;
+	btrfs_io_bio(bio)->start_offset = page_offset(page) + offset;
+	btrfs_io_bio(bio)->len = page_size;
 
 	if (bio_ret)
 		*bio_ret = bio;
@@ -2914,7 +2950,7 @@  static int __do_readpage(struct extent_io_tree *tree,
 		/* the get_extent function already copied into the page */
 		if (test_range_bit(tree, cur, cur_end,
 				   EXTENT_UPTODATE, 1, NULL)) {
-			check_page_uptodate(tree, page);
+			check_page_uptodate(tree, page, NULL);
 			if (!parent_locked)
 				unlock_extent(tree, cur, cur + iosize - 1);
 			cur = cur + iosize;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 80754f9..2e16f4d 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -174,6 +174,9 @@  struct btrfs_io_bio {
 	u8 *csum_allocated;
 	btrfs_io_bio_end_io_t *end_io;
 	struct bio bio;
+	/* Track file offset range operated on by the bio.*/
+	u64 start_offset;
+	u64 len;
 };
 
 static inline struct btrfs_io_bio *btrfs_io_bio(struct bio *bio)