[v2,16/19] btrfs: use extent_io_tree to handle subpage extent buffer allocation

Message ID	20200915053532.63279-17-wqu@suse.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <SRS0=LHCx=CY=vger.kernel.org=linux-btrfs-owner@kernel.org> From: Qu Wenruo <wqu@suse.com> To: linux-btrfs@vger.kernel.org Subject: [PATCH v2 16/19] btrfs: use extent_io_tree to handle subpage extent buffer allocation Date: Tue, 15 Sep 2020 13:35:29 +0800 Message-Id: <20200915053532.63279-17-wqu@suse.com> In-Reply-To: <20200915053532.63279-1-wqu@suse.com> References: <20200915053532.63279-1-wqu@suse.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Sender: linux-btrfs-owner@vger.kernel.org Precedence: bulk
Series	btrfs: add read-only support for subpage sector size \| expand [v2,00/19] btrfs: add read-only support for subpage sector size [v2,01/19] btrfs: extent-io-tests: remove invalid tests [v2,02/19] btrfs: remove the unnecessary parameter @start and @len for check_data_csum() [v2,03/19] btrfs: calculate inline extent buffer page size based on page size [v2,04/19] btrfs: remove the open-code to read disk-key [v2,05/19] btrfs: make btrfs_fs_info::buffer_radix to take sector size devided values [v2,06/19] btrfs: don't allow tree block to cross page boundary for subpage support [v2,07/19] btrfs: update num_extent_pages() to support subpage sized extent buffer [v2,08/19] btrfs: handle sectorsize < PAGE_SIZE case for extent buffer accessors [v2,09/19] btrfs: make csum_tree_block() handle sectorsize smaller than page size [v2,10/19] btrfs: add assert_spin_locked() for attach_extent_buffer_page() [v2,11/19] btrfs: extract the extent buffer verification from btree_readpage_end_io_hook() [v2,12/19] btrfs: extent_io: only require sector size alignment for page read [v2,13/19] btrfs: make btrfs_readpage_end_io_hook() follow sector size [v2,14/19] btrfs: make btree inode io_tree has its special owner [v2,15/19] btrfs: don't set extent_io_tree bits for btree inode at endio time [v2,16/19] btrfs: use extent_io_tree to handle subpage extent buffer allocation [v2,17/19] btrfs: implement subpage metadata read and its endio function [v2,18/19] btrfs: implement btree_readpage() and try_release_extent_buffer() for subpage [v2,19/19] btrfs: allow RO mount of 4K sector size fs on 64K page system

diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index c47b6c6fea9f..cff818e0c406 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -217,6 +217,18 @@ static inline struct btrfs_inode *BTRFS_I(const struct inode *inode)
 	return container_of(inode, struct btrfs_inode, vfs_inode);
 }
 
+static inline struct btrfs_fs_info *page_to_fs_info(struct page *page)
+{
+	ASSERT(page->mapping);
+	return BTRFS_I(page->mapping->host)->root->fs_info;
+}
+
+static inline struct extent_io_tree
+*info_to_btree_io_tree(struct btrfs_fs_info *fs_info)
+{
+	return &BTRFS_I(fs_info->btree_inode)->io_tree;
+}
+
 static inline unsigned long btrfs_inode_hash(u64 objectid,
 					     const struct btrfs_root *root)
 {
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index b526adf20f3e..2ef35eb7a6e1 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2120,6 +2120,13 @@ static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info)
 	inode->i_mapping->a_ops = &btree_aops;
 
 	RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
+	/*
+	 * This extent io tree is subpage metadata specific.
+	 *
+	 * It gives the following bits a new meaning:
+	 * - EXTENT_NEW:      Has extent buffer allocated
+	 * - EXTENT_UPTODATE: Has latest metadata read from disk
+	 */
 	extent_io_tree_init(fs_info, &BTRFS_I(inode)->io_tree,
 			    IO_TREE_BTREE_IO, inode);
 	BTRFS_I(inode)->io_tree.track_uptodate = false;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 16fe9f4313a1..2af6786e6ab4 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3116,6 +3116,20 @@ static void attach_extent_buffer_page(struct extent_buffer *eb,
 	if (page->mapping)
 		assert_spin_locked(&page->mapping->private_lock);
 
+	if (eb->fs_info->sectorsize < PAGE_SIZE && page->mapping) {
+		struct extent_io_tree *io_tree =
+			info_to_btree_io_tree(eb->fs_info);
+
+		if (!PagePrivate(page))
+			attach_page_private(page, NULL);
+
+		/* EXTENT_NEW marks that an extent buffer covers this range */
+		set_extent_bit(io_tree, eb->start, eb->start + eb->len - 1,
+				EXTENT_NEW, NULL, NULL, GFP_ATOMIC);
+		eb->pages[0] = page;
+		return;
+	}
+
 	if (!PagePrivate(page))
 		attach_page_private(page, eb);
 	else
@@ -4943,6 +4957,37 @@ int extent_buffer_under_io(const struct extent_buffer *eb)
 		test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
 }
 
+static void detach_extent_buffer_subpage(struct extent_buffer *eb)
+{
+	struct btrfs_fs_info *fs_info = eb->fs_info;
+	struct extent_io_tree *io_tree = info_to_btree_io_tree(fs_info);
+	struct page *page = eb->pages[0];
+	bool mapped = !test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
+	int ret;
+
+	if (!page)
+		return;
+
+	if (mapped)
+		spin_lock(&page->mapping->private_lock);
+
+	/*
+	 * Clear the EXTENT_NEW bit from the io tree, to indicate that there
+	 * is no longer an extent buffer in the range.
+	 */
+	__clear_extent_bit(io_tree, eb->start, eb->start + eb->len - 1,
+			   EXTENT_NEW, 0, 0, NULL, GFP_ATOMIC, NULL);
+
+	/* Test if another extent buffer still lives in the page range */
+	ret = test_range_bit(io_tree, round_down(eb->start, PAGE_SIZE),
+			     round_down(eb->start, PAGE_SIZE) + PAGE_SIZE - 1,
+			     EXTENT_NEW, 0, NULL);
+	if (!ret)
+		detach_page_private(page);
+	if (mapped)
+		spin_unlock(&page->mapping->private_lock);
+}
+
 /*
  * Release all pages attached to the extent buffer.
  */
@@ -4954,6 +4999,9 @@ static void btrfs_release_extent_buffer_pages(struct extent_buffer *eb)
 
 	BUG_ON(extent_buffer_under_io(eb));
 
+	if (eb->fs_info->sectorsize < PAGE_SIZE)
+		return detach_extent_buffer_subpage(eb);
+
 	num_pages = num_extent_pages(eb);
 	for (i = 0; i < num_pages; i++) {
 		struct page *page = eb->pages[i];
@@ -5248,6 +5296,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 	struct extent_buffer *exists = NULL;
 	struct page *p;
 	struct address_space *mapping = fs_info->btree_inode->i_mapping;
+	bool subpage = (fs_info->sectorsize < PAGE_SIZE);
 	int uptodate = 1;
 	int ret;
 
@@ -5280,7 +5329,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 		}
 
 		spin_lock(&mapping->private_lock);
-		if (PagePrivate(p)) {
+		if (PagePrivate(p) && !subpage) {
 			/*
 			 * We could have already allocated an eb for this page
 			 * and attached one so lets see if we can get a ref on
@@ -5321,8 +5370,21 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 		 * we could crash.
 		 */
 	}
-	if (uptodate)
+	if (uptodate && !subpage)
 		set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
+	/*
+	 * For subpage, the page uptodate bit is no longer reliable because
+	 * reads can cover only part of a page. Check the extent_io_tree
+	 * instead to learn whether the whole eb range is really UPTODATE.
+	 */
+	if (subpage) {
+		struct extent_io_tree *io_tree = info_to_btree_io_tree(fs_info);
+
+		ret = test_range_bit(io_tree, eb->start, eb->start + eb->len - 1,
+				     EXTENT_UPTODATE, 1, NULL);
+		if (ret)
+			set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
+	}
 again:
 	ret = radix_tree_preload(GFP_NOFS);
 	if (ret) {
@@ -5361,6 +5423,19 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 		if (eb->pages[i])
 			unlock_page(eb->pages[i]);
 	}
+	/*
+	 * For subpage case, btrfs_release_extent_buffer() will clear the
+	 * EXTENT_NEW bit if there is a page, and EXTENT_NEW bit represents
+	 * we have an extent buffer in that range.
+	 *
+	 * If we lost a race with another caller who succeeded in inserting
+	 * the eb, we must not clear that EXTENT_NEW bit. So drop the page
+	 * by hand; pages[0] may still be NULL if we got here early.
+	 */
+	if (subpage && eb->pages[0]) {
+		put_page(eb->pages[0]);
+		eb->pages[0] = NULL;
+	}
 
 	btrfs_release_extent_buffer(eb);
 	return exists;

[v2,16/19] btrfs: use extent_io_tree to handle subpage extent buffer allocation

Commit Message

Patch