diff mbox series

[v3,20/22] btrfs: introduce btrfs_subpage for data inodes

Message ID 20210106010201.37864-21-wqu@suse.com (mailing list archive)
State New, archived
Headers show
Series btrfs: add read-only support for subpage sector size | expand

Commit Message

Qu Wenruo Jan. 6, 2021, 1:01 a.m. UTC
To support subpage sector size, data inodes also need extra info to
track which sectors in a page are uptodate/dirty/...

This patch will make pages for data inodes get the btrfs_subpage
structure attached, and detach it when the page is freed.

This patch also slightly changes the timing of when
set_page_extent_mapped() is called, to make sure:

- We have page->mapping set
  page->mapping->host is used to grab btrfs_fs_info, thus we can only
  call this function after page is mapped to an inode.

  One call site attaches pages to inode manually, thus we have to modify
  the timing of set_page_extent_mapped() a little.

- As soon as possible, before other operations
  Since memory allocation can fail, we have to do extra error handling.
  Calling set_page_extent_mapped() as soon as possible can simplify the
  error handling for several call sites.

The idea is pretty much the same as iomap_page, but with more bitmaps
for btrfs specific cases.

Currently the plan is to switch to iomap if iomap can provide sector
aligned write back (only write back dirty sectors, but not the full
page; data balance requires this feature).

So we will stick to btrfs specific bitmap for now.

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/compression.c      | 10 ++++++--
 fs/btrfs/extent_io.c        | 46 +++++++++++++++++++++++++++++++++----
 fs/btrfs/extent_io.h        |  3 ++-
 fs/btrfs/file.c             | 24 ++++++++-----------
 fs/btrfs/free-space-cache.c | 15 +++++++++---
 fs/btrfs/inode.c            | 12 ++++++----
 fs/btrfs/ioctl.c            |  5 +++-
 fs/btrfs/reflink.c          |  5 +++-
 fs/btrfs/relocation.c       | 12 ++++++++--
 9 files changed, 99 insertions(+), 33 deletions(-)

Comments

kernel test robot Jan. 6, 2021, 5:04 a.m. UTC | #1
Hi Qu,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on kdave/for-next]
[also build test WARNING on v5.11-rc2 next-20210104]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Qu-Wenruo/btrfs-add-read-only-support-for-subpage-sector-size/20210106-090847
base:   https://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git for-next
config: m68k-randconfig-s032-20210106 (attached as .config)
compiler: m68k-linux-gcc (GCC) 9.3.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # apt-get install sparse
        # sparse version: v0.6.3-208-g46a52ca4-dirty
        # https://github.com/0day-ci/linux/commit/2c54bbf363f66a7c4d489fa0b7967ce5fc960afb
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Qu-Wenruo/btrfs-add-read-only-support-for-subpage-sector-size/20210106-090847
        git checkout 2c54bbf363f66a7c4d489fa0b7967ce5fc960afb
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=m68k 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>


"sparse warnings: (new ones prefixed by >>)"
>> fs/btrfs/inode.c:8352:13: sparse: sparse: incorrect type in assignment (different base types) @@     expected restricted vm_fault_t [assigned] [usertype] ret @@     got int @@
   fs/btrfs/inode.c:8352:13: sparse:     expected restricted vm_fault_t [assigned] [usertype] ret
   fs/btrfs/inode.c:8352:13: sparse:     got int
>> fs/btrfs/inode.c:8353:13: sparse: sparse: restricted vm_fault_t degrades to integer
   fs/btrfs/inode.c: note: in included file (through include/linux/mmzone.h, include/linux/gfp.h, include/linux/slab.h, ...):
   include/linux/spinlock.h:394:9: sparse: sparse: context imbalance in 'run_delayed_iput_locked' - unexpected unlock

vim +8352 fs/btrfs/inode.c

  8275	
  8276	/*
  8277	 * btrfs_page_mkwrite() is not allowed to change the file size as it gets
  8278	 * called from a page fault handler when a page is first dirtied. Hence we must
  8279	 * be careful to check for EOF conditions here. We set the page up correctly
  8280	 * for a written page which means we get ENOSPC checking when writing into
  8281	 * holes and correct delalloc and unwritten extent mapping on filesystems that
  8282	 * support these features.
  8283	 *
  8284	 * We are not allowed to take the i_mutex here so we have to play games to
  8285	 * protect against truncate races as the page could now be beyond EOF.  Because
  8286	 * truncate_setsize() writes the inode size before removing pages, once we have
  8287	 * the page lock we can determine safely if the page is beyond EOF. If it is not
  8288	 * beyond EOF, then the page is guaranteed safe against truncation until we
  8289	 * unlock the page.
  8290	 */
  8291	vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf)
  8292	{
  8293		struct page *page = vmf->page;
  8294		struct inode *inode = file_inode(vmf->vma->vm_file);
  8295		struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
  8296		struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
  8297		struct btrfs_ordered_extent *ordered;
  8298		struct extent_state *cached_state = NULL;
  8299		struct extent_changeset *data_reserved = NULL;
  8300		char *kaddr;
  8301		unsigned long zero_start;
  8302		loff_t size;
  8303		vm_fault_t ret;
  8304		int ret2;
  8305		int reserved = 0;
  8306		u64 reserved_space;
  8307		u64 page_start;
  8308		u64 page_end;
  8309		u64 end;
  8310	
  8311		reserved_space = PAGE_SIZE;
  8312	
  8313		sb_start_pagefault(inode->i_sb);
  8314		page_start = page_offset(page);
  8315		page_end = page_start + PAGE_SIZE - 1;
  8316		end = page_end;
  8317	
  8318		/*
  8319		 * Reserving delalloc space after obtaining the page lock can lead to
  8320		 * deadlock. For example, if a dirty page is locked by this function
  8321		 * and the call to btrfs_delalloc_reserve_space() ends up triggering
  8322		 * dirty page write out, then the btrfs_writepage() function could
  8323		 * end up waiting indefinitely to get a lock on the page currently
  8324		 * being processed by btrfs_page_mkwrite() function.
  8325		 */
  8326		ret2 = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved,
  8327						    page_start, reserved_space);
  8328		if (!ret2) {
  8329			ret2 = file_update_time(vmf->vma->vm_file);
  8330			reserved = 1;
  8331		}
  8332		if (ret2) {
  8333			ret = vmf_error(ret2);
  8334			if (reserved)
  8335				goto out;
  8336			goto out_noreserve;
  8337		}
  8338	
  8339		ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
  8340	again:
  8341		lock_page(page);
  8342		size = i_size_read(inode);
  8343	
  8344		if ((page->mapping != inode->i_mapping) ||
  8345		    (page_start >= size)) {
  8346			/* page got truncated out from underneath us */
  8347			goto out_unlock;
  8348		}
  8349		wait_on_page_writeback(page);
  8350	
  8351		lock_extent_bits(io_tree, page_start, page_end, &cached_state);
> 8352		ret = set_page_extent_mapped(page);
> 8353		if (ret < 0)
  8354			goto out_unlock;
  8355	
  8356		/*
  8357		 * we can't set the delalloc bits if there are pending ordered
  8358		 * extents.  Drop our locks and wait for them to finish
  8359		 */
  8360		ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), page_start,
  8361				PAGE_SIZE);
  8362		if (ordered) {
  8363			unlock_extent_cached(io_tree, page_start, page_end,
  8364					     &cached_state);
  8365			unlock_page(page);
  8366			btrfs_start_ordered_extent(ordered, 1);
  8367			btrfs_put_ordered_extent(ordered);
  8368			goto again;
  8369		}
  8370	
  8371		if (page->index == ((size - 1) >> PAGE_SHIFT)) {
  8372			reserved_space = round_up(size - page_start,
  8373						  fs_info->sectorsize);
  8374			if (reserved_space < PAGE_SIZE) {
  8375				end = page_start + reserved_space - 1;
  8376				btrfs_delalloc_release_space(BTRFS_I(inode),
  8377						data_reserved, page_start,
  8378						PAGE_SIZE - reserved_space, true);
  8379			}
  8380		}
  8381	
  8382		/*
  8383		 * page_mkwrite gets called when the page is firstly dirtied after it's
  8384		 * faulted in, but write(2) could also dirty a page and set delalloc
  8385		 * bits, thus in this case for space account reason, we still need to
  8386		 * clear any delalloc bits within this page range since we have to
  8387		 * reserve data&meta space before lock_page() (see above comments).
  8388		 */
  8389		clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, end,
  8390				  EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
  8391				  EXTENT_DEFRAG, 0, 0, &cached_state);
  8392	
  8393		ret2 = btrfs_set_extent_delalloc(BTRFS_I(inode), page_start, end, 0,
  8394						&cached_state);
  8395		if (ret2) {
  8396			unlock_extent_cached(io_tree, page_start, page_end,
  8397					     &cached_state);
  8398			ret = VM_FAULT_SIGBUS;
  8399			goto out_unlock;
  8400		}
  8401	
  8402		/* page is wholly or partially inside EOF */
  8403		if (page_start + PAGE_SIZE > size)
  8404			zero_start = offset_in_page(size);
  8405		else
  8406			zero_start = PAGE_SIZE;
  8407	
  8408		if (zero_start != PAGE_SIZE) {
  8409			kaddr = kmap(page);
  8410			memset(kaddr + zero_start, 0, PAGE_SIZE - zero_start);
  8411			flush_dcache_page(page);
  8412			kunmap(page);
  8413		}
  8414		ClearPageChecked(page);
  8415		set_page_dirty(page);
  8416		SetPageUptodate(page);
  8417	
  8418		BTRFS_I(inode)->last_trans = fs_info->generation;
  8419		BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
  8420		BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit;
  8421	
  8422		unlock_extent_cached(io_tree, page_start, page_end, &cached_state);
  8423	
  8424		btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
  8425		sb_end_pagefault(inode->i_sb);
  8426		extent_changeset_free(data_reserved);
  8427		return VM_FAULT_LOCKED;
  8428	
  8429	out_unlock:
  8430		unlock_page(page);
  8431	out:
  8432		btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
  8433		btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, page_start,
  8434					     reserved_space, (ret != 0));
  8435	out_noreserve:
  8436		sb_end_pagefault(inode->i_sb);
  8437		extent_changeset_free(data_reserved);
  8438		return ret;
  8439	}
  8440	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Qu Wenruo Jan. 6, 2021, 5:32 a.m. UTC | #2
On 2021/1/6 下午1:04, kernel test robot wrote:
> Hi Qu,
>
> Thank you for the patch! Perhaps something to improve:
>
> [auto build test WARNING on kdave/for-next]
> [also build test WARNING on v5.11-rc2 next-20210104]
> [If your patch is applied to the wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use '--base' as documented in
> https://git-scm.com/docs/git-format-patch]
>
> url:    https://github.com/0day-ci/linux/commits/Qu-Wenruo/btrfs-add-read-only-support-for-subpage-sector-size/20210106-090847
> base:   https://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git for-next
> config: m68k-randconfig-s032-20210106 (attached as .config)
> compiler: m68k-linux-gcc (GCC) 9.3.0
> reproduce:
>          wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
>          chmod +x ~/bin/make.cross
>          # apt-get install sparse
>          # sparse version: v0.6.3-208-g46a52ca4-dirty
>          # https://github.com/0day-ci/linux/commit/2c54bbf363f66a7c4d489fa0b7967ce5fc960afb
>          git remote add linux-review https://github.com/0day-ci/linux
>          git fetch --no-tags linux-review Qu-Wenruo/btrfs-add-read-only-support-for-subpage-sector-size/20210106-090847
>          git checkout 2c54bbf363f66a7c4d489fa0b7967ce5fc960afb
>          # save the attached .config to linux build tree
>          COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=m68k
>
> If you fix the issue, kindly add following tag as appropriate
> Reported-by: kernel test robot <lkp@intel.com>
>
>
> "sparse warnings: (new ones prefixed by >>)"
>>> fs/btrfs/inode.c:8352:13: sparse: sparse: incorrect type in assignment (different base types) @@     expected restricted vm_fault_t [assigned] [usertype] ret @@     got int @@
>     fs/btrfs/inode.c:8352:13: sparse:     expected restricted vm_fault_t [assigned] [usertype] ret
>     fs/btrfs/inode.c:8352:13: sparse:     got int
>>> fs/btrfs/inode.c:8353:13: sparse: sparse: restricted vm_fault_t degrades to integer

Why do I always forget this...

Now it gets properly fixed in github, although the submitted patch is
still using @ret rather than @ret2.

Is there any way to let LKP run on a specific branch so that I can avoid
similar problems?

Thanks,
Qu
>     fs/btrfs/inode.c: note: in included file (through include/linux/mmzone.h, include/linux/gfp.h, include/linux/slab.h, ...):
>     include/linux/spinlock.h:394:9: sparse: sparse: context imbalance in 'run_delayed_iput_locked' - unexpected unlock
>
> vim +8352 fs/btrfs/inode.c
>
>    8275
>    8276	/*
>    8277	 * btrfs_page_mkwrite() is not allowed to change the file size as it gets
>    8278	 * called from a page fault handler when a page is first dirtied. Hence we must
>    8279	 * be careful to check for EOF conditions here. We set the page up correctly
>    8280	 * for a written page which means we get ENOSPC checking when writing into
>    8281	 * holes and correct delalloc and unwritten extent mapping on filesystems that
>    8282	 * support these features.
>    8283	 *
>    8284	 * We are not allowed to take the i_mutex here so we have to play games to
>    8285	 * protect against truncate races as the page could now be beyond EOF.  Because
>    8286	 * truncate_setsize() writes the inode size before removing pages, once we have
>    8287	 * the page lock we can determine safely if the page is beyond EOF. If it is not
>    8288	 * beyond EOF, then the page is guaranteed safe against truncation until we
>    8289	 * unlock the page.
>    8290	 */
>    8291	vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf)
>    8292	{
>    8293		struct page *page = vmf->page;
>    8294		struct inode *inode = file_inode(vmf->vma->vm_file);
>    8295		struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
>    8296		struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
>    8297		struct btrfs_ordered_extent *ordered;
>    8298		struct extent_state *cached_state = NULL;
>    8299		struct extent_changeset *data_reserved = NULL;
>    8300		char *kaddr;
>    8301		unsigned long zero_start;
>    8302		loff_t size;
>    8303		vm_fault_t ret;
>    8304		int ret2;
>    8305		int reserved = 0;
>    8306		u64 reserved_space;
>    8307		u64 page_start;
>    8308		u64 page_end;
>    8309		u64 end;
>    8310
>    8311		reserved_space = PAGE_SIZE;
>    8312
>    8313		sb_start_pagefault(inode->i_sb);
>    8314		page_start = page_offset(page);
>    8315		page_end = page_start + PAGE_SIZE - 1;
>    8316		end = page_end;
>    8317
>    8318		/*
>    8319		 * Reserving delalloc space after obtaining the page lock can lead to
>    8320		 * deadlock. For example, if a dirty page is locked by this function
>    8321		 * and the call to btrfs_delalloc_reserve_space() ends up triggering
>    8322		 * dirty page write out, then the btrfs_writepage() function could
>    8323		 * end up waiting indefinitely to get a lock on the page currently
>    8324		 * being processed by btrfs_page_mkwrite() function.
>    8325		 */
>    8326		ret2 = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved,
>    8327						    page_start, reserved_space);
>    8328		if (!ret2) {
>    8329			ret2 = file_update_time(vmf->vma->vm_file);
>    8330			reserved = 1;
>    8331		}
>    8332		if (ret2) {
>    8333			ret = vmf_error(ret2);
>    8334			if (reserved)
>    8335				goto out;
>    8336			goto out_noreserve;
>    8337		}
>    8338
>    8339		ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
>    8340	again:
>    8341		lock_page(page);
>    8342		size = i_size_read(inode);
>    8343
>    8344		if ((page->mapping != inode->i_mapping) ||
>    8345		    (page_start >= size)) {
>    8346			/* page got truncated out from underneath us */
>    8347			goto out_unlock;
>    8348		}
>    8349		wait_on_page_writeback(page);
>    8350
>    8351		lock_extent_bits(io_tree, page_start, page_end, &cached_state);
>> 8352		ret = set_page_extent_mapped(page);
>> 8353		if (ret < 0)
>    8354			goto out_unlock;
>    8355
>    8356		/*
>    8357		 * we can't set the delalloc bits if there are pending ordered
>    8358		 * extents.  Drop our locks and wait for them to finish
>    8359		 */
>    8360		ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), page_start,
>    8361				PAGE_SIZE);
>    8362		if (ordered) {
>    8363			unlock_extent_cached(io_tree, page_start, page_end,
>    8364					     &cached_state);
>    8365			unlock_page(page);
>    8366			btrfs_start_ordered_extent(ordered, 1);
>    8367			btrfs_put_ordered_extent(ordered);
>    8368			goto again;
>    8369		}
>    8370
>    8371		if (page->index == ((size - 1) >> PAGE_SHIFT)) {
>    8372			reserved_space = round_up(size - page_start,
>    8373						  fs_info->sectorsize);
>    8374			if (reserved_space < PAGE_SIZE) {
>    8375				end = page_start + reserved_space - 1;
>    8376				btrfs_delalloc_release_space(BTRFS_I(inode),
>    8377						data_reserved, page_start,
>    8378						PAGE_SIZE - reserved_space, true);
>    8379			}
>    8380		}
>    8381
>    8382		/*
>    8383		 * page_mkwrite gets called when the page is firstly dirtied after it's
>    8384		 * faulted in, but write(2) could also dirty a page and set delalloc
>    8385		 * bits, thus in this case for space account reason, we still need to
>    8386		 * clear any delalloc bits within this page range since we have to
>    8387		 * reserve data&meta space before lock_page() (see above comments).
>    8388		 */
>    8389		clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, end,
>    8390				  EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
>    8391				  EXTENT_DEFRAG, 0, 0, &cached_state);
>    8392
>    8393		ret2 = btrfs_set_extent_delalloc(BTRFS_I(inode), page_start, end, 0,
>    8394						&cached_state);
>    8395		if (ret2) {
>    8396			unlock_extent_cached(io_tree, page_start, page_end,
>    8397					     &cached_state);
>    8398			ret = VM_FAULT_SIGBUS;
>    8399			goto out_unlock;
>    8400		}
>    8401
>    8402		/* page is wholly or partially inside EOF */
>    8403		if (page_start + PAGE_SIZE > size)
>    8404			zero_start = offset_in_page(size);
>    8405		else
>    8406			zero_start = PAGE_SIZE;
>    8407
>    8408		if (zero_start != PAGE_SIZE) {
>    8409			kaddr = kmap(page);
>    8410			memset(kaddr + zero_start, 0, PAGE_SIZE - zero_start);
>    8411			flush_dcache_page(page);
>    8412			kunmap(page);
>    8413		}
>    8414		ClearPageChecked(page);
>    8415		set_page_dirty(page);
>    8416		SetPageUptodate(page);
>    8417
>    8418		BTRFS_I(inode)->last_trans = fs_info->generation;
>    8419		BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
>    8420		BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit;
>    8421
>    8422		unlock_extent_cached(io_tree, page_start, page_end, &cached_state);
>    8423
>    8424		btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
>    8425		sb_end_pagefault(inode->i_sb);
>    8426		extent_changeset_free(data_reserved);
>    8427		return VM_FAULT_LOCKED;
>    8428
>    8429	out_unlock:
>    8430		unlock_page(page);
>    8431	out:
>    8432		btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
>    8433		btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, page_start,
>    8434					     reserved_space, (ret != 0));
>    8435	out_noreserve:
>    8436		sb_end_pagefault(inode->i_sb);
>    8437		extent_changeset_free(data_reserved);
>    8438		return ret;
>    8439	}
>    8440
>
> ---
> 0-DAY CI Kernel Test Service, Intel Corporation
> https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
>
kernel test robot Jan. 6, 2021, 6:48 a.m. UTC | #3
On 1/6/21 1:32 PM, Qu Wenruo wrote:
>
>
> On 2021/1/6 下午1:04, kernel test robot wrote:
>> Hi Qu,
>>
>> Thank you for the patch! Perhaps something to improve:
>>
>> [auto build test WARNING on kdave/for-next]
>> [also build test WARNING on v5.11-rc2 next-20210104]
>> [If your patch is applied to the wrong git tree, kindly drop us a note.
>> And when submitting patch, we suggest to use '--base' as documented in
>> https://git-scm.com/docs/git-format-patch]
>>
>> url: 
>> https://github.com/0day-ci/linux/commits/Qu-Wenruo/btrfs-add-read-only-support-for-subpage-sector-size/20210106-090847
>> base: https://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git 
>> for-next
>> config: m68k-randconfig-s032-20210106 (attached as .config)
>> compiler: m68k-linux-gcc (GCC) 9.3.0
>> reproduce:
>>          wget 
>> https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross 
>> -O ~/bin/make.cross
>>          chmod +x ~/bin/make.cross
>>          # apt-get install sparse
>>          # sparse version: v0.6.3-208-g46a52ca4-dirty
>>          # 
>> https://github.com/0day-ci/linux/commit/2c54bbf363f66a7c4d489fa0b7967ce5fc960afb
>>          git remote add linux-review https://github.com/0day-ci/linux
>>          git fetch --no-tags linux-review 
>> Qu-Wenruo/btrfs-add-read-only-support-for-subpage-sector-size/20210106-090847
>>          git checkout 2c54bbf363f66a7c4d489fa0b7967ce5fc960afb
>>          # save the attached .config to linux build tree
>>          COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 
>> make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=m68k
>>
>> If you fix the issue, kindly add following tag as appropriate
>> Reported-by: kernel test robot <lkp@intel.com>
>>
>>
>> "sparse warnings: (new ones prefixed by >>)"
>>>> fs/btrfs/inode.c:8352:13: sparse: sparse: incorrect type in 
>>>> assignment (different base types) @@     expected restricted 
>>>> vm_fault_t [assigned] [usertype] ret @@     got int @@
>>     fs/btrfs/inode.c:8352:13: sparse:     expected restricted 
>> vm_fault_t [assigned] [usertype] ret
>>     fs/btrfs/inode.c:8352:13: sparse:     got int
>>>> fs/btrfs/inode.c:8353:13: sparse: sparse: restricted vm_fault_t 
>>>> degrades to integer
>
> Why do I always forget this...
>
> Now it gets properly fixed in github, although the submitted patch is
> still using @ret rather than @ret2.
>
> Is there any way to let LKP run on a specific branch so that I can avoid
> similar problems?

Hi Qu,

LKP can test private branches if you can provide the git tree url, 
please see
https://github.com/intel/lkp-tests/wiki/LKP-FAQ#how-to-request-testing-a-new-kernel-tree-on-lkp

Best Regards,
Rong Chen

>
> Thanks,
> Qu
>>     fs/btrfs/inode.c: note: in included file (through 
>> include/linux/mmzone.h, include/linux/gfp.h, include/linux/slab.h, ...):
>>     include/linux/spinlock.h:394:9: sparse: sparse: context imbalance 
>> in 'run_delayed_iput_locked' - unexpected unlock
>>
>> vim +8352 fs/btrfs/inode.c
>>
>>    8275
>>    8276    /*
>>    8277     * btrfs_page_mkwrite() is not allowed to change the file 
>> size as it gets
>>    8278     * called from a page fault handler when a page is first 
>> dirtied. Hence we must
>>    8279     * be careful to check for EOF conditions here. We set the 
>> page up correctly
>>    8280     * for a written page which means we get ENOSPC checking 
>> when writing into
>>    8281     * holes and correct delalloc and unwritten extent mapping 
>> on filesystems that
>>    8282     * support these features.
>>    8283     *
>>    8284     * We are not allowed to take the i_mutex here so we have 
>> to play games to
>>    8285     * protect against truncate races as the page could now be 
>> beyond EOF.  Because
>>    8286     * truncate_setsize() writes the inode size before 
>> removing pages, once we have
>>    8287     * the page lock we can determine safely if the page is 
>> beyond EOF. If it is not
>>    8288     * beyond EOF, then the page is guaranteed safe against 
>> truncation until we
>>    8289     * unlock the page.
>>    8290     */
>>    8291    vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf)
>>    8292    {
>>    8293        struct page *page = vmf->page;
>>    8294        struct inode *inode = file_inode(vmf->vma->vm_file);
>>    8295        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
>>    8296        struct extent_io_tree *io_tree = 
>> &BTRFS_I(inode)->io_tree;
>>    8297        struct btrfs_ordered_extent *ordered;
>>    8298        struct extent_state *cached_state = NULL;
>>    8299        struct extent_changeset *data_reserved = NULL;
>>    8300        char *kaddr;
>>    8301        unsigned long zero_start;
>>    8302        loff_t size;
>>    8303        vm_fault_t ret;
>>    8304        int ret2;
>>    8305        int reserved = 0;
>>    8306        u64 reserved_space;
>>    8307        u64 page_start;
>>    8308        u64 page_end;
>>    8309        u64 end;
>>    8310
>>    8311        reserved_space = PAGE_SIZE;
>>    8312
>>    8313        sb_start_pagefault(inode->i_sb);
>>    8314        page_start = page_offset(page);
>>    8315        page_end = page_start + PAGE_SIZE - 1;
>>    8316        end = page_end;
>>    8317
>>    8318        /*
>>    8319         * Reserving delalloc space after obtaining the page 
>> lock can lead to
>>    8320         * deadlock. For example, if a dirty page is locked by 
>> this function
>>    8321         * and the call to btrfs_delalloc_reserve_space() ends 
>> up triggering
>>    8322         * dirty page write out, then the btrfs_writepage() 
>> function could
>>    8323         * end up waiting indefinitely to get a lock on the 
>> page currently
>>    8324         * being processed by btrfs_page_mkwrite() function.
>>    8325         */
>>    8326        ret2 = btrfs_delalloc_reserve_space(BTRFS_I(inode), 
>> &data_reserved,
>>    8327                            page_start, reserved_space);
>>    8328        if (!ret2) {
>>    8329            ret2 = file_update_time(vmf->vma->vm_file);
>>    8330            reserved = 1;
>>    8331        }
>>    8332        if (ret2) {
>>    8333            ret = vmf_error(ret2);
>>    8334            if (reserved)
>>    8335                goto out;
>>    8336            goto out_noreserve;
>>    8337        }
>>    8338
>>    8339        ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
>>    8340    again:
>>    8341        lock_page(page);
>>    8342        size = i_size_read(inode);
>>    8343
>>    8344        if ((page->mapping != inode->i_mapping) ||
>>    8345            (page_start >= size)) {
>>    8346            /* page got truncated out from underneath us */
>>    8347            goto out_unlock;
>>    8348        }
>>    8349        wait_on_page_writeback(page);
>>    8350
>>    8351        lock_extent_bits(io_tree, page_start, page_end, 
>> &cached_state);
>>> 8352        ret = set_page_extent_mapped(page);
>>> 8353        if (ret < 0)
>>    8354            goto out_unlock;
>>    8355
>>    8356        /*
>>    8357         * we can't set the delalloc bits if there are pending 
>> ordered
>>    8358         * extents.  Drop our locks and wait for them to finish
>>    8359         */
>>    8360        ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), 
>> page_start,
>>    8361                PAGE_SIZE);
>>    8362        if (ordered) {
>>    8363            unlock_extent_cached(io_tree, page_start, page_end,
>>    8364                         &cached_state);
>>    8365            unlock_page(page);
>>    8366            btrfs_start_ordered_extent(ordered, 1);
>>    8367            btrfs_put_ordered_extent(ordered);
>>    8368            goto again;
>>    8369        }
>>    8370
>>    8371        if (page->index == ((size - 1) >> PAGE_SHIFT)) {
>>    8372            reserved_space = round_up(size - page_start,
>>    8373                          fs_info->sectorsize);
>>    8374            if (reserved_space < PAGE_SIZE) {
>>    8375                end = page_start + reserved_space - 1;
>>    8376 btrfs_delalloc_release_space(BTRFS_I(inode),
>>    8377                        data_reserved, page_start,
>>    8378                        PAGE_SIZE - reserved_space, true);
>>    8379            }
>>    8380        }
>>    8381
>>    8382        /*
>>    8383         * page_mkwrite gets called when the page is firstly 
>> dirtied after it's
>>    8384         * faulted in, but write(2) could also dirty a page 
>> and set delalloc
>>    8385         * bits, thus in this case for space account reason, 
>> we still need to
>>    8386         * clear any delalloc bits within this page range 
>> since we have to
>>    8387         * reserve data&meta space before lock_page() (see 
>> above comments).
>>    8388         */
>>    8389        clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, 
>> end,
>>    8390                  EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
>>    8391                  EXTENT_DEFRAG, 0, 0, &cached_state);
>>    8392
>>    8393        ret2 = btrfs_set_extent_delalloc(BTRFS_I(inode), 
>> page_start, end, 0,
>>    8394                        &cached_state);
>>    8395        if (ret2) {
>>    8396            unlock_extent_cached(io_tree, page_start, page_end,
>>    8397                         &cached_state);
>>    8398            ret = VM_FAULT_SIGBUS;
>>    8399            goto out_unlock;
>>    8400        }
>>    8401
>>    8402        /* page is wholly or partially inside EOF */
>>    8403        if (page_start + PAGE_SIZE > size)
>>    8404            zero_start = offset_in_page(size);
>>    8405        else
>>    8406            zero_start = PAGE_SIZE;
>>    8407
>>    8408        if (zero_start != PAGE_SIZE) {
>>    8409            kaddr = kmap(page);
>>    8410            memset(kaddr + zero_start, 0, PAGE_SIZE - 
>> zero_start);
>>    8411            flush_dcache_page(page);
>>    8412            kunmap(page);
>>    8413        }
>>    8414        ClearPageChecked(page);
>>    8415        set_page_dirty(page);
>>    8416        SetPageUptodate(page);
>>    8417
>>    8418        BTRFS_I(inode)->last_trans = fs_info->generation;
>>    8419        BTRFS_I(inode)->last_sub_trans = 
>> BTRFS_I(inode)->root->log_transid;
>>    8420        BTRFS_I(inode)->last_log_commit = 
>> BTRFS_I(inode)->root->last_log_commit;
>>    8421
>>    8422        unlock_extent_cached(io_tree, page_start, page_end, 
>> &cached_state);
>>    8423
>>    8424        btrfs_delalloc_release_extents(BTRFS_I(inode), 
>> PAGE_SIZE);
>>    8425        sb_end_pagefault(inode->i_sb);
>>    8426        extent_changeset_free(data_reserved);
>>    8427        return VM_FAULT_LOCKED;
>>    8428
>>    8429    out_unlock:
>>    8430        unlock_page(page);
>>    8431    out:
>>    8432        btrfs_delalloc_release_extents(BTRFS_I(inode), 
>> PAGE_SIZE);
>>    8433        btrfs_delalloc_release_space(BTRFS_I(inode), 
>> data_reserved, page_start,
>>    8434                         reserved_space, (ret != 0));
>>    8435    out_noreserve:
>>    8436        sb_end_pagefault(inode->i_sb);
>>    8437        extent_changeset_free(data_reserved);
>>    8438        return ret;
>>    8439    }
>>    8440
>>
>> ---
>> 0-DAY CI Kernel Test Service, Intel Corporation
>> https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
>>
>
kernel test robot Jan. 9, 2021, 9:53 a.m. UTC | #4
Hi Qu,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on kdave/for-next]
[also build test WARNING on v5.11-rc2 next-20210108]
[cannot apply to btrfs/next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Qu-Wenruo/btrfs-add-read-only-support-for-subpage-sector-size/20210106-090847
base:   https://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git for-next
config: i386-randconfig-m021-20210108 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-15) 9.3.0

If you fix the issue, kindly add the following tag as appropriate:
Reported-by: kernel test robot <lkp@intel.com>

smatch warnings:
fs/btrfs/inode.c:8353 btrfs_page_mkwrite() warn: unsigned 'ret' is never less than zero.

vim +/ret +8353 fs/btrfs/inode.c

  8275	
  8276	/*
  8277	 * btrfs_page_mkwrite() is not allowed to change the file size as it gets
  8278	 * called from a page fault handler when a page is first dirtied. Hence we must
  8279	 * be careful to check for EOF conditions here. We set the page up correctly
  8280	 * for a written page which means we get ENOSPC checking when writing into
  8281	 * holes and correct delalloc and unwritten extent mapping on filesystems that
  8282	 * support these features.
  8283	 *
  8284	 * We are not allowed to take the i_mutex here so we have to play games to
  8285	 * protect against truncate races as the page could now be beyond EOF.  Because
  8286	 * truncate_setsize() writes the inode size before removing pages, once we have
  8287	 * the page lock we can determine safely if the page is beyond EOF. If it is not
  8288	 * beyond EOF, then the page is guaranteed safe against truncation until we
  8289	 * unlock the page.
  8290	 */
  8291	vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf)
  8292	{
  8293		struct page *page = vmf->page;
  8294		struct inode *inode = file_inode(vmf->vma->vm_file);
  8295		struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
  8296		struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
  8297		struct btrfs_ordered_extent *ordered;
  8298		struct extent_state *cached_state = NULL;
  8299		struct extent_changeset *data_reserved = NULL;
  8300		char *kaddr;
  8301		unsigned long zero_start;
  8302		loff_t size;
  8303		vm_fault_t ret;
  8304		int ret2;
  8305		int reserved = 0;
  8306		u64 reserved_space;
  8307		u64 page_start;
  8308		u64 page_end;
  8309		u64 end;
  8310	
  8311		reserved_space = PAGE_SIZE;
  8312	
  8313		sb_start_pagefault(inode->i_sb);
  8314		page_start = page_offset(page);
  8315		page_end = page_start + PAGE_SIZE - 1;
  8316		end = page_end;
  8317	
  8318		/*
  8319		 * Reserving delalloc space after obtaining the page lock can lead to
  8320		 * deadlock. For example, if a dirty page is locked by this function
  8321		 * and the call to btrfs_delalloc_reserve_space() ends up triggering
  8322		 * dirty page write out, then the btrfs_writepage() function could
  8323		 * end up waiting indefinitely to get a lock on the page currently
  8324		 * being processed by btrfs_page_mkwrite() function.
  8325		 */
  8326		ret2 = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved,
  8327						    page_start, reserved_space);
  8328		if (!ret2) {
  8329			ret2 = file_update_time(vmf->vma->vm_file);
  8330			reserved = 1;
  8331		}
  8332		if (ret2) {
  8333			ret = vmf_error(ret2);
  8334			if (reserved)
  8335				goto out;
  8336			goto out_noreserve;
  8337		}
  8338	
  8339		ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */
  8340	again:
  8341		lock_page(page);
  8342		size = i_size_read(inode);
  8343	
  8344		if ((page->mapping != inode->i_mapping) ||
  8345		    (page_start >= size)) {
  8346			/* page got truncated out from underneath us */
  8347			goto out_unlock;
  8348		}
  8349		wait_on_page_writeback(page);
  8350	
  8351		lock_extent_bits(io_tree, page_start, page_end, &cached_state);
  8352		ret = set_page_extent_mapped(page);
> 8353		if (ret < 0)
  8354			goto out_unlock;
  8355	
  8356		/*
  8357		 * we can't set the delalloc bits if there are pending ordered
  8358		 * extents.  Drop our locks and wait for them to finish
  8359		 */
  8360		ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), page_start,
  8361				PAGE_SIZE);
  8362		if (ordered) {
  8363			unlock_extent_cached(io_tree, page_start, page_end,
  8364					     &cached_state);
  8365			unlock_page(page);
  8366			btrfs_start_ordered_extent(ordered, 1);
  8367			btrfs_put_ordered_extent(ordered);
  8368			goto again;
  8369		}
  8370	
  8371		if (page->index == ((size - 1) >> PAGE_SHIFT)) {
  8372			reserved_space = round_up(size - page_start,
  8373						  fs_info->sectorsize);
  8374			if (reserved_space < PAGE_SIZE) {
  8375				end = page_start + reserved_space - 1;
  8376				btrfs_delalloc_release_space(BTRFS_I(inode),
  8377						data_reserved, page_start,
  8378						PAGE_SIZE - reserved_space, true);
  8379			}
  8380		}
  8381	
  8382		/*
  8383		 * page_mkwrite gets called when the page is firstly dirtied after it's
  8384		 * faulted in, but write(2) could also dirty a page and set delalloc
  8385		 * bits, thus in this case for space account reason, we still need to
  8386		 * clear any delalloc bits within this page range since we have to
  8387		 * reserve data&meta space before lock_page() (see above comments).
  8388		 */
  8389		clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, end,
  8390				  EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
  8391				  EXTENT_DEFRAG, 0, 0, &cached_state);
  8392	
  8393		ret2 = btrfs_set_extent_delalloc(BTRFS_I(inode), page_start, end, 0,
  8394						&cached_state);
  8395		if (ret2) {
  8396			unlock_extent_cached(io_tree, page_start, page_end,
  8397					     &cached_state);
  8398			ret = VM_FAULT_SIGBUS;
  8399			goto out_unlock;
  8400		}
  8401	
  8402		/* page is wholly or partially inside EOF */
  8403		if (page_start + PAGE_SIZE > size)
  8404			zero_start = offset_in_page(size);
  8405		else
  8406			zero_start = PAGE_SIZE;
  8407	
  8408		if (zero_start != PAGE_SIZE) {
  8409			kaddr = kmap(page);
  8410			memset(kaddr + zero_start, 0, PAGE_SIZE - zero_start);
  8411			flush_dcache_page(page);
  8412			kunmap(page);
  8413		}
  8414		ClearPageChecked(page);
  8415		set_page_dirty(page);
  8416		SetPageUptodate(page);
  8417	
  8418		BTRFS_I(inode)->last_trans = fs_info->generation;
  8419		BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
  8420		BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit;
  8421	
  8422		unlock_extent_cached(io_tree, page_start, page_end, &cached_state);
  8423	
  8424		btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
  8425		sb_end_pagefault(inode->i_sb);
  8426		extent_changeset_free(data_reserved);
  8427		return VM_FAULT_LOCKED;
  8428	
  8429	out_unlock:
  8430		unlock_page(page);
  8431	out:
  8432		btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
  8433		btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, page_start,
  8434					     reserved_space, (ret != 0));
  8435	out_noreserve:
  8436		sb_end_pagefault(inode->i_sb);
  8437		extent_changeset_free(data_reserved);
  8438		return ret;
  8439	}
  8440	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
diff mbox series

Patch

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 5ae3fa0386b7..6d203acfdeb3 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -542,13 +542,19 @@  static noinline int add_ra_bio_pages(struct inode *inode,
 			goto next;
 		}
 
-		end = last_offset + PAGE_SIZE - 1;
 		/*
 		 * at this point, we have a locked page in the page cache
 		 * for these bytes in the file.  But, we have to make
 		 * sure they map to this compressed extent on disk.
 		 */
-		set_page_extent_mapped(page);
+		ret = set_page_extent_mapped(page);
+		if (ret < 0) {
+			unlock_page(page);
+			put_page(page);
+			break;
+		}
+
+		end = last_offset + PAGE_SIZE - 1;
 		lock_extent(tree, last_offset, end);
 		read_lock(&em_tree->lock);
 		em = lookup_extent_mapping(em_tree, last_offset,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 2902484ab9f9..335a0aa3a6ec 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3187,10 +3187,39 @@  static int attach_extent_buffer_page(struct extent_buffer *eb,
 	return 0;
 }
 
-void set_page_extent_mapped(struct page *page)
+int __must_check set_page_extent_mapped(struct page *page)
 {
+	struct btrfs_fs_info *fs_info;
+
+	ASSERT(page->mapping);
+
+	if (PagePrivate(page))
+		return 0;
+
+	fs_info = btrfs_sb(page->mapping->host->i_sb);
+
+	if (fs_info->sectorsize < PAGE_SIZE)
+		return btrfs_attach_subpage(fs_info, page);
+
+	attach_page_private(page, (void *)EXTENT_PAGE_PRIVATE);
+	return 0;
+
+}
+
+void clear_page_extent_mapped(struct page *page)
+{
+	struct btrfs_fs_info *fs_info;
+
+	ASSERT(page->mapping);
+
 	if (!PagePrivate(page))
-		attach_page_private(page, (void *)EXTENT_PAGE_PRIVATE);
+		return;
+
+	fs_info = btrfs_sb(page->mapping->host->i_sb);
+	if (fs_info->sectorsize < PAGE_SIZE)
+		return btrfs_detach_subpage(fs_info, page);
+
+	detach_page_private(page);
 }
 
 static struct extent_map *
@@ -3247,7 +3276,12 @@  int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
 	unsigned long this_bio_flag = 0;
 	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
 
-	set_page_extent_mapped(page);
+	ret = set_page_extent_mapped(page);
+	if (ret < 0) {
+		unlock_extent(tree, start, end);
+		SetPageError(page);
+		goto out;
+	}
 
 	if (!PageUptodate(page)) {
 		if (cleancache_get_page(page) == 0) {
@@ -3688,7 +3722,11 @@  static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 		flush_dcache_page(page);
 	}
 
-	set_page_extent_mapped(page);
+	ret = set_page_extent_mapped(page);
+	if (ret < 0) {
+		SetPageError(page);
+		goto done;
+	}
 
 	if (!epd->extent_locked) {
 		ret = writepage_delalloc(BTRFS_I(inode), page, wbc, start,
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index bedf761a0300..357a3380cd42 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -178,7 +178,8 @@  int btree_write_cache_pages(struct address_space *mapping,
 void extent_readahead(struct readahead_control *rac);
 int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
 		  u64 start, u64 len);
-void set_page_extent_mapped(struct page *page);
+int __must_check set_page_extent_mapped(struct page *page);
+void clear_page_extent_mapped(struct page *page);
 
 struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 					  u64 start, u64 owner_root, int level);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 1602975ddb88..a6f627f92c64 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1369,6 +1369,12 @@  static noinline int prepare_pages(struct inode *inode, struct page **pages,
 			goto fail;
 		}
 
+		err = set_page_extent_mapped(pages[i]);
+		if (err < 0) {
+			faili = i;
+			goto fail;
+		}
+
 		if (i == 0)
 			err = prepare_uptodate_page(inode, pages[i], pos,
 						    force_uptodate);
@@ -1453,23 +1459,11 @@  lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
 	}
 
 	/*
-	 * It's possible the pages are dirty right now, but we don't want
-	 * to clean them yet because copy_from_user may catch a page fault
-	 * and we might have to fall back to one page at a time.  If that
-	 * happens, we'll unlock these pages and we'd have a window where
-	 * reclaim could sneak in and drop the once-dirty page on the floor
-	 * without writing it.
-	 *
-	 * We have the pages locked and the extent range locked, so there's
-	 * no way someone can start IO on any dirty pages in this range.
-	 *
-	 * We'll call btrfs_dirty_pages() later on, and that will flip around
-	 * delalloc bits and dirty the pages as required.
+	 * We should be called after prepare_pages() which should have
+	 * locked all pages in the range.
 	 */
-	for (i = 0; i < num_pages; i++) {
-		set_page_extent_mapped(pages[i]);
+	for (i = 0; i < num_pages; i++)
 		WARN_ON(!PageLocked(pages[i]));
-	}
 
 	return ret;
 }
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index fd6ddd6b8165..379bef967e1d 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -431,11 +431,22 @@  static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, bool uptodate)
 	int i;
 
 	for (i = 0; i < io_ctl->num_pages; i++) {
+		int ret;
+
 		page = find_or_create_page(inode->i_mapping, i, mask);
 		if (!page) {
 			io_ctl_drop_pages(io_ctl);
 			return -ENOMEM;
 		}
+
+		ret = set_page_extent_mapped(page);
+		if (ret < 0) {
+			unlock_page(page);
+			put_page(page);
+			io_ctl_drop_pages(io_ctl);
+			return -ENOMEM;
+		}
+
 		io_ctl->pages[i] = page;
 		if (uptodate && !PageUptodate(page)) {
 			btrfs_readpage(NULL, page);
@@ -455,10 +466,8 @@  static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, bool uptodate)
 		}
 	}
 
-	for (i = 0; i < io_ctl->num_pages; i++) {
+	for (i = 0; i < io_ctl->num_pages; i++)
 		clear_page_dirty_for_io(io_ctl->pages[i]);
-		set_page_extent_mapped(io_ctl->pages[i]);
-	}
 
 	return 0;
 }
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ecc1f1f60b48..0cf3a0b7e98c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4712,6 +4712,9 @@  int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len,
 		ret = -ENOMEM;
 		goto out;
 	}
+	ret = set_page_extent_mapped(page);
+	if (ret < 0)
+		goto out_unlock;
 
 	if (!PageUptodate(page)) {
 		ret = btrfs_readpage(NULL, page);
@@ -4729,7 +4732,6 @@  int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len,
 	wait_on_page_writeback(page);
 
 	lock_extent_bits(io_tree, block_start, block_end, &cached_state);
-	set_page_extent_mapped(page);
 
 	ordered = btrfs_lookup_ordered_extent(inode, block_start);
 	if (ordered) {
@@ -8109,7 +8111,7 @@  static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
 {
 	int ret = try_release_extent_mapping(page, gfp_flags);
 	if (ret == 1)
-		detach_page_private(page);
+		clear_page_extent_mapped(page);
 	return ret;
 }
 
@@ -8268,7 +8270,7 @@  static void btrfs_invalidatepage(struct page *page, unsigned int offset,
 	}
 
 	ClearPageChecked(page);
-	detach_page_private(page);
+	clear_page_extent_mapped(page);
 }
 
 /*
@@ -8347,7 +8349,9 @@  vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf)
 	wait_on_page_writeback(page);
 
 	lock_extent_bits(io_tree, page_start, page_end, &cached_state);
-	set_page_extent_mapped(page);
+	ret = set_page_extent_mapped(page);
+	if (ret < 0)
+		goto out_unlock;
 
 	/*
 	 * we can't set the delalloc bits if there are pending ordered
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 5b9b0a390f0e..5a93530bca46 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1314,6 +1314,10 @@  static int cluster_pages_for_defrag(struct inode *inode,
 		if (!page)
 			break;
 
+		ret = set_page_extent_mapped(page);
+		if (ret < 0)
+			break;
+
 		page_start = page_offset(page);
 		page_end = page_start + PAGE_SIZE - 1;
 		while (1) {
@@ -1435,7 +1439,6 @@  static int cluster_pages_for_defrag(struct inode *inode,
 	for (i = 0; i < i_done; i++) {
 		clear_page_dirty_for_io(pages[i]);
 		ClearPageChecked(pages[i]);
-		set_page_extent_mapped(pages[i]);
 		set_page_dirty(pages[i]);
 		unlock_page(pages[i]);
 		put_page(pages[i]);
diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c
index b03e7891394e..b24396cf2f99 100644
--- a/fs/btrfs/reflink.c
+++ b/fs/btrfs/reflink.c
@@ -81,7 +81,10 @@  static int copy_inline_to_page(struct btrfs_inode *inode,
 		goto out_unlock;
 	}
 
-	set_page_extent_mapped(page);
+	ret = set_page_extent_mapped(page);
+	if (ret < 0)
+		goto out_unlock;
+
 	clear_extent_bit(&inode->io_tree, file_offset, range_end,
 			 EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
 			 0, 0, NULL);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 8e51b39cbfbb..d917fdef0cbf 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2679,6 +2679,16 @@  static int relocate_file_extent_cluster(struct inode *inode,
 				goto out;
 			}
 		}
+		ret = set_page_extent_mapped(page);
+		if (ret < 0) {
+			btrfs_delalloc_release_metadata(BTRFS_I(inode),
+						PAGE_SIZE, true);
+			btrfs_delalloc_release_extents(BTRFS_I(inode),
+						PAGE_SIZE);
+			unlock_page(page);
+			put_page(page);
+			goto out;
+		}
 
 		if (PageReadahead(page)) {
 			page_cache_async_readahead(inode->i_mapping,
@@ -2706,8 +2716,6 @@  static int relocate_file_extent_cluster(struct inode *inode,
 
 		lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end);
 
-		set_page_extent_mapped(page);
-
 		if (nr < cluster->nr &&
 		    page_start + offset == cluster->boundary[nr]) {
 			set_extent_bits(&BTRFS_I(inode)->io_tree,