diff mbox series

[3/3] btrfs: utilize the physically/virtually continuous extent buffer memory

Message ID 8bc15bfdaa2805d1d1b660b8b2e07a55aa02027d.1692858397.git.wqu@suse.com (mailing list archive)
State New, archived
Headers show
Series btrfs: make extent buffer memory continuous | expand

Commit Message

Qu Wenruo Aug. 24, 2023, 6:33 a.m. UTC
Since the extent buffer pages are either physically or virtually
continuous, let's benefit from the new feature.

This involves the following changes:

- Extent buffer accessors
  Now read/write/memcpy/memmove_extent_buffer() functions are just
  a wrapper of memcpy()/memmove().

  The cross-page handling is done transparently by the hardware MMU.

- Extent buffer bitmap accessors

- csum_tree_block()
  We can directly call crypto_shash_digest(), as we no longer need to
  handle page boundaries.

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/disk-io.c   |  18 +--
 fs/btrfs/extent_io.c | 282 +++++--------------------------------------
 fs/btrfs/extent_io.h |  10 ++
 3 files changed, 47 insertions(+), 263 deletions(-)

Comments

kernel test robot Sept. 6, 2023, 2:45 a.m. UTC | #1
Hello,

kernel test robot noticed a 12.0% improvement of filebench.sum_operations/s on:


commit: 2fa4ac9754a7fa77bad88aae11ac77ba137d3858 ("[PATCH 3/3] btrfs: utilize the physically/virtually continuous extent buffer memory")
url: https://github.com/intel-lab-lkp/linux/commits/Qu-Wenruo/btrfs-warn-on-tree-blocks-which-are-not-nodesize-aligned/20230824-143628
base: https://git.kernel.org/cgit/linux/kernel/git/kdave/linux.git for-next
patch link: https://lore.kernel.org/all/8bc15bfdaa2805d1d1b660b8b2e07a55aa02027d.1692858397.git.wqu@suse.com/
patch subject: [PATCH 3/3] btrfs: utilize the physically/virtually continuous extent buffer memory

testcase: filebench
test machine: 96 threads 2 sockets (Ice Lake) with 128G memory
parameters:

	disk: 1HDD
	fs: btrfs
	fs2: cifs
	test: webproxy.f
	cpufreq_governor: performance






Details are as below:
-------------------------------------------------------------------------------------------------->


The kernel config and materials to reproduce are available at:
https://download.01.org/0day-ci/archive/20230906/202309061050.19c12499-oliver.sang@intel.com

=========================================================================================
compiler/cpufreq_governor/disk/fs2/fs/kconfig/rootfs/tbox_group/test/testcase:
  gcc-12/performance/1HDD/cifs/btrfs/x86_64-rhel-8.3/debian-11.1-x86_64-20220510.cgz/lkp-icl-2sp1/webproxy.f/filebench

commit: 
  19e81514b8 ("btrfs: map uncontinuous extent buffer pages into virtual address space")
  2fa4ac9754 ("btrfs: utilize the physically/virtually continuous extent buffer memory")

19e81514b8c09202 2fa4ac9754a7fa77bad88aae11a 
---------------- --------------------------- 
         %stddev     %change         %stddev
             \          |                \  
     30592 ±194%     -92.3%       2343 ± 24%  sched_debug.cpu.avg_idle.min
      1.38            -5.9%       1.30        iostat.cpu.iowait
      4.63            +8.9%       5.04        iostat.cpu.system
      2.56            +0.5        3.09        mpstat.cpu.all.sys%
      0.54            +0.1        0.61        mpstat.cpu.all.usr%
      1996            +3.3%       2062        vmstat.io.bo
     33480           +13.5%      37993        vmstat.system.cs
    152.67           +12.6%     171.83        turbostat.Avg_MHz
      2562            +4.2%       2670        turbostat.Bzy_MHz
      5.34            +0.5        5.83        turbostat.C1E%
      7.12 ± 12%     -21.6%       5.58 ± 12%  turbostat.Pkg%pc2
    209.72            +1.5%     212.81        turbostat.PkgWatt
      4.92 ± 24%      +3.5        8.37 ± 32%  perf-profile.calltrace.cycles-pp.cpuidle_enter.cpuidle_idle_call.do_idle.cpu_startup_entry.start_secondary
      5.13 ± 28%      +3.6        8.68 ± 31%  perf-profile.calltrace.cycles-pp.cpuidle_idle_call.do_idle.cpu_startup_entry.start_secondary.secondary_startup_64_no_verify
      5.13 ± 28%      +3.8        8.90 ± 30%  perf-profile.calltrace.cycles-pp.cpuidle_enter_state.cpuidle_enter.cpuidle_idle_call.do_idle.cpu_startup_entry
      5.13 ± 28%      +3.8        8.90 ± 30%  perf-profile.children.cycles-pp.cpuidle_enter
      5.13 ± 28%      +3.8        8.90 ± 30%  perf-profile.children.cycles-pp.cpuidle_enter_state
      5.34 ± 34%      +3.9        9.21 ± 28%  perf-profile.children.cycles-pp.cpuidle_idle_call
     13.90            +9.6%      15.23        filebench.sum_bytes_mb/s
    238030           +12.0%     266575        filebench.sum_operations
      3966           +12.0%       4442        filebench.sum_operations/s
      1043           +12.0%       1168        filebench.sum_reads/s
     25.14           -10.7%      22.46        filebench.sum_time_ms/op
    208.83           +11.9%     233.67        filebench.sum_writes/s
    506705            +5.8%     536097        filebench.time.file_system_outputs
      1597 ±  5%     -36.1%       1020 ±  3%  filebench.time.involuntary_context_switches
     61810 ±  2%      +6.0%      65519        filebench.time.minor_page_faults
    157.67 ±  2%     +31.5%     207.33        filebench.time.percent_of_cpu_this_job_got
    117.60 ±  2%     +27.1%     149.48        filebench.time.system_time
    375177           +10.3%     413862        filebench.time.voluntary_context_switches
     18717            +6.5%      19942        proc-vmstat.nr_active_anon
     20206            +1.2%      20445        proc-vmstat.nr_active_file
    298911            +2.2%     305406        proc-vmstat.nr_anon_pages
    132893            +5.6%     140397        proc-vmstat.nr_dirtied
    313040            +2.0%     319443        proc-vmstat.nr_inactive_anon
     32910            +3.4%      34035        proc-vmstat.nr_shmem
     62503            +1.4%      63367        proc-vmstat.nr_slab_unreclaimable
     99471            +3.7%     103159        proc-vmstat.nr_written
     18717            +6.5%      19942        proc-vmstat.nr_zone_active_anon
     20206            +1.2%      20445        proc-vmstat.nr_zone_active_file
    313040            +2.0%     319443        proc-vmstat.nr_zone_inactive_anon
    943632            +3.2%     974142        proc-vmstat.numa_hit
    841654            +3.6%     871757        proc-vmstat.numa_local
    453634 ± 17%     +27.0%     576268 ±  5%  proc-vmstat.numa_pte_updates
     87464            +6.1%      92814        proc-vmstat.pgactivate
   1595438            +2.9%    1641074        proc-vmstat.pgalloc_normal
   1453326            +3.0%    1497530        proc-vmstat.pgfree
     17590 ±  5%     +14.0%      20045 ±  7%  proc-vmstat.pgreuse
    732160            -1.8%     719104        proc-vmstat.unevictable_pgs_scanned
     19.10            -8.1%      17.55        perf-stat.i.MPKI
 2.039e+09           +17.3%  2.393e+09        perf-stat.i.branch-instructions
      1.27 ±  2%      -0.1        1.15        perf-stat.i.branch-miss-rate%
  25600761            +5.8%   27075672        perf-stat.i.branch-misses
   5037721 ±  4%     +11.4%    5612619        perf-stat.i.cache-misses
 1.632e+08            +5.9%  1.729e+08        perf-stat.i.cache-references
     34079           +14.1%      38871        perf-stat.i.context-switches
 1.326e+10           +14.7%  1.521e+10        perf-stat.i.cpu-cycles
    551.02 ±  2%     +21.0%     666.59 ±  3%  perf-stat.i.cpu-migrations
   3953434 ±  2%     +10.8%    4381924 ±  3%  perf-stat.i.dTLB-load-misses
 2.343e+09           +15.4%  2.704e+09        perf-stat.i.dTLB-loads
 1.141e+09           +14.3%  1.303e+09        perf-stat.i.dTLB-stores
 9.047e+09           +14.9%  1.039e+10        perf-stat.i.instructions
      0.69            +2.0%       0.71        perf-stat.i.ipc
      0.14           +14.7%       0.16        perf-stat.i.metric.GHz
     34.94 ±  4%     +11.1%      38.80        perf-stat.i.metric.K/sec
     59.21           +15.6%      68.43        perf-stat.i.metric.M/sec
      3999 ±  3%      +6.3%       4250        perf-stat.i.minor-faults
   1116010 ±  4%     +14.8%    1280875 ±  2%  perf-stat.i.node-load-misses
   1168171 ±  3%      +7.9%    1259922 ±  2%  perf-stat.i.node-stores
      3999 ±  3%      +6.3%       4250        perf-stat.i.page-faults
     18.04            -7.8%      16.64        perf-stat.overall.MPKI
      1.26 ±  2%      -0.1        1.13        perf-stat.overall.branch-miss-rate%
 2.012e+09           +17.3%  2.359e+09        perf-stat.ps.branch-instructions
  25253051            +5.7%   26690222        perf-stat.ps.branch-misses
   4970910 ±  4%     +11.3%    5534021        perf-stat.ps.cache-misses
  1.61e+08            +5.9%  1.705e+08        perf-stat.ps.cache-references
     33628           +14.0%      38332        perf-stat.ps.context-switches
 1.308e+10           +14.6%    1.5e+10        perf-stat.ps.cpu-cycles
    543.73 ±  2%     +20.9%     657.37 ±  3%  perf-stat.ps.cpu-migrations
   3900887 ±  2%     +10.8%    4321011 ±  3%  perf-stat.ps.dTLB-load-misses
 2.312e+09           +15.3%  2.666e+09        perf-stat.ps.dTLB-loads
 1.125e+09           +14.2%  1.285e+09        perf-stat.ps.dTLB-stores
 8.925e+09           +14.8%  1.024e+10        perf-stat.ps.instructions
      3943 ±  3%      +6.2%       4187        perf-stat.ps.minor-faults
   1101275 ±  4%     +14.7%    1263151 ±  2%  perf-stat.ps.node-load-misses
   1152648 ±  3%      +7.7%    1241973 ±  2%  perf-stat.ps.node-stores
      3943 ±  3%      +6.2%       4187        perf-stat.ps.page-faults
 6.777e+11           +10.5%   7.49e+11        perf-stat.total.instructions
      0.01 ±  7%     -28.2%       0.00 ± 26%  perf-sched.sch_delay.avg.ms.schedule_preempt_disabled.rwsem_down_read_slowpath.down_read.__btrfs_tree_read_lock
      0.30 ± 35%     -63.0%       0.11 ± 25%  perf-sched.sch_delay.max.ms.__cond_resched.__kmem_cache_alloc_node.__kmalloc.cifs_strndup_to_utf16.cifs_convert_path_to_utf16
     30.21 ±  3%      -6.2%      28.33 ±  3%  perf-sched.total_wait_and_delay.average.ms
     30.15 ±  3%      -6.2%      28.28 ±  3%  perf-sched.total_wait_time.average.ms
      1.08           -20.5%       0.86 ±  2%  perf-sched.wait_and_delay.avg.ms.io_schedule.folio_wait_bit_common.filemap_update_page.filemap_get_pages
     99.86 ± 27%     +71.6%     171.38 ± 32%  perf-sched.wait_and_delay.avg.ms.kthreadd.ret_from_fork.ret_from_fork_asm
      1.10 ±  2%     -16.3%       0.92        perf-sched.wait_and_delay.avg.ms.schedule_hrtimeout_range_clock.ep_poll.do_epoll_wait.__x64_sys_epoll_wait
      1.41 ±  5%     -87.1%       0.18 ±223%  perf-sched.wait_and_delay.avg.ms.schedule_preempt_disabled.__mutex_lock.constprop.0.cifs_call_async
      0.21           -13.4%       0.18        perf-sched.wait_and_delay.avg.ms.schedule_timeout.wait_woken.sk_wait_data.tcp_recvmsg_locked
    195.95 ± 10%     -18.4%     159.83 ± 12%  perf-sched.wait_and_delay.avg.ms.wait_for_response.compound_send_recv.cifs_send_recv.__SMB2_close
      2.60           -23.5%       1.99        perf-sched.wait_and_delay.avg.ms.wait_for_response.compound_send_recv.cifs_send_recv.query_info
     20.46           -13.7%      17.66 ±  4%  perf-sched.wait_and_delay.avg.ms.wait_for_response.compound_send_recv.smb2_compound_op.smb2_query_path_info
      3.35 ± 66%    +342.5%      14.82 ± 20%  perf-sched.wait_and_delay.avg.ms.wait_for_response.compound_send_recv.smb2_compound_op.smb2_unlink
      2103           +10.0%       2312 ±  3%  perf-sched.wait_and_delay.count.__lock_sock.sk_wait_data.tcp_recvmsg_locked.tcp_recvmsg
      1025           +14.8%       1176        perf-sched.wait_and_delay.count.io_schedule.folio_wait_bit_common.folio_wait_writeback.__filemap_fdatawait_range
      9729 ±  2%     +21.1%      11779        perf-sched.wait_and_delay.count.schedule_hrtimeout_range_clock.ep_poll.do_epoll_wait.__x64_sys_epoll_wait
      2349 ±  9%     +29.3%       3038 ± 10%  perf-sched.wait_and_delay.count.schedule_preempt_disabled.__mutex_lock.constprop.0.compound_send_recv
    998.00           +14.3%       1140        perf-sched.wait_and_delay.count.schedule_preempt_disabled.rwsem_down_write_slowpath.down_write.do_unlinkat
      1026           +15.0%       1181        perf-sched.wait_and_delay.count.schedule_preempt_disabled.rwsem_down_write_slowpath.down_write.open_last_lookups
     18409           +12.5%      20714 ±  4%  perf-sched.wait_and_delay.count.schedule_timeout.wait_woken.sk_wait_data.tcp_recvmsg_locked
      1011           +14.8%       1160        perf-sched.wait_and_delay.count.wait_for_response.compound_send_recv.cifs_send_recv.query_info
      1013           +14.5%       1160        perf-sched.wait_and_delay.count.wait_for_response.compound_send_recv.smb2_compound_op.smb2_unlink
      2.68 ±  4%     -19.6%       2.16 ±  7%  perf-sched.wait_and_delay.max.ms.__lock_sock.sk_wait_data.tcp_recvmsg_locked.tcp_recvmsg
    282.00 ±  3%     -11.3%     250.07 ±  4%  perf-sched.wait_and_delay.max.ms.schedule_preempt_disabled.rwsem_down_write_slowpath.down_write.do_unlinkat
    280.97 ±  2%     -12.8%     244.97 ±  2%  perf-sched.wait_and_delay.max.ms.schedule_preempt_disabled.rwsem_down_write_slowpath.down_write.open_last_lookups
      0.49 ±125%     -97.2%       0.01 ±198%  perf-sched.wait_time.avg.ms.exit_to_user_mode_loop.exit_to_user_mode_prepare.irqentry_exit_to_user_mode.asm_sysvec_call_function_single
      1.05           -20.9%       0.83 ±  2%  perf-sched.wait_time.avg.ms.io_schedule.folio_wait_bit_common.filemap_update_page.filemap_get_pages
      2.14 ±  4%     +19.1%       2.55 ±  8%  perf-sched.wait_time.avg.ms.io_schedule.rq_qos_wait.wbt_wait.__rq_qos_throttle
     99.82 ± 27%     +69.8%     169.46 ± 31%  perf-sched.wait_time.avg.ms.kthreadd.ret_from_fork.ret_from_fork_asm
      1.08 ±  2%     -16.6%       0.90        perf-sched.wait_time.avg.ms.schedule_hrtimeout_range_clock.ep_poll.do_epoll_wait.__x64_sys_epoll_wait
      1.37 ±  5%     -24.5%       1.03 ±  5%  perf-sched.wait_time.avg.ms.schedule_preempt_disabled.__mutex_lock.constprop.0.cifs_call_async
      0.20           -14.2%       0.17        perf-sched.wait_time.avg.ms.schedule_timeout.wait_woken.sk_wait_data.tcp_recvmsg_locked
    195.53 ± 10%     -18.4%     159.54 ± 12%  perf-sched.wait_time.avg.ms.wait_for_response.compound_send_recv.cifs_send_recv.__SMB2_close
      2.54           -24.0%       1.93        perf-sched.wait_time.avg.ms.wait_for_response.compound_send_recv.cifs_send_recv.query_info
     20.44           -13.8%      17.63 ±  4%  perf-sched.wait_time.avg.ms.wait_for_response.compound_send_recv.smb2_compound_op.smb2_query_path_info
      3.32 ± 67%    +345.6%      14.78 ± 20%  perf-sched.wait_time.avg.ms.wait_for_response.compound_send_recv.smb2_compound_op.smb2_unlink
    245.89 ±  9%     -11.8%     216.92 ±  6%  perf-sched.wait_time.max.ms.__cond_resched.__kmem_cache_alloc_node.__kmalloc.cifs_strndup_to_utf16.cifs_convert_path_to_utf16
      3.14 ±  9%     -43.6%       1.77 ± 40%  perf-sched.wait_time.max.ms.__cond_resched.dput.terminate_walk.path_openat.do_filp_open
      2.65 ±  3%     -19.9%       2.12 ±  6%  perf-sched.wait_time.max.ms.__lock_sock.sk_wait_data.tcp_recvmsg_locked.tcp_recvmsg
      0.57 ±101%     -91.5%       0.05 ±213%  perf-sched.wait_time.max.ms.exit_to_user_mode_loop.exit_to_user_mode_prepare.irqentry_exit_to_user_mode.asm_sysvec_call_function_single
      1.79 ± 82%     -86.4%       0.24 ± 58%  perf-sched.wait_time.max.ms.exit_to_user_mode_loop.exit_to_user_mode_prepare.irqentry_exit_to_user_mode.asm_sysvec_reschedule_ipi
    281.92 ±  3%     -11.3%     249.99 ±  4%  perf-sched.wait_time.max.ms.schedule_preempt_disabled.rwsem_down_write_slowpath.down_write.do_unlinkat
    280.90 ±  2%     -12.8%     244.88 ±  2%  perf-sched.wait_time.max.ms.schedule_preempt_disabled.rwsem_down_write_slowpath.down_write.open_last_lookups




Disclaimer:
Results have been estimated based on internal Intel analysis and are provided
for informational purposes only. Any difference in system hardware or software
design or configuration may affect actual performance.
diff mbox series

Patch

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 0a96ea8c1d3a..03a423f687b8 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -75,24 +75,14 @@  static void btrfs_free_csum_hash(struct btrfs_fs_info *fs_info)
 static void csum_tree_block(struct extent_buffer *buf, u8 *result)
 {
 	struct btrfs_fs_info *fs_info = buf->fs_info;
-	const int num_pages = num_extent_pages(buf);
-	const int first_page_part = min_t(u32, PAGE_SIZE, fs_info->nodesize);
 	SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
-	char *kaddr;
-	int i;
+	void *eb_addr = btrfs_get_eb_addr(buf);
 
+	memset(result, 0, BTRFS_CSUM_SIZE);
 	shash->tfm = fs_info->csum_shash;
 	crypto_shash_init(shash);
-	kaddr = page_address(buf->pages[0]) + offset_in_page(buf->start);
-	crypto_shash_update(shash, kaddr + BTRFS_CSUM_SIZE,
-			    first_page_part - BTRFS_CSUM_SIZE);
-
-	for (i = 1; i < num_pages && INLINE_EXTENT_BUFFER_PAGES > 1; i++) {
-		kaddr = page_address(buf->pages[i]);
-		crypto_shash_update(shash, kaddr, PAGE_SIZE);
-	}
-	memset(result, 0, BTRFS_CSUM_SIZE);
-	crypto_shash_final(shash, result);
+	crypto_shash_digest(shash, eb_addr + BTRFS_CSUM_SIZE,
+			    buf->len - BTRFS_CSUM_SIZE, result);
 }
 
 /*
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 9f9a3ab82f04..70e22b9ccd28 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4073,100 +4073,39 @@  static inline int check_eb_range(const struct extent_buffer *eb,
 void read_extent_buffer(const struct extent_buffer *eb, void *dstv,
 			unsigned long start, unsigned long len)
 {
-	size_t cur;
-	size_t offset;
-	struct page *page;
-	char *kaddr;
-	char *dst = (char *)dstv;
-	unsigned long i = get_eb_page_index(start);
+	void *eb_addr = btrfs_get_eb_addr(eb);
 
 	if (check_eb_range(eb, start, len))
 		return;
 
-	offset = get_eb_offset_in_page(eb, start);
-
-	while (len > 0) {
-		page = eb->pages[i];
-
-		cur = min(len, (PAGE_SIZE - offset));
-		kaddr = page_address(page);
-		memcpy(dst, kaddr + offset, cur);
-
-		dst += cur;
-		len -= cur;
-		offset = 0;
-		i++;
-	}
+	memcpy(dstv, eb_addr + start, len);
 }
 
 int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
 				       void __user *dstv,
 				       unsigned long start, unsigned long len)
 {
-	size_t cur;
-	size_t offset;
-	struct page *page;
-	char *kaddr;
-	char __user *dst = (char __user *)dstv;
-	unsigned long i = get_eb_page_index(start);
-	int ret = 0;
+	void *eb_addr = btrfs_get_eb_addr(eb);
+	int ret;
 
 	WARN_ON(start > eb->len);
 	WARN_ON(start + len > eb->start + eb->len);
 
-	offset = get_eb_offset_in_page(eb, start);
-
-	while (len > 0) {
-		page = eb->pages[i];
-
-		cur = min(len, (PAGE_SIZE - offset));
-		kaddr = page_address(page);
-		if (copy_to_user_nofault(dst, kaddr + offset, cur)) {
-			ret = -EFAULT;
-			break;
-		}
-
-		dst += cur;
-		len -= cur;
-		offset = 0;
-		i++;
-	}
-
-	return ret;
+	ret = copy_to_user_nofault(dstv, eb_addr + start, len);
+	if (ret)
+		return -EFAULT;
+	return 0;
 }
 
 int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
 			 unsigned long start, unsigned long len)
 {
-	size_t cur;
-	size_t offset;
-	struct page *page;
-	char *kaddr;
-	char *ptr = (char *)ptrv;
-	unsigned long i = get_eb_page_index(start);
-	int ret = 0;
+	void *eb_addr = btrfs_get_eb_addr(eb);
 
 	if (check_eb_range(eb, start, len))
 		return -EINVAL;
 
-	offset = get_eb_offset_in_page(eb, start);
-
-	while (len > 0) {
-		page = eb->pages[i];
-
-		cur = min(len, (PAGE_SIZE - offset));
-
-		kaddr = page_address(page);
-		ret = memcmp(ptr, kaddr + offset, cur);
-		if (ret)
-			break;
-
-		ptr += cur;
-		len -= cur;
-		offset = 0;
-		i++;
-	}
-	return ret;
+	return memcmp(ptrv, eb_addr + start, len);
 }
 
 /*
@@ -4200,67 +4139,20 @@  static void assert_eb_page_uptodate(const struct extent_buffer *eb,
 	}
 }
 
-static void __write_extent_buffer(const struct extent_buffer *eb,
-				  const void *srcv, unsigned long start,
-				  unsigned long len, bool use_memmove)
-{
-	size_t cur;
-	size_t offset;
-	struct page *page;
-	char *kaddr;
-	char *src = (char *)srcv;
-	unsigned long i = get_eb_page_index(start);
-	/* For unmapped (dummy) ebs, no need to check their uptodate status. */
-	const bool check_uptodate = !test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
-
-	WARN_ON(test_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags));
-
-	if (check_eb_range(eb, start, len))
-		return;
-
-	offset = get_eb_offset_in_page(eb, start);
-
-	while (len > 0) {
-		page = eb->pages[i];
-		if (check_uptodate)
-			assert_eb_page_uptodate(eb, page);
-
-		cur = min(len, PAGE_SIZE - offset);
-		kaddr = page_address(page);
-		if (use_memmove)
-			memmove(kaddr + offset, src, cur);
-		else
-			memcpy(kaddr + offset, src, cur);
-
-		src += cur;
-		len -= cur;
-		offset = 0;
-		i++;
-	}
-}
-
 void write_extent_buffer(const struct extent_buffer *eb, const void *srcv,
 			 unsigned long start, unsigned long len)
 {
-	return __write_extent_buffer(eb, srcv, start, len, false);
+	void *eb_addr = btrfs_get_eb_addr(eb);
+
+	memcpy(eb_addr + start, srcv, len);
 }
 
 static void memset_extent_buffer(const struct extent_buffer *eb, int c,
 				 unsigned long start, unsigned long len)
 {
-	unsigned long cur = start;
+	void *eb_addr = btrfs_get_eb_addr(eb);
 
-	while (cur < start + len) {
-		unsigned long index = get_eb_page_index(cur);
-		unsigned int offset = get_eb_offset_in_page(eb, cur);
-		unsigned int cur_len = min(start + len - cur, PAGE_SIZE - offset);
-		struct page *page = eb->pages[index];
-
-		assert_eb_page_uptodate(eb, page);
-		memset(page_address(page) + offset, c, cur_len);
-
-		cur += cur_len;
-	}
+	memset(eb_addr + start, c, len);
 }
 
 void memzero_extent_buffer(const struct extent_buffer *eb, unsigned long start,
@@ -4274,20 +4166,12 @@  void memzero_extent_buffer(const struct extent_buffer *eb, unsigned long start,
 void copy_extent_buffer_full(const struct extent_buffer *dst,
 			     const struct extent_buffer *src)
 {
-	unsigned long cur = 0;
+	void *dst_addr = btrfs_get_eb_addr(dst);
+	void *src_addr = btrfs_get_eb_addr(src);
 
 	ASSERT(dst->len == src->len);
 
-	while (cur < src->len) {
-		unsigned long index = get_eb_page_index(cur);
-		unsigned long offset = get_eb_offset_in_page(src, cur);
-		unsigned long cur_len = min(src->len, PAGE_SIZE - offset);
-		void *addr = page_address(src->pages[index]) + offset;
-
-		write_extent_buffer(dst, addr, cur, cur_len);
-
-		cur += cur_len;
-	}
+	memcpy(dst_addr, src_addr, dst->len);
 }
 
 void copy_extent_buffer(const struct extent_buffer *dst,
@@ -4296,11 +4180,8 @@  void copy_extent_buffer(const struct extent_buffer *dst,
 			unsigned long len)
 {
 	u64 dst_len = dst->len;
-	size_t cur;
-	size_t offset;
-	struct page *page;
-	char *kaddr;
-	unsigned long i = get_eb_page_index(dst_offset);
+	void *dst_addr = btrfs_get_eb_addr(dst);
+	void *src_addr = btrfs_get_eb_addr(src);
 
 	if (check_eb_range(dst, dst_offset, len) ||
 	    check_eb_range(src, src_offset, len))
@@ -4308,54 +4189,7 @@  void copy_extent_buffer(const struct extent_buffer *dst,
 
 	WARN_ON(src->len != dst_len);
 
-	offset = get_eb_offset_in_page(dst, dst_offset);
-
-	while (len > 0) {
-		page = dst->pages[i];
-		assert_eb_page_uptodate(dst, page);
-
-		cur = min(len, (unsigned long)(PAGE_SIZE - offset));
-
-		kaddr = page_address(page);
-		read_extent_buffer(src, kaddr + offset, src_offset, cur);
-
-		src_offset += cur;
-		len -= cur;
-		offset = 0;
-		i++;
-	}
-}
-
-/*
- * eb_bitmap_offset() - calculate the page and offset of the byte containing the
- * given bit number
- * @eb: the extent buffer
- * @start: offset of the bitmap item in the extent buffer
- * @nr: bit number
- * @page_index: return index of the page in the extent buffer that contains the
- * given bit number
- * @page_offset: return offset into the page given by page_index
- *
- * This helper hides the ugliness of finding the byte in an extent buffer which
- * contains a given bit.
- */
-static inline void eb_bitmap_offset(const struct extent_buffer *eb,
-				    unsigned long start, unsigned long nr,
-				    unsigned long *page_index,
-				    size_t *page_offset)
-{
-	size_t byte_offset = BIT_BYTE(nr);
-	size_t offset;
-
-	/*
-	 * The byte we want is the offset of the extent buffer + the offset of
-	 * the bitmap item in the extent buffer + the offset of the byte in the
-	 * bitmap item.
-	 */
-	offset = start + offset_in_page(eb->start) + byte_offset;
-
-	*page_index = offset >> PAGE_SHIFT;
-	*page_offset = offset_in_page(offset);
+	memcpy(dst_addr + dst_offset, src_addr + src_offset, len);
 }
 
 /*
@@ -4368,25 +4202,18 @@  static inline void eb_bitmap_offset(const struct extent_buffer *eb,
 int extent_buffer_test_bit(const struct extent_buffer *eb, unsigned long start,
 			   unsigned long nr)
 {
-	u8 *kaddr;
-	struct page *page;
-	unsigned long i;
-	size_t offset;
+	const u8 *kaddr = btrfs_get_eb_addr(eb);
+	const unsigned long first_byte = start + BIT_BYTE(nr);
 
-	eb_bitmap_offset(eb, start, nr, &i, &offset);
-	page = eb->pages[i];
-	assert_eb_page_uptodate(eb, page);
-	kaddr = page_address(page);
-	return 1U & (kaddr[offset] >> (nr & (BITS_PER_BYTE - 1)));
+	assert_eb_page_uptodate(eb, eb->pages[first_byte >> PAGE_SHIFT]);
+	return 1U & (kaddr[first_byte] >> (nr & (BITS_PER_BYTE - 1)));
 }
 
 static u8 *extent_buffer_get_byte(const struct extent_buffer *eb, unsigned long bytenr)
 {
-	unsigned long index = get_eb_page_index(bytenr);
-
 	if (check_eb_range(eb, bytenr, 1))
 		return NULL;
-	return page_address(eb->pages[index]) + get_eb_offset_in_page(eb, bytenr);
+	return btrfs_get_eb_addr(eb) + bytenr;
 }
 
 /*
@@ -4471,72 +4298,29 @@  void memcpy_extent_buffer(const struct extent_buffer *dst,
 			  unsigned long dst_offset, unsigned long src_offset,
 			  unsigned long len)
 {
-	unsigned long cur_off = 0;
+	void *eb_addr = btrfs_get_eb_addr(dst);
 
 	if (check_eb_range(dst, dst_offset, len) ||
 	    check_eb_range(dst, src_offset, len))
 		return;
 
-	while (cur_off < len) {
-		unsigned long cur_src = cur_off + src_offset;
-		unsigned long pg_index = get_eb_page_index(cur_src);
-		unsigned long pg_off = get_eb_offset_in_page(dst, cur_src);
-		unsigned long cur_len = min(src_offset + len - cur_src,
-					    PAGE_SIZE - pg_off);
-		void *src_addr = page_address(dst->pages[pg_index]) + pg_off;
-		const bool use_memmove = areas_overlap(src_offset + cur_off,
-						       dst_offset + cur_off, cur_len);
-
-		__write_extent_buffer(dst, src_addr, dst_offset + cur_off, cur_len,
-				      use_memmove);
-		cur_off += cur_len;
-	}
+	if (areas_overlap(dst_offset, src_offset, len))
+		memmove(eb_addr + dst_offset, eb_addr + src_offset, len);
+	else
+		memcpy(eb_addr + dst_offset, eb_addr + src_offset, len);
 }
 
 void memmove_extent_buffer(const struct extent_buffer *dst,
 			   unsigned long dst_offset, unsigned long src_offset,
 			   unsigned long len)
 {
-	unsigned long dst_end = dst_offset + len - 1;
-	unsigned long src_end = src_offset + len - 1;
+	void *eb_addr = btrfs_get_eb_addr(dst);
 
 	if (check_eb_range(dst, dst_offset, len) ||
 	    check_eb_range(dst, src_offset, len))
 		return;
 
-	if (dst_offset < src_offset) {
-		memcpy_extent_buffer(dst, dst_offset, src_offset, len);
-		return;
-	}
-
-	while (len > 0) {
-		unsigned long src_i;
-		size_t cur;
-		size_t dst_off_in_page;
-		size_t src_off_in_page;
-		void *src_addr;
-		bool use_memmove;
-
-		src_i = get_eb_page_index(src_end);
-
-		dst_off_in_page = get_eb_offset_in_page(dst, dst_end);
-		src_off_in_page = get_eb_offset_in_page(dst, src_end);
-
-		cur = min_t(unsigned long, len, src_off_in_page + 1);
-		cur = min(cur, dst_off_in_page + 1);
-
-		src_addr = page_address(dst->pages[src_i]) + src_off_in_page -
-					cur + 1;
-		use_memmove = areas_overlap(src_end - cur + 1, dst_end - cur + 1,
-					    cur);
-
-		__write_extent_buffer(dst, src_addr, dst_end - cur + 1, cur,
-				      use_memmove);
-
-		dst_end -= cur;
-		src_end -= cur;
-		len -= cur;
-	}
+	memmove(eb_addr + dst_offset, eb_addr + src_offset, len);
 }
 
 #define GANG_LOOKUP_SIZE	16
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 930a2dc38157..bfa14457f461 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -140,6 +140,16 @@  static inline unsigned long get_eb_page_index(unsigned long offset)
 	return offset >> PAGE_SHIFT;
 }
 
+static inline void *btrfs_get_eb_addr(const struct extent_buffer *eb)
+{
+	/* For fallback vmapped extent buffer. */
+	if (eb->vaddr)
+		return eb->vaddr;
+
+	/* For physically continuous pages and subpage cases. */
+	return page_address(eb->pages[0]) + offset_in_page(eb->start);
+}
+
 /*
  * Structure to record how many bytes and which ranges are set/cleared
  */