From patchwork Tue Sep 8 07:52:14 2020
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Qu Wenruo
X-Patchwork-Id: 11762749
Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 7A341159A for ; Tue, 8 Sep 2020 07:52:51 +0000 (UTC)
Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 6CF3C20C09 for ; Tue, 8 Sep 2020 07:52:51 +0000 (UTC)
Received: from mx2.suse.de ([195.135.220.15]:50912 "EHLO mx2.suse.de") by vger.kernel.org with ESMTP id S1729437AbgIHHwp; Tue, 8 Sep 2020 03:52:45 -0400
Received: from relay2.suse.de (unknown [195.135.221.27]) by mx2.suse.de (Postfix) with ESMTP id A8D4FAE25 for ; Tue, 8 Sep 2020 07:52:44 +0000 (UTC)
From: Qu Wenruo
To: linux-btrfs@vger.kernel.org
Subject: [PATCH 01/17] btrfs: extent-io-tests: remove invalid tests
Date: Tue, 8 Sep 2020 15:52:14 +0800
Message-Id: <20200908075230.86856-2-wqu@suse.com>
In-Reply-To: <20200908075230.86856-1-wqu@suse.com>
References: <20200908075230.86856-1-wqu@suse.com>

In extent-io-tests, there are two invalid tests:

- Invalid nodesize for test_eb_bitmaps()

  Instead of using the sectorsize and nodesize combination passed in, we
  always use a hand-crafted nodesize. Although there is an extra check
  for 64K page size, we can still hit a case where PAGE_SIZE == 32K,
  which results in a 128K nodesize, larger than the maximum valid node
  size.
  Thankfully, most machines have either 4K or 64K page size, so we have
  not yet hit such a case.

- Invalid extent buffer bytenr

  For 64K page size, the only combination we are going to test is
  sectorsize = nodesize = 64K. In that case, we would try to create an
  extent buffer with a 32K bytenr, which is not aligned to sectorsize
  and thus invalid.

This patch fixes both problems by:

- Honoring the sectorsize/nodesize combination

  Now we no longer hand-craft a strange length and use it as nodesize.

- Using sectorsize as the second run's extent buffer start

  This tests the case where the extent buffer is aligned to sectorsize
  but not necessarily to nodesize.

Signed-off-by: Qu Wenruo
---
 fs/btrfs/tests/extent-io-tests.c | 26 +++++++++++---------------
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index df7ce874a74b..73e96d505f4f 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -379,54 +379,50 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
 static int test_eb_bitmaps(u32 sectorsize, u32 nodesize)
 {
 	struct btrfs_fs_info *fs_info;
-	unsigned long len;
 	unsigned long *bitmap = NULL;
 	struct extent_buffer *eb = NULL;
 	int ret;
 
 	test_msg("running extent buffer bitmap tests");
 
-	/*
-	 * In ppc64, sectorsize can be 64K, thus 4 * 64K will be larger than
-	 * BTRFS_MAX_METADATA_BLOCKSIZE.
-	 */
-	len = (sectorsize < BTRFS_MAX_METADATA_BLOCKSIZE)
-		? sectorsize * 4 : sectorsize;
-
-	fs_info = btrfs_alloc_dummy_fs_info(len, len);
+	fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
 	if (!fs_info) {
 		test_std_err(TEST_ALLOC_FS_INFO);
 		return -ENOMEM;
 	}
 
-	bitmap = kmalloc(len, GFP_KERNEL);
+	bitmap = kmalloc(nodesize, GFP_KERNEL);
 	if (!bitmap) {
 		test_err("couldn't allocate test bitmap");
 		ret = -ENOMEM;
 		goto out;
 	}
 
-	eb = __alloc_dummy_extent_buffer(fs_info, 0, len);
+	eb = __alloc_dummy_extent_buffer(fs_info, 0, nodesize);
 	if (!eb) {
 		test_std_err(TEST_ALLOC_ROOT);
 		ret = -ENOMEM;
 		goto out;
 	}
 
-	ret = __test_eb_bitmaps(bitmap, eb, len);
+	ret = __test_eb_bitmaps(bitmap, eb, nodesize);
 	if (ret)
 		goto out;
 
-	/* Do it over again with an extent buffer which isn't page-aligned. */
 	free_extent_buffer(eb);
-	eb = __alloc_dummy_extent_buffer(fs_info, nodesize / 2, len);
+
+	/*
+	 * Test again for case where the tree block is sectorsize aligned but
+	 * not nodesize aligned.
+	 */
+	eb = __alloc_dummy_extent_buffer(fs_info, sectorsize, nodesize);
 	if (!eb) {
 		test_std_err(TEST_ALLOC_ROOT);
 		ret = -ENOMEM;
 		goto out;
 	}
 
-	ret = __test_eb_bitmaps(bitmap, eb, len);
+	ret = __test_eb_bitmaps(bitmap, eb, nodesize);
 out:
 	free_extent_buffer(eb);
 	kfree(bitmap);

From patchwork Tue Sep 8 07:52:15 2020
X-Patchwork-Submitter: Qu Wenruo
X-Patchwork-Id: 11762753
From: Qu Wenruo
To: linux-btrfs@vger.kernel.org
Subject: [PATCH 02/17] btrfs: calculate inline extent buffer page size based on page size
Date: Tue, 8 Sep 2020 15:52:15 +0800
Message-Id: <20200908075230.86856-3-wqu@suse.com>

Btrfs only supports 64K as the maximum node size, so on a 4K page system
we have at most 16 pages for one extent buffer. On a 64K page system,
however, we only ever need one page per extent buffer. This stays true
even for future subpage sector size support (as long as the extent
buffer doesn't cross the 64K boundary).

So this patch changes how INLINE_EXTENT_BUFFER_PAGES is calculated:
instead of a fixed 16 pages, use (64K / PAGE_SIZE). This should save
some bytes in the extent buffer structure on 64K page size systems.
Signed-off-by: Qu Wenruo
---
 fs/btrfs/extent_io.h | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 00a88f2eb5ab..e16c5449ba48 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -86,8 +86,8 @@ struct extent_io_ops {
 };
 
-#define INLINE_EXTENT_BUFFER_PAGES 16
-#define MAX_INLINE_EXTENT_BUFFER_SIZE (INLINE_EXTENT_BUFFER_PAGES * PAGE_SIZE)
+#define MAX_INLINE_EXTENT_BUFFER_SIZE SZ_64K
+#define INLINE_EXTENT_BUFFER_PAGES (MAX_INLINE_EXTENT_BUFFER_SIZE / PAGE_SIZE)
 struct extent_buffer {
 	u64 start;
 	unsigned long len;
@@ -227,8 +227,15 @@ void wait_on_extent_buffer_writeback(struct extent_buffer *eb);
 
 static inline int num_extent_pages(const struct extent_buffer *eb)
 {
-	return (round_up(eb->start + eb->len, PAGE_SIZE) >> PAGE_SHIFT) -
-	       (eb->start >> PAGE_SHIFT);
+	/*
+	 * For the sectorsize == PAGE_SIZE case, since eb is always aligned to
+	 * sectorsize, this is just eb->len >> PAGE_SHIFT.
+	 *
+	 * For the sectorsize < PAGE_SIZE case, we only want to support 64K
+	 * PAGE_SIZE, and have ensured all tree blocks won't cross page
+	 * boundaries. So in that case we always get 1 page.
+	 */
+	return (round_up(eb->len, PAGE_SIZE) >> PAGE_SHIFT);
 }
 
 static inline int extent_buffer_uptodate(const struct extent_buffer *eb)

From patchwork Tue Sep 8 07:52:16 2020
X-Patchwork-Submitter: Qu Wenruo
X-Patchwork-Id: 11762755
From: Qu Wenruo
To: linux-btrfs@vger.kernel.org
Subject: [PATCH 03/17] btrfs: remove the open-code to read disk-key
Date: Tue, 8 Sep 2020 15:52:16 +0800
Message-Id: <20200908075230.86856-4-wqu@suse.com>

There is some ancient code where we handle the disk_key read manually
when the disk key sits within one page. That is unnecessary, as
read_extent_buffer() handles every case.
Signed-off-by: Qu Wenruo
---
 fs/btrfs/ctree.c | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index cd1cd673bc0b..e204e1320745 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1697,7 +1697,6 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
 	}
 
 	while (low < high) {
-		unsigned long oip;
 		unsigned long offset;
 		struct btrfs_disk_key *tmp;
 		struct btrfs_disk_key unaligned;
@@ -1705,17 +1704,9 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
 		mid = (low + high) / 2;
 		offset = p + mid * item_size;
-		oip = offset_in_page(offset);
 
-		if (oip + key_size <= PAGE_SIZE) {
-			const unsigned long idx = offset >> PAGE_SHIFT;
-			char *kaddr = page_address(eb->pages[idx]);
-
-			tmp = (struct btrfs_disk_key *)(kaddr + oip);
-		} else {
-			read_extent_buffer(eb, &unaligned, offset, key_size);
-			tmp = &unaligned;
-		}
+		read_extent_buffer(eb, &unaligned, offset, key_size);
+		tmp = &unaligned;
 
 		ret = comp_keys(tmp, key);

From patchwork Tue Sep 8 07:52:17 2020
X-Patchwork-Submitter: Qu Wenruo
X-Patchwork-Id: 11762757
From: Qu Wenruo
To: linux-btrfs@vger.kernel.org
Subject: [PATCH 04/17] btrfs: make btrfs_fs_info::buffer_radix take sector size divided values
Date: Tue, 8 Sep 2020 15:52:17 +0800
Message-Id: <20200908075230.86856-5-wqu@suse.com>

For subpage sector size support, one page can contain multiple tree
blocks, so we can no longer use (eb->start >> PAGE_SHIFT) as the radix
tree index, or we could easily look up an extent buffer that doesn't
belong to us.

This patch uses (extent_buffer::start / sectorsize) as the radix tree
index so that we get the correct extent buffer with subpage size
support.

Signed-off-by: Qu Wenruo
Reviewed-by: Nikolay Borisov
---
 fs/btrfs/extent_io.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 6def411b2eba..5d969340275e 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -5142,7 +5142,7 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
 
 	rcu_read_lock();
 	eb = radix_tree_lookup(&fs_info->buffer_radix,
-			       start >> PAGE_SHIFT);
+			       start / fs_info->sectorsize);
 	if (eb && atomic_inc_not_zero(&eb->refs)) {
 		rcu_read_unlock();
 		/*
@@ -5194,7 +5194,7 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
 	}
 	spin_lock(&fs_info->buffer_lock);
 	ret = radix_tree_insert(&fs_info->buffer_radix,
-				start >> PAGE_SHIFT, eb);
+				start / fs_info->sectorsize, eb);
 	spin_unlock(&fs_info->buffer_lock);
 	radix_tree_preload_end();
 	if (ret == -EEXIST) {
@@ -5302,7 +5302,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 
 	spin_lock(&fs_info->buffer_lock);
 	ret =
radix_tree_insert(&fs_info->buffer_radix,
-				start >> PAGE_SHIFT, eb);
+				start / fs_info->sectorsize, eb);
 	spin_unlock(&fs_info->buffer_lock);
 	radix_tree_preload_end();
 	if (ret == -EEXIST) {
@@ -5358,7 +5358,7 @@ static int release_extent_buffer(struct extent_buffer *eb)
 
 		spin_lock(&fs_info->buffer_lock);
 		radix_tree_delete(&fs_info->buffer_radix,
-				  eb->start >> PAGE_SHIFT);
+				  eb->start / fs_info->sectorsize);
 		spin_unlock(&fs_info->buffer_lock);
 	} else {
 		spin_unlock(&eb->refs_lock);

From patchwork Tue Sep 8 07:52:18 2020
X-Patchwork-Submitter: Qu Wenruo
X-Patchwork-Id: 11762759
From: Qu Wenruo
To: linux-btrfs@vger.kernel.org
Subject: [PATCH 05/17] btrfs: don't allow tree block to cross page boundary for subpage support
Date: Tue, 8 Sep 2020 15:52:18 +0800
Message-Id: <20200908075230.86856-6-wqu@suse.com>
As preparation for subpage sector size support (allowing a sector size
smaller than the page size to be mounted), if the sector size is smaller
than the page size, we don't allow a tree block to be read if it crosses
a page boundary (normally 64K).

This ensures that tree blocks are always contained in one page on 64K
page size systems, which greatly simplifies the handling. Otherwise we
would need complex multi-page handling for tree blocks.

Currently the only way to create such tree blocks crossing the 64K
boundary is by btrfs-convert, which will be fixed soon and is not in
widespread use.

Signed-off-by: Qu Wenruo
---
 fs/btrfs/extent_io.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 5d969340275e..119193166cec 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -5232,6 +5232,13 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
 		btrfs_err(fs_info, "bad tree block start %llu", start);
 		return ERR_PTR(-EINVAL);
 	}
+	if (fs_info->sectorsize < PAGE_SIZE && round_down(start, PAGE_SIZE) !=
+	    round_down(start + len - 1, PAGE_SIZE)) {
+		btrfs_err(fs_info,
+		"tree block crosses page boundary, start %llu nodesize %lu",
+			  start, len);
+		return ERR_PTR(-EINVAL);
+	}
 
 	eb = find_extent_buffer(fs_info, start);
 	if (eb)

From patchwork Tue Sep 8 07:52:19 2020
X-Patchwork-Submitter: Qu Wenruo
X-Patchwork-Id: 11762763
From: Qu Wenruo
To: linux-btrfs@vger.kernel.org
Cc: Goldwyn Rodrigues
Subject: [PATCH 06/17] btrfs: handle sectorsize < PAGE_SIZE case for extent buffer accessors
Date: Tue, 8 Sep 2020 15:52:19 +0800
Message-Id: <20200908075230.86856-7-wqu@suse.com>

To support the sectorsize < PAGE_SIZE case, we need to take extra care
in the extent buffer accessors. Since the sectorsize is smaller than
PAGE_SIZE, one page can contain multiple tree blocks, so we must use
eb->start to determine the real offset to read from or write to in the
extent buffer accessors.

This patch introduces two helpers to do this:

- get_eb_page_index()

  This calculates the index into extent_buffer::pages. It is a simple
  wrapper around "start >> PAGE_SHIFT".

  For the sectorsize == PAGE_SIZE case, nothing changes. For the
  sectorsize < PAGE_SIZE case, we always get index 0, and the existing
  page shift also works fine.

- get_eb_page_offset()

  This calculates the offset into extent_buffer::pages, taking
  extent_buffer::start into consideration.

  For the sectorsize == PAGE_SIZE case, extent_buffer::start is always
  aligned to PAGE_SIZE, so adding it to offset_in_page() doesn't change
  the result. For the sectorsize < PAGE_SIZE case, adding
  extent_buffer::start gives us the correct offset to access.
This patch touches the following parts to cover all extent buffer
accessors:

- BTRFS_SETGET_HEADER_FUNCS()
- read_extent_buffer()
- read_extent_buffer_to_user()
- memcmp_extent_buffer()
- write_extent_buffer_chunk_tree_uuid()
- write_extent_buffer_fsid()
- write_extent_buffer()
- memzero_extent_buffer()
- copy_extent_buffer_full()
- copy_extent_buffer()
- memcpy_extent_buffer()
- memmove_extent_buffer()
- btrfs_get_token_##bits()
- btrfs_get_##bits()
- btrfs_set_token_##bits()
- btrfs_set_##bits()

Signed-off-by: Goldwyn Rodrigues
Signed-off-by: Qu Wenruo
---
 fs/btrfs/ctree.h        | 38 ++++++++++++++++++++++--
 fs/btrfs/extent_io.c    | 66 ++++++++++++++++++++++++-----------------
 fs/btrfs/struct-funcs.c | 18 ++++++-----
 3 files changed, 85 insertions(+), 37 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 9a72896bed2e..81d5a6cc97b5 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1448,14 +1448,15 @@ static inline void btrfs_set_token_##name(struct btrfs_map_token *token,\
 #define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits)		\
 static inline u##bits btrfs_##name(const struct extent_buffer *eb)	\
 {									\
-	const type *p = page_address(eb->pages[0]);			\
+	const type *p = page_address(eb->pages[0]) +			\
+			offset_in_page(eb->start);			\
 	u##bits res = le##bits##_to_cpu(p->member);			\
 	return res;							\
 }									\
 static inline void btrfs_set_##name(const struct extent_buffer *eb,	\
 				    u##bits val)			\
 {									\
-	type *p = page_address(eb->pages[0]);				\
+	type *p = page_address(eb->pages[0]) + offset_in_page(eb->start); \
 	p->member = cpu_to_le##bits(val);				\
 }
@@ -3241,6 +3242,39 @@ static inline void assertfail(const char *expr, const char* file, int line) { }
 #define ASSERT(expr)	(void)(expr)
 #endif
 
+/*
+ * Get the correct offset inside the page of extent buffer.
+ *
+ * Will handle both sectorsize == PAGE_SIZE and sectorsize < PAGE_SIZE cases.
+ * + * @eb: The target extent buffer + * @start: The offset inside the extent buffer + */ +static inline size_t get_eb_page_offset(const struct extent_buffer *eb, + unsigned long start) +{ + /* + * For sectorsize == PAGE_SIZE case, eb->start will always be aligned + * to PAGE_SIZE, thus adding it won't cause any difference. + * + * For sectorsize < PAGE_SIZE, we must only read the data belongs to + * the eb, thus we have to take the eb->start into consideration. + */ + return offset_in_page(start + eb->start); +} + +static inline unsigned long get_eb_page_index(unsigned long start) +{ + /* + * For sectorsize == PAGE_SIZE case, plain >> PAGE_SHIFT is enough. + * + * For sectorsize < PAGE_SIZE case, we only support 64K PAGE_SIZE, + * and has ensured all tree blocks are contained in one page, thus + * we always get index == 0. + */ + return start >> PAGE_SHIFT; +} + /* * Use that for functions that are conditionally exported for sanity tests but * otherwise static diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 119193166cec..6fafbc1d047b 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -5637,7 +5637,7 @@ void read_extent_buffer(const struct extent_buffer *eb, void *dstv, struct page *page; char *kaddr; char *dst = (char *)dstv; - unsigned long i = start >> PAGE_SHIFT; + unsigned long i = get_eb_page_index(start); if (start + len > eb->len) { WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, wanted %lu %lu\n", @@ -5646,7 +5646,7 @@ void read_extent_buffer(const struct extent_buffer *eb, void *dstv, return; } - offset = offset_in_page(start); + offset = get_eb_page_offset(eb, start); while (len > 0) { page = eb->pages[i]; @@ -5671,13 +5671,13 @@ int read_extent_buffer_to_user(const struct extent_buffer *eb, struct page *page; char *kaddr; char __user *dst = (char __user *)dstv; - unsigned long i = start >> PAGE_SHIFT; + unsigned long i = get_eb_page_index(start); int ret = 0; WARN_ON(start > eb->len); WARN_ON(start + len > 
eb->start + eb->len); - offset = offset_in_page(start); + offset = get_eb_page_offset(eb, start); while (len > 0) { page = eb->pages[i]; @@ -5706,13 +5706,13 @@ int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv, struct page *page; char *kaddr; char *ptr = (char *)ptrv; - unsigned long i = start >> PAGE_SHIFT; + unsigned long i = get_eb_page_index(start); int ret = 0; WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); - offset = offset_in_page(start); + offset = get_eb_page_offset(eb, start); while (len > 0) { page = eb->pages[i]; @@ -5738,7 +5738,7 @@ void write_extent_buffer_chunk_tree_uuid(const struct extent_buffer *eb, char *kaddr; WARN_ON(!PageUptodate(eb->pages[0])); - kaddr = page_address(eb->pages[0]); + kaddr = page_address(eb->pages[0]) + get_eb_page_offset(eb, 0); memcpy(kaddr + offsetof(struct btrfs_header, chunk_tree_uuid), srcv, BTRFS_FSID_SIZE); } @@ -5748,7 +5748,7 @@ void write_extent_buffer_fsid(const struct extent_buffer *eb, const void *srcv) char *kaddr; WARN_ON(!PageUptodate(eb->pages[0])); - kaddr = page_address(eb->pages[0]); + kaddr = page_address(eb->pages[0]) + get_eb_page_offset(eb, 0); memcpy(kaddr + offsetof(struct btrfs_header, fsid), srcv, BTRFS_FSID_SIZE); } @@ -5761,12 +5761,12 @@ void write_extent_buffer(const struct extent_buffer *eb, const void *srcv, struct page *page; char *kaddr; char *src = (char *)srcv; - unsigned long i = start >> PAGE_SHIFT; + unsigned long i = get_eb_page_index(start); WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); - offset = offset_in_page(start); + offset = get_eb_page_offset(eb, start); while (len > 0) { page = eb->pages[i]; @@ -5790,12 +5790,12 @@ void memzero_extent_buffer(const struct extent_buffer *eb, unsigned long start, size_t offset; struct page *page; char *kaddr; - unsigned long i = start >> PAGE_SHIFT; + unsigned long i = get_eb_page_index(start); WARN_ON(start > eb->len); WARN_ON(start + len > eb->start + eb->len); - offset 
= offset_in_page(start); + offset = get_eb_page_offset(eb, start); while (len > 0) { page = eb->pages[i]; @@ -5819,10 +5819,22 @@ void copy_extent_buffer_full(const struct extent_buffer *dst, ASSERT(dst->len == src->len); - num_pages = num_extent_pages(dst); - for (i = 0; i < num_pages; i++) - copy_page(page_address(dst->pages[i]), - page_address(src->pages[i])); + if (dst->fs_info->sectorsize == PAGE_SIZE) { + num_pages = num_extent_pages(dst); + for (i = 0; i < num_pages; i++) + copy_page(page_address(dst->pages[i]), + page_address(src->pages[i])); + } else { + unsigned long src_index = get_eb_page_index(src->start); + unsigned long dst_index = get_eb_page_index(dst->start); + size_t src_offset = get_eb_page_offset(src, 0); + size_t dst_offset = get_eb_page_offset(dst, 0); + + ASSERT(src_index == 0 && dst_index == 0); + memcpy(page_address(dst->pages[dst_index]) + dst_offset, + page_address(src->pages[src_index]) + src_offset, + src->len); + } } void copy_extent_buffer(const struct extent_buffer *dst, @@ -5835,11 +5847,11 @@ void copy_extent_buffer(const struct extent_buffer *dst, size_t offset; struct page *page; char *kaddr; - unsigned long i = dst_offset >> PAGE_SHIFT; + unsigned long i = get_eb_page_index(dst_offset); WARN_ON(src->len != dst_len); - offset = offset_in_page(dst_offset); + offset = get_eb_page_offset(dst, dst_offset); while (len > 0) { page = dst->pages[i]; @@ -5883,7 +5895,7 @@ static inline void eb_bitmap_offset(const struct extent_buffer *eb, * the bitmap item in the extent buffer + the offset of the byte in the * bitmap item. 
*/ - offset = start + byte_offset; + offset = start + offset_in_page(eb->start) + byte_offset; *page_index = offset >> PAGE_SHIFT; *page_offset = offset_in_page(offset); @@ -6047,11 +6059,11 @@ void memcpy_extent_buffer(const struct extent_buffer *dst, } while (len > 0) { - dst_off_in_page = offset_in_page(dst_offset); - src_off_in_page = offset_in_page(src_offset); + dst_off_in_page = get_eb_page_offset(dst, dst_offset); + src_off_in_page = get_eb_page_offset(dst, src_offset); - dst_i = dst_offset >> PAGE_SHIFT; - src_i = src_offset >> PAGE_SHIFT; + dst_i = get_eb_page_index(dst_offset); + src_i = get_eb_page_index(src_offset); cur = min(len, (unsigned long)(PAGE_SIZE - src_off_in_page)); @@ -6097,11 +6109,11 @@ void memmove_extent_buffer(const struct extent_buffer *dst, return; } while (len > 0) { - dst_i = dst_end >> PAGE_SHIFT; - src_i = src_end >> PAGE_SHIFT; + dst_i = get_eb_page_index(dst_end); + src_i = get_eb_page_index(src_end); - dst_off_in_page = offset_in_page(dst_end); - src_off_in_page = offset_in_page(src_end); + dst_off_in_page = get_eb_page_offset(dst, dst_end); + src_off_in_page = get_eb_page_offset(dst, src_end); cur = min_t(unsigned long, len, src_off_in_page + 1); cur = min(cur, dst_off_in_page + 1); diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c index 079b059818e9..769901c2b3c9 100644 --- a/fs/btrfs/struct-funcs.c +++ b/fs/btrfs/struct-funcs.c @@ -67,8 +67,9 @@ u##bits btrfs_get_token_##bits(struct btrfs_map_token *token, \ const void *ptr, unsigned long off) \ { \ const unsigned long member_offset = (unsigned long)ptr + off; \ - const unsigned long idx = member_offset >> PAGE_SHIFT; \ - const unsigned long oip = offset_in_page(member_offset); \ + const unsigned long idx = get_eb_page_index(member_offset); \ + const unsigned long oip = get_eb_page_offset(token->eb, \ + member_offset); \ const int size = sizeof(u##bits); \ u8 lebytes[sizeof(u##bits)]; \ const int part = PAGE_SIZE - oip; \ @@ -95,8 +96,8 @@ u##bits 
btrfs_get_##bits(const struct extent_buffer *eb, \ const void *ptr, unsigned long off) \ { \ const unsigned long member_offset = (unsigned long)ptr + off; \ - const unsigned long oip = offset_in_page(member_offset); \ - const unsigned long idx = member_offset >> PAGE_SHIFT; \ + const unsigned long oip = get_eb_page_offset(eb, member_offset);\ + const unsigned long idx = get_eb_page_index(member_offset); \ char *kaddr = page_address(eb->pages[idx]); \ const int size = sizeof(u##bits); \ const int part = PAGE_SIZE - oip; \ @@ -116,8 +117,9 @@ void btrfs_set_token_##bits(struct btrfs_map_token *token, \ u##bits val) \ { \ const unsigned long member_offset = (unsigned long)ptr + off; \ - const unsigned long idx = member_offset >> PAGE_SHIFT; \ - const unsigned long oip = offset_in_page(member_offset); \ + const unsigned long idx = get_eb_page_index(member_offset); \ + const unsigned long oip = get_eb_page_offset(token->eb, \ + member_offset); \ const int size = sizeof(u##bits); \ u8 lebytes[sizeof(u##bits)]; \ const int part = PAGE_SIZE - oip; \ @@ -146,8 +148,8 @@ void btrfs_set_##bits(const struct extent_buffer *eb, void *ptr, \ unsigned long off, u##bits val) \ { \ const unsigned long member_offset = (unsigned long)ptr + off; \ - const unsigned long oip = offset_in_page(member_offset); \ - const unsigned long idx = member_offset >> PAGE_SHIFT; \ + const unsigned long oip = get_eb_page_offset(eb, member_offset);\ + const unsigned long idx = get_eb_page_index(member_offset); \ char *kaddr = page_address(eb->pages[idx]); \ const int size = sizeof(u##bits); \ const int part = PAGE_SIZE - oip; \ From patchwork Tue Sep 8 07:52:20 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Qu Wenruo X-Patchwork-Id: 11762761 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 16C40746 for ; Tue, 8 Sep 
From: Qu Wenruo
To: linux-btrfs@vger.kernel.org
Cc: Goldwyn Rodrigues
Subject: [PATCH 07/17] btrfs: make csum_tree_block() handle sectorsize smaller than page size
Date: Tue, 8 Sep 2020 15:52:20 +0800
Message-Id: <20200908075230.86856-8-wqu@suse.com>

For subpage size support, we only need to handle the first page.

To make the code work for both cases, we modify the following behaviors:

- num_pages calculation

  Instead of "nodesize >> PAGE_SHIFT", use "DIV_ROUND_UP(nodesize,
  PAGE_SIZE)". This ensures we get at least one page for subpage size
  support, while still getting the same result for regular page size.

- The length of the first run

  Instead of "PAGE_SIZE - BTRFS_CSUM_SIZE", use "min(PAGE_SIZE,
  nodesize) - BTRFS_CSUM_SIZE", which handles both cases.

- The start location of the first run

  Instead of always using BTRFS_CSUM_SIZE as the csum start position,
  add offset_in_page(eb->start) to get the proper offset for both cases.
Signed-off-by: Goldwyn Rodrigues Signed-off-by: Qu Wenruo Reviewed-by: Nikolay Borisov --- fs/btrfs/disk-io.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f6bba7eb1fa1..62dbd9bbd381 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -257,16 +257,16 @@ struct extent_map *btree_get_extent(struct btrfs_inode *inode, static void csum_tree_block(struct extent_buffer *buf, u8 *result) { struct btrfs_fs_info *fs_info = buf->fs_info; - const int num_pages = fs_info->nodesize >> PAGE_SHIFT; + const int num_pages = DIV_ROUND_UP(fs_info->nodesize, PAGE_SIZE); SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); char *kaddr; int i; shash->tfm = fs_info->csum_shash; crypto_shash_init(shash); - kaddr = page_address(buf->pages[0]); + kaddr = page_address(buf->pages[0]) + offset_in_page(buf->start); crypto_shash_update(shash, kaddr + BTRFS_CSUM_SIZE, - PAGE_SIZE - BTRFS_CSUM_SIZE); + min_t(u32, PAGE_SIZE, fs_info->nodesize) - BTRFS_CSUM_SIZE); for (i = 1; i < num_pages; i++) { kaddr = page_address(buf->pages[i]); From patchwork Tue Sep 8 07:52:21 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Qu Wenruo X-Patchwork-Id: 11762765 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id D688F59D for ; Tue, 8 Sep 2020 07:53:12 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id C876120C09 for ; Tue, 8 Sep 2020 07:53:12 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1729591AbgIHHxM (ORCPT ); Tue, 8 Sep 2020 03:53:12 -0400 Received: from mx2.suse.de ([195.135.220.15]:51098 "EHLO mx2.suse.de" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1729576AbgIHHxE (ORCPT ); Tue, 8 Sep 2020 03:53:04 -0400 X-Virus-Scanned: by 
amavisd-new at test-mx.suse.de Received: from relay2.suse.de (unknown [195.135.221.27]) by mx2.suse.de (Postfix) with ESMTP id D536FAE24 for ; Tue, 8 Sep 2020 07:53:03 +0000 (UTC) From: Qu Wenruo To: linux-btrfs@vger.kernel.org Subject: [PATCH 08/17] btrfs: refactor how we extract extent buffer from page for alloc_extent_buffer() Date: Tue, 8 Sep 2020 15:52:21 +0800 Message-Id: <20200908075230.86856-9-wqu@suse.com> X-Mailer: git-send-email 2.28.0 In-Reply-To: <20200908075230.86856-1-wqu@suse.com> References: <20200908075230.86856-1-wqu@suse.com> MIME-Version: 1.0 Sender: linux-btrfs-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org This patch will extract the code to extract extent_buffer from page::private into its own function, grab_extent_buffer_from_page(). Although it's just one line, for later sub-page size support it will become way more larger. Also add some extra comments why we need to do such page::private dancing. Signed-off-by: Qu Wenruo --- fs/btrfs/extent_io.c | 49 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 6fafbc1d047b..3c8fe40f67fa 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -5214,6 +5214,44 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, } #endif +/* + * A helper to grab the exist extent buffer from a page. + * + * There is a small race window where two callers of alloc_extent_buffer(): + * Thread 1 | Thread 2 + * -------------------------------------+--------------------------------------- + * alloc_extent_buffer() | alloc_extent_buffer() + * |- eb = __alloc_extent_buffer() | |- eb = __alloc_extent_buffer() + * |- p = find_or_create_page() | |- p = find_or_create_page() + * + * In above case, two ebs get allocated for the same bytenr, and got the same + * page. 
+ * We have to rely on the page->mapping->private_lock to make one of them to + * give up and reuse the allocated eb: + * + * | |- grab_extent_buffer_from_page() + * | |- get nothing + * | |- attach_extent_buffer_page() + * | | |- Now page->private is set + * | |- spin_unlock(&mapping->private_lock); + * |- spin_lock(private_lock); | |- Continue to insert radix tree. + * |- grab_extent_buffer_from_page() | + * |- got eb from thread 2 | + * |- spin_unlock(private_lock); | + * |- goto free_eb; | + * + * The function here is to ensure we have proper locking and detect such race + * so we won't allocating an eb twice. + */ +static struct extent_buffer *grab_extent_buffer_from_page(struct page *page) +{ + /* + * For PAGE_SIZE == sectorsize case, a btree_inode page should have its + * private pointer as extent buffer who owns this page. + */ + return (struct extent_buffer *)page->private; +} + struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start) { @@ -5258,15 +5296,8 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, spin_lock(&mapping->private_lock); if (PagePrivate(p)) { - /* - * We could have already allocated an eb for this page - * and attached one so lets see if we can get a ref on - * the existing eb, and if we can we know it's good and - * we can just return that one, else we know we can just - * overwrite page->private. 
- */ - exists = (struct extent_buffer *)p->private; - if (atomic_inc_not_zero(&exists->refs)) { + exists = grab_extent_buffer_from_page(p); + if (exists && atomic_inc_not_zero(&exists->refs)) { spin_unlock(&mapping->private_lock); unlock_page(p); put_page(p); From patchwork Tue Sep 8 07:52:22 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Qu Wenruo X-Patchwork-Id: 11762783 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 6096659D for ; Tue, 8 Sep 2020 07:53:34 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 5225421D20 for ; Tue, 8 Sep 2020 07:53:34 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1729667AbgIHHxb (ORCPT ); Tue, 8 Sep 2020 03:53:31 -0400 Received: from mx2.suse.de ([195.135.220.15]:51120 "EHLO mx2.suse.de" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1729597AbgIHHxG (ORCPT ); Tue, 8 Sep 2020 03:53:06 -0400 X-Virus-Scanned: by amavisd-new at test-mx.suse.de Received: from relay2.suse.de (unknown [195.135.221.27]) by mx2.suse.de (Postfix) with ESMTP id 9230BAE25 for ; Tue, 8 Sep 2020 07:53:05 +0000 (UTC) From: Qu Wenruo To: linux-btrfs@vger.kernel.org Subject: [PATCH 09/17] btrfs: refactor btrfs_release_extent_buffer_pages() Date: Tue, 8 Sep 2020 15:52:22 +0800 Message-Id: <20200908075230.86856-10-wqu@suse.com> X-Mailer: git-send-email 2.28.0 In-Reply-To: <20200908075230.86856-1-wqu@suse.com> References: <20200908075230.86856-1-wqu@suse.com> MIME-Version: 1.0 Sender: linux-btrfs-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org We have attach_extent_buffer_page() and it get utilized in btrfs_clone_extent_buffer() and alloc_extent_buffer(). 
But in btrfs_release_extent_buffer_pages() we manually call detach_page_private(). This is fine for current code, but if we're going to support subpage size, we will do a lot of more work other than just calling detach_page_private(). This patch will extract the main work of btrfs_clone_extent_buffer() into detach_extent_buffer_page() so that later subpage size support can put their own code into them. Signed-off-by: Qu Wenruo --- fs/btrfs/extent_io.c | 58 +++++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 33 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3c8fe40f67fa..1cb41dab7a1d 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4920,6 +4920,29 @@ int extent_buffer_under_io(const struct extent_buffer *eb) test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); } +static void detach_extent_buffer_page(struct extent_buffer *eb, + struct page *page) +{ + bool mapped = !test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags); + + if (!page) + return; + + if (mapped) + spin_lock(&page->mapping->private_lock); + if (PagePrivate(page) && page->private == (unsigned long)eb) { + BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); + BUG_ON(PageDirty(page)); + BUG_ON(PageWriteback(page)); + /* We need to make sure we haven't be attached to a new eb. */ + detach_page_private(page); + } + if (mapped) + spin_unlock(&page->mapping->private_lock); + /* One for when we allocated the page */ + put_page(page); +} + /* * Release all pages attached to the extent buffer. 
*/ @@ -4927,43 +4950,12 @@ static void btrfs_release_extent_buffer_pages(struct extent_buffer *eb) { int i; int num_pages; - int mapped = !test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags); BUG_ON(extent_buffer_under_io(eb)); num_pages = num_extent_pages(eb); - for (i = 0; i < num_pages; i++) { - struct page *page = eb->pages[i]; - - if (!page) - continue; - if (mapped) - spin_lock(&page->mapping->private_lock); - /* - * We do this since we'll remove the pages after we've - * removed the eb from the radix tree, so we could race - * and have this page now attached to the new eb. So - * only clear page_private if it's still connected to - * this eb. - */ - if (PagePrivate(page) && - page->private == (unsigned long)eb) { - BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); - BUG_ON(PageDirty(page)); - BUG_ON(PageWriteback(page)); - /* - * We need to make sure we haven't be attached - * to a new eb. - */ - detach_page_private(page); - } - - if (mapped) - spin_unlock(&page->mapping->private_lock); - - /* One for when we allocated the page */ - put_page(page); - } + for (i = 0; i < num_pages; i++) + detach_extent_buffer_page(eb, eb->pages[i]); } /* From patchwork Tue Sep 8 07:52:23 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Qu Wenruo X-Patchwork-Id: 11762767 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 6E350159A for ; Tue, 8 Sep 2020 07:53:13 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 612D821D20 for ; Tue, 8 Sep 2020 07:53:13 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1729602AbgIHHxM (ORCPT ); Tue, 8 Sep 2020 03:53:12 -0400 Received: from mx2.suse.de ([195.135.220.15]:51148 "EHLO mx2.suse.de" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id 
S1729611AbgIHHxI (ORCPT ); Tue, 8 Sep 2020 03:53:08 -0400 X-Virus-Scanned: by amavisd-new at test-mx.suse.de Received: from relay2.suse.de (unknown [195.135.221.27]) by mx2.suse.de (Postfix) with ESMTP id E5FEBAE24 for ; Tue, 8 Sep 2020 07:53:07 +0000 (UTC) From: Qu Wenruo To: linux-btrfs@vger.kernel.org Subject: [PATCH 10/17] btrfs: add assert_spin_locked() for attach_extent_buffer_page() Date: Tue, 8 Sep 2020 15:52:23 +0800 Message-Id: <20200908075230.86856-11-wqu@suse.com> X-Mailer: git-send-email 2.28.0 In-Reply-To: <20200908075230.86856-1-wqu@suse.com> References: <20200908075230.86856-1-wqu@suse.com> MIME-Version: 1.0 Sender: linux-btrfs-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org

When calling attach_extent_buffer_page(), we are either attaching anonymous pages (called from btrfs_clone_extent_buffer()) or attaching btree_inode pages (called from alloc_extent_buffer()). For the latter case, we should hold page->mapping->private_lock to avoid racing with modifications of page->private. Add assert_spin_locked() for the case where we are called from alloc_extent_buffer().

Signed-off-by: Qu Wenruo Reviewed-by: Nikolay Borisov but see one nit below.
--- fs/btrfs/extent_io.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 1cb41dab7a1d..81e43d99feda 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3096,6 +3096,9 @@ static int submit_extent_page(unsigned int opf, static void attach_extent_buffer_page(struct extent_buffer *eb, struct page *page) { + if (page->mapping) + assert_spin_locked(&page->mapping->private_lock); + if (!PagePrivate(page)) attach_page_private(page, eb); else From patchwork Tue Sep 8 07:52:24 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Qu Wenruo X-Patchwork-Id: 11762773 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 18F4C746 for ; Tue, 8 Sep 2020 07:53:23 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 0A9FF20C09 for ; Tue, 8 Sep 2020 07:53:23 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1729632AbgIHHxQ (ORCPT ); Tue, 8 Sep 2020 03:53:16 -0400 Received: from mx2.suse.de ([195.135.220.15]:51166 "EHLO mx2.suse.de" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1729615AbgIHHxL (ORCPT ); Tue, 8 Sep 2020 03:53:11 -0400 X-Virus-Scanned: by amavisd-new at test-mx.suse.de Received: from relay2.suse.de (unknown [195.135.221.27]) by mx2.suse.de (Postfix) with ESMTP id 0087EAE67 for ; Tue, 8 Sep 2020 07:53:09 +0000 (UTC) From: Qu Wenruo To: linux-btrfs@vger.kernel.org Subject: [PATCH 11/17] btrfs: extract the extent buffer verification from btree_readpage_end_io_hook() Date: Tue, 8 Sep 2020 15:52:24 +0800 Message-Id: <20200908075230.86856-12-wqu@suse.com> X-Mailer: git-send-email 2.28.0 In-Reply-To: <20200908075230.86856-1-wqu@suse.com> References: <20200908075230.86856-1-wqu@suse.com> MIME-Version: 1.0 Sender: 
linux-btrfs-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org Currently btree_readpage_end_io_hook() only needs to handle one extent buffer as currently one page only maps to one extent buffer. But for incoming subpage support, one page can be mapped to multiple extent buffers, thus we can no longer use current code. This refactor would allow us to call btrfs_check_extent_buffer() on all involved extent buffers at btree_readpage_end_io_hook() and other locations. Signed-off-by: Qu Wenruo Reviewed-by: Nikolay Borisov --- fs/btrfs/disk-io.c | 78 ++++++++++++++++++++++++++-------------------- 1 file changed, 45 insertions(+), 33 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 62dbd9bbd381..f6e562979682 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -574,60 +574,37 @@ static int check_tree_block_fsid(struct extent_buffer *eb) return ret; } -static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, - u64 phy_offset, struct page *page, - u64 start, u64 end, int mirror) +/* Do basic extent buffer check at read time */ +static int btrfs_check_extent_buffer(struct extent_buffer *eb) { - u64 found_start; - int found_level; - struct extent_buffer *eb; - struct btrfs_fs_info *fs_info; + struct btrfs_fs_info *fs_info = eb->fs_info; u16 csum_size; - int ret = 0; + u64 found_start; + u8 found_level; u8 result[BTRFS_CSUM_SIZE]; - int reads_done; - - if (!page->private) - goto out; + int ret = 0; - eb = (struct extent_buffer *)page->private; - fs_info = eb->fs_info; csum_size = btrfs_super_csum_size(fs_info->super_copy); - /* the pending IO might have been the only thing that kept this buffer - * in memory. 
Make sure we have a ref for all this other checks - */ - atomic_inc(&eb->refs); - - reads_done = atomic_dec_and_test(&eb->io_pages); - if (!reads_done) - goto err; - - eb->read_mirror = mirror; - if (test_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags)) { - ret = -EIO; - goto err; - } - found_start = btrfs_header_bytenr(eb); if (found_start != eb->start) { btrfs_err_rl(fs_info, "bad tree block start, want %llu have %llu", eb->start, found_start); ret = -EIO; - goto err; + goto out; } if (check_tree_block_fsid(eb)) { btrfs_err_rl(fs_info, "bad fsid on block %llu", eb->start); ret = -EIO; - goto err; + goto out; } found_level = btrfs_header_level(eb); if (found_level >= BTRFS_MAX_LEVEL) { btrfs_err(fs_info, "bad tree block level %d on %llu", (int)btrfs_header_level(eb), eb->start); ret = -EIO; - goto err; + goto out; } btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), @@ -647,7 +624,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, fs_info->sb->s_id, eb->start, val, found, btrfs_header_level(eb)); ret = -EUCLEAN; - goto err; + goto out; } /* @@ -669,6 +646,41 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, btrfs_err(fs_info, "block=%llu read time tree block corruption detected", eb->start); +out: + return ret; +} + +static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, + u64 phy_offset, struct page *page, + u64 start, u64 end, int mirror) +{ + struct extent_buffer *eb; + int ret = 0; + int reads_done; + + if (!page->private) + goto out; + + eb = (struct extent_buffer *)page->private; + + /* + * The pending IO might have been the only thing that kept this buffer + * in memory. 
Make sure we have a ref for all this other checks + */ + atomic_inc(&eb->refs); + + reads_done = atomic_dec_and_test(&eb->io_pages); + if (!reads_done) + goto err; + + eb->read_mirror = mirror; + if (test_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags)) { + ret = -EIO; + goto err; + } + + ret = btrfs_check_extent_buffer(eb); + err: if (reads_done && test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) From patchwork Tue Sep 8 07:52:25 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Qu Wenruo X-Patchwork-Id: 11762779 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 59E21159A for ; Tue, 8 Sep 2020 07:53:29 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 4D1FC21D20 for ; Tue, 8 Sep 2020 07:53:29 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1729605AbgIHHxQ (ORCPT ); Tue, 8 Sep 2020 03:53:16 -0400 Received: from mx2.suse.de ([195.135.220.15]:51186 "EHLO mx2.suse.de" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1729339AbgIHHxM (ORCPT ); Tue, 8 Sep 2020 03:53:12 -0400 X-Virus-Scanned: by amavisd-new at test-mx.suse.de Received: from relay2.suse.de (unknown [195.135.221.27]) by mx2.suse.de (Postfix) with ESMTP id B82C8AE96 for ; Tue, 8 Sep 2020 07:53:11 +0000 (UTC) From: Qu Wenruo To: linux-btrfs@vger.kernel.org Subject: [PATCH 12/17] btrfs: remove the unnecessary parameter @start and @len for check_data_csum() Date: Tue, 8 Sep 2020 15:52:25 +0800 Message-Id: <20200908075230.86856-13-wqu@suse.com> X-Mailer: git-send-email 2.28.0 In-Reply-To: <20200908075230.86856-1-wqu@suse.com> References: <20200908075230.86856-1-wqu@suse.com> MIME-Version: 1.0 Sender: linux-btrfs-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org For 
check_data_csum(), the page we're using comes directly from the inode mapping, thus it has a valid page_offset(). We can use (page_offset() + pgoff) to replace the @start parameter completely, while @len should always be sectorsize. While we're here, also add some comments, since there is quite some confusion in wording like start/offset without explaining whether it means a file offset or a logical bytenr.

This should not affect the existing behavior, as for the current sectorsize == PAGE_SIZE case @pgoff should always be 0, and len is always PAGE_SIZE (or sectorsize from the dio read path).

Signed-off-by: Qu Wenruo --- fs/btrfs/inode.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 9570458aa847..078735aa0f68 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2793,17 +2793,30 @@ void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start, btrfs_queue_work(wq, &ordered_extent->work); } +/* + * Verify the checksum of one sector of uncompressed data. + * + * @inode: The inode. + * @io_bio: The btrfs_io_bio which contains the csum. + * @icsum: The csum offset (by number of sectors). + * @page: The page where the data will be written to. + * @pgoff: The offset inside the page. + * + * The length of such check is always one sector size.
+ */ static int check_data_csum(struct inode *inode, struct btrfs_io_bio *io_bio, - int icsum, struct page *page, int pgoff, u64 start, - size_t len) + int icsum, struct page *page, int pgoff) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); char *kaddr; + u32 len = fs_info->sectorsize; u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); u8 *csum_expected; u8 csum[BTRFS_CSUM_SIZE]; + ASSERT(pgoff + len <= PAGE_SIZE); + csum_expected = ((u8 *)io_bio->csum) + icsum * csum_size; kaddr = kmap_atomic(page); @@ -2817,8 +2830,8 @@ static int check_data_csum(struct inode *inode, struct btrfs_io_bio *io_bio, kunmap_atomic(kaddr); return 0; zeroit: - btrfs_print_data_csum_error(BTRFS_I(inode), start, csum, csum_expected, - io_bio->mirror_num); + btrfs_print_data_csum_error(BTRFS_I(inode), page_offset(page) + pgoff, + csum, csum_expected, io_bio->mirror_num); if (io_bio->device) btrfs_dev_stat_inc_and_print(io_bio->device, BTRFS_DEV_STAT_CORRUPTION_ERRS); @@ -2857,8 +2870,7 @@ static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio, } phy_offset >>= inode->i_sb->s_blocksize_bits; - return check_data_csum(inode, io_bio, phy_offset, page, offset, start, - (size_t)(end - start + 1)); + return check_data_csum(inode, io_bio, phy_offset, page, offset); } /* @@ -7545,8 +7557,7 @@ static blk_status_t btrfs_check_read_dio_bio(struct inode *inode, ASSERT(pgoff < PAGE_SIZE); if (uptodate && (!csum || !check_data_csum(inode, io_bio, icsum, - bvec.bv_page, pgoff, - start, sectorsize))) { + bvec.bv_page, pgoff))) { clean_io_failure(fs_info, failure_tree, io_tree, start, bvec.bv_page, btrfs_ino(BTRFS_I(inode)), From patchwork Tue Sep 8 07:52:26 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Qu Wenruo X-Patchwork-Id: 11762769 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by 
pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id B4C3659D for ; Tue, 8 Sep 2020 07:53:18 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 9F72921D20 for ; Tue, 8 Sep 2020 07:53:18 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1729635AbgIHHxS (ORCPT ); Tue, 8 Sep 2020 03:53:18 -0400 Received: from mx2.suse.de ([195.135.220.15]:51242 "EHLO mx2.suse.de" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1729611AbgIHHxO (ORCPT ); Tue, 8 Sep 2020 03:53:14 -0400 X-Virus-Scanned: by amavisd-new at test-mx.suse.de Received: from relay2.suse.de (unknown [195.135.221.27]) by mx2.suse.de (Postfix) with ESMTP id 870C2AE67 for ; Tue, 8 Sep 2020 07:53:13 +0000 (UTC) From: Qu Wenruo To: linux-btrfs@vger.kernel.org Subject: [PATCH 13/17] btrfs: extent_io: only require sector size alignment for page read Date: Tue, 8 Sep 2020 15:52:26 +0800 Message-Id: <20200908075230.86856-14-wqu@suse.com> X-Mailer: git-send-email 2.28.0 In-Reply-To: <20200908075230.86856-1-wqu@suse.com> References: <20200908075230.86856-1-wqu@suse.com> MIME-Version: 1.0 Sender: linux-btrfs-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org If we're reading partial page, btrfs will warn about this as our read/write are always done in sector size, which equals page size. But for the incoming subpage RO support, our data read is only aligned to sectorsize, which can be smaller than page size. Thus here we change the warning condition to check it against sectorsize, thus the behavior is not changed for regular sectorsize == PAGE_SIZE case, while won't report error for subpage read. Also, pass the proper start/end with bv_offset for check_data_csum() to handle. 
Signed-off-by: Qu Wenruo --- fs/btrfs/extent_io.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 81e43d99feda..a83b63ecc5f8 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2819,6 +2819,7 @@ static void end_bio_extent_readpage(struct bio *bio) struct page *page = bvec->bv_page; struct inode *inode = page->mapping->host; struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + u32 sectorsize = fs_info->sectorsize; bool data_inode = btrfs_ino(BTRFS_I(inode)) != BTRFS_BTREE_INODE_OBJECTID; @@ -2829,13 +2830,17 @@ static void end_bio_extent_readpage(struct bio *bio) tree = &BTRFS_I(inode)->io_tree; failure_tree = &BTRFS_I(inode)->io_failure_tree; - /* We always issue full-page reads, but if some block + /* + * We always issue full-sector reads, but if some block * in a page fails to read, blk_update_request() will * advance bv_offset and adjust bv_len to compensate. - * Print a warning for nonzero offsets, and an error - * if they don't add up to a full page. */ - if (bvec->bv_offset || bvec->bv_len != PAGE_SIZE) { - if (bvec->bv_offset + bvec->bv_len != PAGE_SIZE) + * Print a warning for unaligned offsets, and an error + * if they don't add up to a full sector. 
+ */ + if (!IS_ALIGNED(bvec->bv_offset, sectorsize) || + !IS_ALIGNED(bvec->bv_offset + bvec->bv_len, sectorsize)) { + if (!IS_ALIGNED(bvec->bv_offset + bvec->bv_len, + sectorsize)) btrfs_err(fs_info, "partial page read in btrfs with offset %u and length %u", bvec->bv_offset, bvec->bv_len); @@ -2845,8 +2850,8 @@ static void end_bio_extent_readpage(struct bio *bio) bvec->bv_offset, bvec->bv_len); } - start = page_offset(page); - end = start + bvec->bv_offset + bvec->bv_len - 1; + start = page_offset(page) + bvec->bv_offset; + end = start + bvec->bv_len - 1; len = bvec->bv_len; mirror = io_bio->mirror_num; From patchwork Tue Sep 8 07:52:27 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Qu Wenruo X-Patchwork-Id: 11762771 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 84CEA59D for ; Tue, 8 Sep 2020 07:53:19 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 6C98221D20 for ; Tue, 8 Sep 2020 07:53:19 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1729645AbgIHHxS (ORCPT ); Tue, 8 Sep 2020 03:53:18 -0400 Received: from mx2.suse.de ([195.135.220.15]:51284 "EHLO mx2.suse.de" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1729626AbgIHHxR (ORCPT ); Tue, 8 Sep 2020 03:53:17 -0400 X-Virus-Scanned: by amavisd-new at test-mx.suse.de Received: from relay2.suse.de (unknown [195.135.221.27]) by mx2.suse.de (Postfix) with ESMTP id DD8E9AE24; Tue, 8 Sep 2020 07:53:15 +0000 (UTC) From: Qu Wenruo To: linux-btrfs@vger.kernel.org Cc: Goldwyn Rodrigues Subject: [PATCH 14/17] btrfs: make btrfs_readpage_end_io_hook() follow sector size Date: Tue, 8 Sep 2020 15:52:27 +0800 Message-Id: <20200908075230.86856-15-wqu@suse.com> X-Mailer: git-send-email 2.28.0 In-Reply-To: 
<20200908075230.86856-1-wqu@suse.com> References: <20200908075230.86856-1-wqu@suse.com> MIME-Version: 1.0 Sender: linux-btrfs-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org Currently btrfs_readpage_end_io_hook() just pass the whole page to check_data_csum(), which is fine since we only support sectorsize == PAGE_SIZE. To support subpage RO support, we need to properly honor per-sector checksum verification, just like what we did in dio read path. This patch will do the csum verification in a for loop, starts with pg_off == start - page_offset(page), with sectorsize increasement for each loop. For sectorsize == PAGE_SIZE case, the pg_off will always be 0, and we will only finish with just one loop. For subpage, we do the proper loop. Signed-off-by: Goldwyn Rodrigues Signed-off-by: Qu Wenruo --- fs/btrfs/inode.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 078735aa0f68..8bd14dda2067 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2851,9 +2851,12 @@ static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio, u64 start, u64 end, int mirror) { size_t offset = start - page_offset(page); + size_t pg_off; struct inode *inode = page->mapping->host; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct btrfs_root *root = BTRFS_I(inode)->root; + u32 sectorsize = root->fs_info->sectorsize; + bool found_err = false; if (PageChecked(page)) { ClearPageChecked(page); @@ -2870,7 +2873,17 @@ static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio, } phy_offset >>= inode->i_sb->s_blocksize_bits; - return check_data_csum(inode, io_bio, phy_offset, page, offset); + for (pg_off = offset; pg_off < end - page_offset(page); + pg_off += sectorsize, phy_offset++) { + int ret; + + ret = check_data_csum(inode, io_bio, phy_offset, page, pg_off); + if (ret < 0) + found_err = true; + } + if (found_err) + return -EIO; + return 0; } /* 
From patchwork Tue Sep 8 07:52:28 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Qu Wenruo X-Patchwork-Id: 11762775 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 61641746 for ; Tue, 8 Sep 2020 07:53:25 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 506EC20C09 for ; Tue, 8 Sep 2020 07:53:25 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1729655AbgIHHxX (ORCPT ); Tue, 8 Sep 2020 03:53:23 -0400 Received: from mx2.suse.de ([195.135.220.15]:51306 "EHLO mx2.suse.de" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1729633AbgIHHxT (ORCPT ); Tue, 8 Sep 2020 03:53:19 -0400 X-Virus-Scanned: by amavisd-new at test-mx.suse.de Received: from relay2.suse.de (unknown [195.135.221.27]) by mx2.suse.de (Postfix) with ESMTP id B4A2DAE67 for ; Tue, 8 Sep 2020 07:53:17 +0000 (UTC) From: Qu Wenruo To: linux-btrfs@vger.kernel.org Subject: [PATCH 15/17] btrfs: introduce subpage_eb_mapping for extent buffers Date: Tue, 8 Sep 2020 15:52:28 +0800 Message-Id: <20200908075230.86856-16-wqu@suse.com> X-Mailer: git-send-email 2.28.0 In-Reply-To: <20200908075230.86856-1-wqu@suse.com> References: <20200908075230.86856-1-wqu@suse.com> MIME-Version: 1.0 Sender: linux-btrfs-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org

One of the design blockers for subpage support is the btree inode page::private mapping. Currently, page::private for the btree inode is a pointer to the extent buffer that owns this page. This is fine for the sectorsize == PAGE_SIZE case, but not suitable for subpage support, as in that case one page can hold multiple tree blocks.
So to support subpage, here we introduce a new structure, subpage_eb_mapping, to record how many extent buffers are referring to one page. It uses a bitmap (at most 16 bits used) to record tree blocks, and a extent buffer pointers array (at most 16 too) to record the owners. This patch will modify the following functions to add subpage support using subpage_eb_mapping structure: - attach_extent_buffer_page() - detach_extent_buffer_page() - grab_extent_buffer_from_page() - try_release_extent_buffer() Signed-off-by: Qu Wenruo Reported-by: kernel test robot Reported-by: kernel test robot Reported-by: Dan Carpenter --- fs/btrfs/extent_io.c | 221 ++++++++++++++++++++++++++++++++++++++++--- fs/btrfs/extent_io.h | 3 + 2 files changed, 212 insertions(+), 12 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index a83b63ecc5f8..87b3bb781532 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -29,6 +29,34 @@ static struct kmem_cache *extent_state_cache; static struct kmem_cache *extent_buffer_cache; static struct bio_set btrfs_bioset; +/* Upper limit of how many extent buffers can be stored in one page */ +#define SUBPAGE_NR_EXTENT_BUFFERS (SZ_64K / SZ_4K) +/* + * Structure for subpage support, recording the page -> extent buffer mapping + * + * For subpage support, one 64K page can contain several tree blocks, other than + * 1:1 page <-> extent buffer mapping from sectorsize == PAGE_SIZE case. + */ +struct subpage_eb_mapping { + /* + * Which range has extent buffer. + * + * One bit represents one sector, bit nr represents the offset in page. + * At most 16 bits are utilized. 
+ */ + unsigned long bitmap; + + /* We only support 64K PAGE_SIZE system to mount 4K sectorsize fs */ + struct extent_buffer *buffers[SUBPAGE_NR_EXTENT_BUFFERS]; +}; + +struct btrfs_fs_info *page_to_fs_info(struct page *page) +{ + ASSERT(page && page->mapping); + + return BTRFS_I(page->mapping->host)->root->fs_info; +} + static inline bool extent_state_in_tree(const struct extent_state *state) { return !RB_EMPTY_NODE(&state->rb_node); @@ -3098,12 +3126,50 @@ static int submit_extent_page(unsigned int opf, return ret; } +static void attach_subpage_mapping(struct extent_buffer *eb, + struct page *page, + struct subpage_eb_mapping *mapping) +{ + u32 sectorsize = eb->fs_info->sectorsize; + u32 nodesize = eb->fs_info->nodesize; + int index_start = (eb->start - page_offset(page)) / sectorsize; + int nr_bits = nodesize / sectorsize; + int i; + + ASSERT(mapping); + if (!PagePrivate(page)) { + /* Attach mapping to page::private and initialize */ + memset(mapping, 0, sizeof(*mapping)); + attach_page_private(page, mapping); + } else { + /* Use the existing page::private as mapping */ + kfree(mapping); + mapping = (struct subpage_eb_mapping *) page->private; + } + + /* Set the bitmap and pointers */ + for (i = index_start; i < index_start + nr_bits; i++) { + set_bit(i, &mapping->bitmap); + mapping->buffers[i] = eb; + } +} + static void attach_extent_buffer_page(struct extent_buffer *eb, - struct page *page) + struct page *page, + struct subpage_eb_mapping *mapping) { + bool subpage = (eb->fs_info->sectorsize < PAGE_SIZE); if (page->mapping) assert_spin_locked(&page->mapping->private_lock); + if (subpage && page->mapping) { + attach_subpage_mapping(eb, page, mapping); + return; + } + /* + * Anonymous page and sectorsize == PAGE_SIZE uses page::private as a + * pointer to eb directly. 
+ */ if (!PagePrivate(page)) attach_page_private(page, eb); else @@ -4928,16 +4994,61 @@ int extent_buffer_under_io(const struct extent_buffer *eb) test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); } +static void detach_subpage_mapping(struct extent_buffer *eb, struct page *page) +{ + struct subpage_eb_mapping *mapping; + u32 sectorsize = eb->fs_info->sectorsize; + int start_index; + int nr_bits = eb->fs_info->nodesize / sectorsize; + int i; + + /* Page already detached */ + if (!PagePrivate(page)) + return; + + assert_spin_locked(&page->mapping->private_lock); + ASSERT(eb->start >= page_offset(page) && + eb->start < page_offset(page) + PAGE_SIZE); + + mapping = (struct subpage_eb_mapping *)page->private; + start_index = (eb->start - page_offset(page)) / sectorsize; + + for (i = start_index; i < start_index + nr_bits; i++) { + if (test_bit(i, &mapping->bitmap) && + mapping->buffers[i] == eb) { + clear_bit(i, &mapping->bitmap); + mapping->buffers[i] = NULL; + } + } + + /* Are we the last owner ? 
*/ + if (mapping->bitmap == 0) { + kfree(mapping); + detach_page_private(page); + /* One for the first time allocated the page */ + put_page(page); + } +} + static void detach_extent_buffer_page(struct extent_buffer *eb, struct page *page) { bool mapped = !test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags); + bool subpage = (eb->fs_info->sectorsize < PAGE_SIZE); if (!page) return; if (mapped) spin_lock(&page->mapping->private_lock); + + if (subpage && page->mapping) { + detach_subpage_mapping(eb, page); + if (mapped) + spin_unlock(&page->mapping->private_lock); + return; + } + if (PagePrivate(page) && page->private == (unsigned long)eb) { BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); BUG_ON(PageDirty(page)); @@ -5035,7 +5146,7 @@ struct extent_buffer *btrfs_clone_extent_buffer(const struct extent_buffer *src) btrfs_release_extent_buffer(new); return NULL; } - attach_extent_buffer_page(new, p); + attach_extent_buffer_page(new, p, NULL); WARN_ON(PageDirty(p)); SetPageUptodate(p); new->pages[i] = p; @@ -5243,8 +5354,31 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, * The function here is to ensure we have proper locking and detect such race * so we won't allocating an eb twice. 
*/ -static struct extent_buffer *grab_extent_buffer_from_page(struct page *page) +static struct extent_buffer *grab_extent_buffer_from_page(struct page *page, + u64 bytenr) { + struct btrfs_fs_info *fs_info = page_to_fs_info(page); + bool subpage = (fs_info->sectorsize < PAGE_SIZE); + + if (!PagePrivate(page)) + return NULL; + + if (subpage) { + struct subpage_eb_mapping *mapping; + u32 sectorsize = fs_info->sectorsize; + int start_index; + + ASSERT(bytenr >= page_offset(page) && + bytenr < page_offset(page) + PAGE_SIZE); + + start_index = (bytenr - page_offset(page)) / sectorsize; + mapping = (struct subpage_eb_mapping *)page->private; + + if (test_bit(start_index, &mapping->bitmap)) + return mapping->buffers[start_index]; + return NULL; + } + /* * For PAGE_SIZE == sectorsize case, a btree_inode page should have its * private pointer as extent buffer who owns this page. @@ -5263,6 +5397,8 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, struct extent_buffer *exists = NULL; struct page *p; struct address_space *mapping = fs_info->btree_inode->i_mapping; + struct subpage_eb_mapping *subpage_mapping = NULL; + bool subpage = (fs_info->sectorsize < PAGE_SIZE); int uptodate = 1; int ret; @@ -5286,6 +5422,14 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, if (!eb) return ERR_PTR(-ENOMEM); + if (subpage) { + subpage_mapping = kmalloc(sizeof(*subpage_mapping), GFP_NOFS); + if (!subpage_mapping) { + exists = ERR_PTR(-ENOMEM); + goto free_eb; + } + } + num_pages = num_extent_pages(eb); for (i = 0; i < num_pages; i++, index++) { p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL); @@ -5296,7 +5440,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, spin_lock(&mapping->private_lock); if (PagePrivate(p)) { - exists = grab_extent_buffer_from_page(p); + exists = grab_extent_buffer_from_page(p, start); if (exists && atomic_inc_not_zero(&exists->refs)) { spin_unlock(&mapping->private_lock); 
unlock_page(p); @@ -5306,16 +5450,19 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, } exists = NULL; - /* - * Do this so attach doesn't complain and we need to - * drop the ref the old guy had. - */ - ClearPagePrivate(p); - WARN_ON(PageDirty(p)); - put_page(p); + if (!subpage) { + /* + * Do this so attach doesn't complain and we + * need to drop the ref the old guy had. + */ + ClearPagePrivate(p); + WARN_ON(PageDirty(p)); + put_page(p); + } } - attach_extent_buffer_page(eb, p); + attach_extent_buffer_page(eb, p, subpage_mapping); spin_unlock(&mapping->private_lock); + subpage_mapping = NULL; WARN_ON(PageDirty(p)); eb->pages[i] = p; if (!PageUptodate(p)) @@ -5365,6 +5512,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, free_eb: WARN_ON(!atomic_dec_and_test(&eb->refs)); + kfree(subpage_mapping); for (i = 0; i < num_pages; i++) { if (eb->pages[i]) unlock_page(eb->pages[i]); @@ -6158,8 +6306,49 @@ void memmove_extent_buffer(const struct extent_buffer *dst, } } +int try_release_subpage_ebs(struct page *page) +{ + struct subpage_eb_mapping *mapping; + int i; + + assert_spin_locked(&page->mapping->private_lock); + if (!PagePrivate(page)) + return 1; + + mapping = (struct subpage_eb_mapping *)page->private; + for (i = 0; i < SUBPAGE_NR_EXTENT_BUFFERS && PagePrivate(page); i++) { + struct btrfs_fs_info *fs_info = page_to_fs_info(page); + struct extent_buffer *eb; + int ret; + + if (!test_bit(i, &mapping->bitmap)) + continue; + + eb = mapping->buffers[i]; + spin_unlock(&page->mapping->private_lock); + spin_lock(&eb->refs_lock); + ret = release_extent_buffer(eb); + spin_lock(&page->mapping->private_lock); + + /* + * Extent buffer can't be freed yet, must jump to next slot + * and avoid calling release_extent_buffer(). + */ + if (!ret) + i += (fs_info->nodesize / fs_info->sectorsize - 1); + } + /* + * detach_subpage_mapping() from release_extent_buffer() has detached + * all ebs from this page. 
All related ebs are released. + */ + if (!PagePrivate(page)) + return 1; + return 0; +} + int try_release_extent_buffer(struct page *page) { + bool subpage = (page_to_fs_info(page)->sectorsize < PAGE_SIZE); struct extent_buffer *eb; /* @@ -6172,6 +6361,14 @@ int try_release_extent_buffer(struct page *page) return 1; } + if (subpage) { + int ret; + + ret = try_release_subpage_ebs(page); + spin_unlock(&page->mapping->private_lock); + return ret; + } + eb = (struct extent_buffer *)page->private; BUG_ON(!eb); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index e16c5449ba48..6593b6883438 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -184,6 +184,9 @@ static inline int extent_compress_type(unsigned long bio_flags) return bio_flags >> EXTENT_BIO_FLAG_SHIFT; } +/* Unable to inline it due to the requirement for both ASSERT() and BTRFS_I() */ +struct btrfs_fs_info *page_to_fs_info(struct page *page); + struct extent_map_tree; typedef struct extent_map *(get_extent_t)(struct btrfs_inode *inode, From patchwork Tue Sep 8 07:52:29 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Qu Wenruo X-Patchwork-Id: 11762781 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 21A64746 for ; Tue, 8 Sep 2020 07:53:30 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 10F1C21D20 for ; Tue, 8 Sep 2020 07:53:30 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1729662AbgIHHx3 (ORCPT ); Tue, 8 Sep 2020 03:53:29 -0400 Received: from mx2.suse.de ([195.135.220.15]:51336 "EHLO mx2.suse.de" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1729626AbgIHHxU (ORCPT ); Tue, 8 Sep 2020 03:53:20 -0400 X-Virus-Scanned: by amavisd-new at test-mx.suse.de Received: from relay2.suse.de 
From: Qu Wenruo To: linux-btrfs@vger.kernel.org Subject: [PATCH 16/17] btrfs: handle extent buffer verification properly for subpage size Date: Tue, 8 Sep 2020 15:52:29 +0800 Message-Id: <20200908075230.86856-17-wqu@suse.com> In-Reply-To: <20200908075230.86856-1-wqu@suse.com> References: <20200908075230.86856-1-wqu@suse.com> X-Mailing-List: linux-btrfs@vger.kernel.org
Unlike the regular PAGE_SIZE == sectorsize case, one btree inode page can contain several tree blocks. This makes the csum and other basic tree block verification very tricky, as in btree_readpage_end_io_hook() we can only check the extent buffer that triggered the page read, not the remaining tree blocks in the same page. So this patch changes tree block verification to happen at the following times: - btree_readpage_end_io_hook() This is the old timing, but now we check all known extent buffers of the page. - read_extent_buffer_pages() This is the new timing, exclusive to subpage support. Now if an extent buffer finds all its pages (only 1 for subpage) already uptodate, it still needs to check whether the extent buffer itself has already been verified. If not, it calls btrfs_check_extent_buffer() to verify the extent buffer. 
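The "check once, remember the result" flow above can be modeled with a tiny standalone sketch (plain C, all names invented here, not the kernel code): a `verified` bitmap parallel to the eb bitmap is consulted before re-running the expensive check, and is only updated on success:

```c
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

/* Hypothetical model of the per-page "verified" bitmap.  verify_fn stands
 * in for btrfs_check_extent_buffer() and returns 0 on success. */
struct demo_page_state {
	unsigned long verified;	/* one bit per sector slot in the page */
};

static int demo_check_calls;	/* counts how often the real check ran */

static int demo_check_ok(void *eb)
{
	(void)eb;
	demo_check_calls++;
	return 0;
}

static int demo_verify_once(struct demo_page_state *ps,
			    unsigned int first_slot, unsigned int nr_slots,
			    int (*verify_fn)(void *), void *eb, bool *ran_check)
{
	*ran_check = false;
	/* Already verified earlier (e.g. by the readpage end-io hook)? */
	if (ps->verified & (1UL << first_slot))
		return 0;

	*ran_check = true;
	int ret = verify_fn(eb);
	if (ret == 0) {
		/* Mark every sector slot of this tree block as verified. */
		for (unsigned int i = first_slot; i < first_slot + nr_slots; i++)
			ps->verified |= 1UL << i;
	}
	return ret;
}
```

Calling `demo_verify_once()` twice for the same tree block runs the underlying check only once, which mirrors what check_uptodate_extent_buffer_page() achieves with the `verified` field in the real patch.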
Signed-off-by: Qu Wenruo --- fs/btrfs/disk-io.c | 5 +- fs/btrfs/disk-io.h | 1 + fs/btrfs/extent_io.c | 111 ++++++++++++++++++++++++++++++++++++++++++- fs/btrfs/extent_io.h | 1 + 4 files changed, 116 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f6e562979682..5153c0420e7e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -575,7 +575,7 @@ static int check_tree_block_fsid(struct extent_buffer *eb) } /* Do basic extent buffer check at read time */ -static int btrfs_check_extent_buffer(struct extent_buffer *eb) +int btrfs_check_extent_buffer(struct extent_buffer *eb) { struct btrfs_fs_info *fs_info = eb->fs_info; u16 csum_size; @@ -661,6 +661,9 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, if (!page->private) goto out; + if (page_to_fs_info(page)->sectorsize < PAGE_SIZE) + return btrfs_verify_subpage_extent_buffers(page, mirror); + eb = (struct extent_buffer *)page->private; /* diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 00dc39d47ed3..ac42b622f113 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -129,6 +129,7 @@ struct extent_map *btree_get_extent(struct btrfs_inode *inode, int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags); int __init btrfs_end_io_wq_init(void); void __cold btrfs_end_io_wq_exit(void); +int btrfs_check_extent_buffer(struct extent_buffer *eb); #ifdef CONFIG_DEBUG_LOCK_ALLOC void btrfs_init_lockdep(void); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 87b3bb781532..8c5bb53265ab 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -46,6 +46,9 @@ struct subpage_eb_mapping { */ unsigned long bitmap; + /* Which range of ebs has been verified */ + unsigned long verified; + /* We only support 64K PAGE_SIZE system to mount 4K sectorsize fs */ struct extent_buffer *buffers[SUBPAGE_NR_EXTENT_BUFFERS]; }; @@ -5017,6 +5020,7 @@ static void detach_subpage_mapping(struct extent_buffer *eb, struct page *page) if (test_bit(i, 
&mapping->bitmap) && mapping->buffers[i] == eb) { clear_bit(i, &mapping->bitmap); + clear_bit(i, &mapping->verified); mapping->buffers[i] = NULL; } } @@ -5696,6 +5700,38 @@ void set_extent_buffer_uptodate(struct extent_buffer *eb) } } +/* + * For subpage, one btree page can already be uptodate (read by other tree + * blocks in the same page), but we haven't verified the csum of the tree + * block. + * + * So we need to do extra check for uptodate page of the extent buffer. + */ +static int check_uptodate_extent_buffer_page(struct extent_buffer *eb) +{ + struct btrfs_fs_info *fs_info = eb->fs_info; + struct subpage_eb_mapping *eb_mapping; + struct page *page = eb->pages[0]; + int nr_bit; + int ret; + + if (fs_info->sectorsize == PAGE_SIZE) + return 0; + + nr_bit = (eb->start - page_offset(page)) / fs_info->sectorsize; + spin_lock(&page->mapping->private_lock); + eb_mapping = (struct subpage_eb_mapping *)page->private; + if (test_bit(nr_bit, &eb_mapping->verified)) { + spin_unlock(&page->mapping->private_lock); + return 0; + } + spin_unlock(&page->mapping->private_lock); + ret = btrfs_check_extent_buffer(eb); + if (!ret) + set_bit(nr_bit, &eb_mapping->verified); + return ret; +} + int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num) { int i; @@ -5737,7 +5773,9 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num) } if (all_uptodate) { - set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); + ret = check_uptodate_extent_buffer_page(eb); + if (!ret) + set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); goto unlock_exit; } @@ -6396,3 +6434,74 @@ int try_release_extent_buffer(struct page *page) return release_extent_buffer(eb); } + +/* + * Verify all referred extent buffers in one page for subpage support. + * + * This is called in btree_readpage_end_io_hook(), where we still have the + * page locked. 
+ * Here we only check the extent buffer who triggers the page read, so it + * doesn't cover all extent buffers contained by this page. + * + * We still need to do the same check for read_extent_buffer_pages() where + * the page of the extent buffer is already uptodate. + */ +int btrfs_verify_subpage_extent_buffers(struct page *page, int mirror) +{ + struct btrfs_fs_info *fs_info = page_to_fs_info(page); + struct extent_buffer *eb; + struct subpage_eb_mapping *eb_mapping; + int nr_bits = (fs_info->nodesize / fs_info->sectorsize); + int i; + int ret = 0; + + spin_lock(&page->mapping->private_lock); + eb_mapping = (struct subpage_eb_mapping *)page->private; + for (i = 0; i < SUBPAGE_NR_EXTENT_BUFFERS; i++) { + int reads_done; + int j; + + if (!test_bit(i, &eb_mapping->bitmap)) + continue; + + eb = eb_mapping->buffers[i]; + spin_unlock(&page->mapping->private_lock); + + atomic_inc(&eb->refs); + reads_done = atomic_dec_and_test(&eb->io_pages); + + /* + * For subpage tree block, all tree read should be contained in + * one page, thus the read should always be done. 
+ */ + ASSERT(reads_done); + + eb->read_mirror = mirror; + if (test_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags)) { + ret = -EIO; + atomic_inc(&eb->io_pages); + clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); + free_extent_buffer(eb); + goto out; + } + + ret = btrfs_check_extent_buffer(eb); + if (ret < 0) { + atomic_inc(&eb->io_pages); + clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); + free_extent_buffer(eb); + goto out; + } + for (j = i; j < i + nr_bits; j++) + set_bit(j, &eb_mapping->verified); + + /* Go to next eb directly */ + i += (nr_bits - 1); + + free_extent_buffer(eb); + spin_lock(&page->mapping->private_lock); + } + spin_unlock(&page->mapping->private_lock); +out: + return ret; +} diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 6593b6883438..d714e05178b5 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -330,6 +330,7 @@ blk_status_t btrfs_submit_read_repair(struct inode *inode, struct page *page, unsigned int pgoff, u64 start, u64 end, int failed_mirror, submit_bio_hook_t *submit_bio_hook); +int btrfs_verify_subpage_extent_buffers(struct page *page, int mirror); #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS bool find_lock_delalloc_range(struct inode *inode, From patchwork Tue Sep 8 07:52:30 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Qu Wenruo X-Patchwork-Id: 11762777 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 1944059D for ; Tue, 8 Sep 2020 07:53:29 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 093AF20C09 for ; Tue, 8 Sep 2020 07:53:29 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1729633AbgIHHxZ (ORCPT ); Tue, 8 Sep 2020 03:53:25 -0400 Received: from mx2.suse.de ([195.135.220.15]:51350 "EHLO mx2.suse.de" 
From: Qu Wenruo To: linux-btrfs@vger.kernel.org Subject: [PATCH 17/17] btrfs: allow RO mount of 4K sector size fs on 64K page system Date: Tue, 8 Sep 2020 15:52:30 +0800 Message-Id: <20200908075230.86856-18-wqu@suse.com> In-Reply-To: <20200908075230.86856-1-wqu@suse.com> References: <20200908075230.86856-1-wqu@suse.com> X-Mailing-List: linux-btrfs@vger.kernel.org
This adds basic RO mount support for a 4K sector size filesystem on a 64K page size system. Currently we only plan to support 4K and 64K page sizes. Signed-off-by: Qu Wenruo --- fs/btrfs/disk-io.c | 24 +++++++++++++++++++++--- fs/btrfs/super.c | 7 +++++++ 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5153c0420e7e..9e3938e68355 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2446,13 +2446,21 @@ static int validate_super(struct btrfs_fs_info *fs_info, btrfs_err(fs_info, "invalid sectorsize %llu", sectorsize); ret = -EINVAL; } - /* Only PAGE SIZE is supported yet */ - if (sectorsize != PAGE_SIZE) { + + /* + * For 4K page size, we only support 4K sector size. + * For 64K page size, we support RW for 64K sector size, and RO for + * 4K sector size. 
+ */ + if ((PAGE_SIZE == SZ_4K && sectorsize != PAGE_SIZE) || + (PAGE_SIZE == SZ_64K && (sectorsize != SZ_4K && + sectorsize != SZ_64K))) { btrfs_err(fs_info, - "sectorsize %llu not supported yet, only support %lu", + "sectorsize %llu not supported yet for page size %lu", sectorsize, PAGE_SIZE); ret = -EINVAL; } + if (!is_power_of_2(nodesize) || nodesize < sectorsize || nodesize > BTRFS_MAX_METADATA_BLOCKSIZE) { btrfs_err(fs_info, "invalid nodesize %llu", nodesize); @@ -3100,6 +3108,16 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device goto fail_alloc; } + /* For 4K sector size support, it's only read-only yet */ + if (PAGE_SIZE == SZ_64K && sectorsize == SZ_4K) { + if (!sb_rdonly(sb) || btrfs_super_log_root(disk_super)) { + btrfs_err(fs_info, + "subpage sector size only support RO yet"); + err = -EINVAL; + goto fail_alloc; + } + } + ret = btrfs_init_workqueues(fs_info, fs_devices); if (ret) { err = ret; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 25967ecaaf0a..edc731780d64 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1922,6 +1922,13 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) ret = -EINVAL; goto restore; } + if (fs_info->sectorsize < PAGE_SIZE) { + btrfs_warn(fs_info, + "read-write mount is not yet allowed for sector size %u page size %lu", + fs_info->sectorsize, PAGE_SIZE); + ret = -EINVAL; + goto restore; + } ret = btrfs_cleanup_fs_roots(fs_info); if (ret)
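The support matrix that patch 17 spreads across validate_super(), open_ctree() and btrfs_remount() can be summarized as a small pure function. This is an illustrative restatement of the policy only (the helper name is invented here, not code from the patch): 4K pages accept only a 4K sector size, while 64K pages accept 64K read-write and 4K read-only.

```c
#include <assert.h>
#include <stdbool.h>

#define DEMO_SZ_4K  (4 * 1024UL)
#define DEMO_SZ_64K (64 * 1024UL)

/* Hypothetical restatement of the mount policy introduced by patch 17:
 * returns true when this page size / sector size / mode combination
 * is allowed to mount. */
static bool demo_mount_allowed(unsigned long page_size,
			       unsigned long sectorsize, bool read_only)
{
	if (page_size == DEMO_SZ_4K)
		return sectorsize == DEMO_SZ_4K;	/* 4K pages: 4K only */
	if (page_size == DEMO_SZ_64K) {
		if (sectorsize == DEMO_SZ_64K)
			return true;			/* native: RW allowed */
		if (sectorsize == DEMO_SZ_4K)
			return read_only;		/* subpage: RO only */
	}
	return false;	/* other page sizes are not planned yet */
}
```

The read-only restriction is why both the open_ctree() hunk (rejecting RW mounts and log replay) and the btrfs_remount() hunk (rejecting remount to RW) are needed: the same policy must hold at mount time and at remount time.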