
[v3,16/22] btrfs: extent_io: implement try_release_extent_buffer() for subpage metadata support

Message ID 20210106010201.37864-17-wqu@suse.com (mailing list archive)
State New, archived
Series btrfs: add read-only support for subpage sector size

Commit Message

Qu Wenruo Jan. 6, 2021, 1:01 a.m. UTC
Unlike the original try_release_extent_buffer,
try_release_subpage_extent_buffer() will iterate through
btrfs_subpage::tree_block_bitmap, and try to release each extent buffer.

Signed-off-by: Qu Wenruo <wqu@suse.com>
---
 fs/btrfs/extent_io.c | 76 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 76 insertions(+)

Comments

Qu Wenruo Jan. 6, 2021, 8:24 a.m. UTC | #1
On 2021/1/6 9:01 AM, Qu Wenruo wrote:
> Unlike the original try_release_extent_buffer,
> try_release_subpage_extent_buffer() will iterate through
> btrfs_subpage::tree_block_bitmap, and try to release each extent buffer.
> 
> Signed-off-by: Qu Wenruo <wqu@suse.com>
> ---
>   fs/btrfs/extent_io.c | 76 ++++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 76 insertions(+)
> 
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index 194cb8b63216..792264f5c3c2 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -6258,10 +6258,86 @@ void memmove_extent_buffer(const struct extent_buffer *dst,
>   	}
>   }
>   
> +static int try_release_subpage_extent_buffer(struct page *page)
> +{
> +	struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
> +	u64 page_start = page_offset(page);
> +	int bitmap_size = BTRFS_SUBPAGE_BITMAP_SIZE;
> +	int bit_start = 0;
> +	int ret;
> +
> +	while (bit_start < bitmap_size) {
> +		struct btrfs_subpage *subpage;
> +		struct extent_buffer *eb;
> +		unsigned long flags;
> +		u16 tmp = 1 << bit_start;
> +		u64 start;
> +
> +		/*
> +		 * Make sure the page still has private, as previous iteration
> +		 * can detach page private.
> +		 */
> +		spin_lock(&page->mapping->private_lock);
> +		if (!PagePrivate(page)) {
> +			spin_unlock(&page->mapping->private_lock);
> +			break;
> +		}
> +
> +		subpage = (struct btrfs_subpage *)page->private;
> +
> +		spin_lock_irqsave(&subpage->lock, flags);
> +		spin_unlock(&page->mapping->private_lock);
> +
> +		if (!(tmp & subpage->tree_block_bitmap))  {
> +			spin_unlock_irqrestore(&subpage->lock, flags);
> +			bit_start++;
> +			continue;
> +		}
> +
> +		start = bit_start * fs_info->sectorsize + page_start;
> +		bit_start += fs_info->nodesize >> fs_info->sectorsize_bits;
> +		/*
> +		 * Here we can't call find_extent_buffer() which will increase
> +		 * eb->refs.
> +		 */
> +		rcu_read_lock();
> +		eb = radix_tree_lookup(&fs_info->buffer_radix,
> +				start >> fs_info->sectorsize_bits);
> +		ASSERT(eb);

Another ASSERT() got hit here. I'm surprised that I have never hit such a case before.

Since release_extent_buffer() removes the eb from the radix tree first
and only then updates the subpage tree_block_bitmap, we can have cases
where a bit in the subpage tree_block_bitmap is set but there is no eb
in the radix tree.
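
Roughly, the ordering in release_extent_buffer() looks like this (a
simplified sketch from memory, not the exact code; where exactly the
subpage bitmap gets cleared is my assumption based on the behavior
described above):

	if (atomic_dec_and_test(&eb->refs)) {
		spin_unlock(&eb->refs_lock);

		/* Step 1: the eb disappears from the radix tree */
		spin_lock(&fs_info->buffer_lock);
		radix_tree_delete(&fs_info->buffer_radix,
				  eb->start >> fs_info->sectorsize_bits);
		spin_unlock(&fs_info->buffer_lock);

		/*
		 * Window: a concurrent try_release_subpage_extent_buffer()
		 * still sees the tree_block_bitmap bit set, but the radix
		 * tree lookup returns NULL and the ASSERT() triggers.
		 */

		/* Step 2: pages detached, subpage tree_block_bitmap cleared */
		btrfs_release_extent_buffer_pages(eb);
	}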

In that case, we can safely move on to the next bit and re-check.

The function's return value only depends on whether the page still has
the private bit set, so here we can safely continue.
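
Something like this in try_release_subpage_extent_buffer() should be
enough (an untested sketch of the skip-and-continue idea, reusing the
local variables from the patch above):

		rcu_read_lock();
		eb = radix_tree_lookup(&fs_info->buffer_radix,
				start >> fs_info->sectorsize_bits);
		/*
		 * The eb may already be gone from the radix tree while its
		 * tree_block_bitmap bit is still set, so just skip this
		 * range and re-check the next bit.
		 */
		if (!eb) {
			rcu_read_unlock();
			spin_unlock_irqrestore(&subpage->lock, flags);
			continue;
		}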

Nik is right on this, we need a better refactor of the eb refs handling.
I'll invest more time to make the eb refs handling better.

Thanks,
Qu
> +		spin_lock(&eb->refs_lock);
> +		if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb) ||
> +		    !test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
> +			spin_unlock(&eb->refs_lock);
> +			rcu_read_unlock();
> +			continue;
> +		}
> +		rcu_read_unlock();
> +		spin_unlock_irqrestore(&subpage->lock, flags);
> +		/*
> +		 * Here we don't care about the return value, as we will always check
> +		 * the page private at the end.
> +		 * And release_extent_buffer() will release the refs_lock.
> +		 */
> +		release_extent_buffer(eb);
> +	}
> +	/* Finally, check if we have cleared page private */
> +	spin_lock(&page->mapping->private_lock);
> +	if (!PagePrivate(page))
> +		ret = 1;
> +	else
> +		ret = 0;
> +	spin_unlock(&page->mapping->private_lock);
> +	return ret;
> +
> +}
> +
>   int try_release_extent_buffer(struct page *page)
>   {
>   	struct extent_buffer *eb;
>   
> +	if (btrfs_sb(page->mapping->host->i_sb)->sectorsize < PAGE_SIZE)
> +		return try_release_subpage_extent_buffer(page);
> +
>   	/*
>   	 * We need to make sure nobody is attaching this page to an eb right
>   	 * now.
>
Qu Wenruo Jan. 6, 2021, 8:43 a.m. UTC | #2
On 2021/1/6 4:24 PM, Qu Wenruo wrote:
> 
> 
On 2021/1/6 9:01 AM, Qu Wenruo wrote:
>> Unlike the original try_release_extent_buffer,
>> try_release_subpage_extent_buffer() will iterate through
>> btrfs_subpage::tree_block_bitmap, and try to release each extent buffer.
>>
>> Signed-off-by: Qu Wenruo <wqu@suse.com>
>> ---
>>   fs/btrfs/extent_io.c | 76 ++++++++++++++++++++++++++++++++++++++++++++
>>   1 file changed, 76 insertions(+)
>>
>> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
>> index 194cb8b63216..792264f5c3c2 100644
>> --- a/fs/btrfs/extent_io.c
>> +++ b/fs/btrfs/extent_io.c
>> @@ -6258,10 +6258,86 @@ void memmove_extent_buffer(const struct extent_buffer *dst,
>>       }
>>   }
>> +static int try_release_subpage_extent_buffer(struct page *page)
>> +{
>> +    struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
>> +    u64 page_start = page_offset(page);
>> +    int bitmap_size = BTRFS_SUBPAGE_BITMAP_SIZE;
>> +    int bit_start = 0;
>> +    int ret;
>> +
>> +    while (bit_start < bitmap_size) {
>> +        struct btrfs_subpage *subpage;
>> +        struct extent_buffer *eb;
>> +        unsigned long flags;
>> +        u16 tmp = 1 << bit_start;
>> +        u64 start;
>> +
>> +        /*
>> +         * Make sure the page still has private, as previous iteration
>> +         * can detach page private.
>> +         */
>> +        spin_lock(&page->mapping->private_lock);
>> +        if (!PagePrivate(page)) {
>> +            spin_unlock(&page->mapping->private_lock);
>> +            break;
>> +        }
>> +
>> +        subpage = (struct btrfs_subpage *)page->private;
>> +
>> +        spin_lock_irqsave(&subpage->lock, flags);
>> +        spin_unlock(&page->mapping->private_lock);
>> +
>> +        if (!(tmp & subpage->tree_block_bitmap))  {
>> +            spin_unlock_irqrestore(&subpage->lock, flags);
>> +            bit_start++;
>> +            continue;
>> +        }
>> +
>> +        start = bit_start * fs_info->sectorsize + page_start;
>> +        bit_start += fs_info->nodesize >> fs_info->sectorsize_bits;
>> +        /*
>> +         * Here we can't call find_extent_buffer() which will increase
>> +         * eb->refs.
>> +         */
>> +        rcu_read_lock();
>> +        eb = radix_tree_lookup(&fs_info->buffer_radix,
>> +                start >> fs_info->sectorsize_bits);
>> +        ASSERT(eb);
> 
> Another ASSERT() got hit here. I'm surprised that I have never hit such
> a case before.
> 
> Since release_extent_buffer() removes the eb from the radix tree first
> and only then updates the subpage tree_block_bitmap, we can have cases
> where a bit in the subpage tree_block_bitmap is set but there is no eb
> in the radix tree.
> 
> In that case, we can safely move on to the next bit and re-check.
> 
> The function's return value only depends on whether the page still has
> the private bit set, so here we can safely continue.
> 
> Nik is right on this, we need a better refactor of the eb refs handling.
> I'll invest more time to make the eb refs handling better.

The root problem here is that we have too many things to keep
synchronized for an extent buffer.

We have eb::refs, eb::bflags (IN_TREE), buffer_radix, and the new
subpage::tree_block_bitmap, and they all need to stay in sync with each
other.

I'm wondering if we could find a good and clear enough way to handle 
extent buffers.

IMHO, we need to sacrifice some metadata performance (which is already
poor enough); otherwise there is really no better way to solve the
mess...

Thanks,
Qu

> 
> Thanks,
> Qu
>> +        spin_lock(&eb->refs_lock);
>> +        if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb) ||
>> +            !test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
>> +            spin_unlock(&eb->refs_lock);
>> +            rcu_read_unlock();
>> +            continue;
>> +        }
>> +        rcu_read_unlock();
>> +        spin_unlock_irqrestore(&subpage->lock, flags);
>> +        /*
>> +         * Here we don't care about the return value, as we will always check
>> +         * the page private at the end.
>> +         * And release_extent_buffer() will release the refs_lock.
>> +         */
>> +        release_extent_buffer(eb);
>> +    }
>> +    /* Finally, check if we have cleared page private */
>> +    spin_lock(&page->mapping->private_lock);
>> +    if (!PagePrivate(page))
>> +        ret = 1;
>> +    else
>> +        ret = 0;
>> +    spin_unlock(&page->mapping->private_lock);
>> +    return ret;
>> +
>> +}
>> +
>>   int try_release_extent_buffer(struct page *page)
>>   {
>>       struct extent_buffer *eb;
>> +    if (btrfs_sb(page->mapping->host->i_sb)->sectorsize < PAGE_SIZE)
>> +        return try_release_subpage_extent_buffer(page);
>> +
>>       /*
>>       * We need to make sure nobody is attaching this page to an eb right
>>       * now.
>>
>

Patch

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 194cb8b63216..792264f5c3c2 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -6258,10 +6258,86 @@  void memmove_extent_buffer(const struct extent_buffer *dst,
 	}
 }
 
+static int try_release_subpage_extent_buffer(struct page *page)
+{
+	struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
+	u64 page_start = page_offset(page);
+	int bitmap_size = BTRFS_SUBPAGE_BITMAP_SIZE;
+	int bit_start = 0;
+	int ret;
+
+	while (bit_start < bitmap_size) {
+		struct btrfs_subpage *subpage;
+		struct extent_buffer *eb;
+		unsigned long flags;
+		u16 tmp = 1 << bit_start;
+		u64 start;
+
+		/*
+		 * Make sure the page still has private, as previous iteration
+		 * can detach page private.
+		 */
+		spin_lock(&page->mapping->private_lock);
+		if (!PagePrivate(page)) {
+			spin_unlock(&page->mapping->private_lock);
+			break;
+		}
+
+		subpage = (struct btrfs_subpage *)page->private;
+
+		spin_lock_irqsave(&subpage->lock, flags);
+		spin_unlock(&page->mapping->private_lock);
+
+		if (!(tmp & subpage->tree_block_bitmap))  {
+			spin_unlock_irqrestore(&subpage->lock, flags);
+			bit_start++;
+			continue;
+		}
+
+		start = bit_start * fs_info->sectorsize + page_start;
+		bit_start += fs_info->nodesize >> fs_info->sectorsize_bits;
+		/*
+		 * Here we can't call find_extent_buffer() which will increase
+		 * eb->refs.
+		 */
+		rcu_read_lock();
+		eb = radix_tree_lookup(&fs_info->buffer_radix,
+				start >> fs_info->sectorsize_bits);
+		ASSERT(eb);
+		spin_lock(&eb->refs_lock);
+		if (atomic_read(&eb->refs) != 1 || extent_buffer_under_io(eb) ||
+		    !test_and_clear_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
+			spin_unlock(&eb->refs_lock);
+			rcu_read_unlock();
+			continue;
+		}
+		rcu_read_unlock();
+		spin_unlock_irqrestore(&subpage->lock, flags);
+		/*
+		 * Here we don't care about the return value, as we will always check
+		 * the page private at the end.
+		 * And release_extent_buffer() will release the refs_lock.
+		 */
+		release_extent_buffer(eb);
+	}
+	/* Finally, check if we have cleared page private */
+	spin_lock(&page->mapping->private_lock);
+	if (!PagePrivate(page))
+		ret = 1;
+	else
+		ret = 0;
+	spin_unlock(&page->mapping->private_lock);
+	return ret;
+
+}
+
 int try_release_extent_buffer(struct page *page)
 {
 	struct extent_buffer *eb;
 
+	if (btrfs_sb(page->mapping->host->i_sb)->sectorsize < PAGE_SIZE)
+		return try_release_subpage_extent_buffer(page);
+
 	/*
 	 * We need to make sure nobody is attaching this page to an eb right
 	 * now.