
[v2] ceph: fix blindly expanding the readahead windows

Message ID 20230504052306.405208-1-xiubli@redhat.com (mailing list archive)
State New, archived
Series [v2] ceph: fix blindly expanding the readahead windows

Commit Message

Xiubo Li May 4, 2023, 5:23 a.m. UTC
From: Xiubo Li <xiubli@redhat.com>

Blindly expanding the readahead windows will cause unnecessary
pagecache thrashing and also introduce extra network load.
We should disable expanding the windows if readahead is disabled,
and we also shouldn't expand the windows too much.

Expand the windows forward first, instead of backward, to favor
possible sequential reads.

URL: https://www.spinics.net/lists/ceph-users/msg76183.html
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---

V2:
- fix possible cross-block issue pointed out by Ilya.


 fs/ceph/addr.c | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

Comments

胡玮文 May 4, 2023, 10:12 a.m. UTC | #1
Hi Xiubo,

I just sent a patch to solve the same issue[1].  My patch bounds the
request by i_size, which is orthogonal to your changes. To incorporate
both, I propose the following patch.

Also, since this is a performance regression, I think we should backport
it to stable versions?

Another comment inline.

Hu Weiwen

[1]: https://lore.kernel.org/ceph-devel/20230504082510.247-1-sehuww@mail.scut.edu.cn/

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 6bb251a4d613..d1d8e2562182 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -187,16 +187,30 @@ static void ceph_netfs_expand_readahead(struct netfs_io_request *rreq)
 	struct inode *inode = rreq->inode;
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_file_layout *lo = &ci->i_layout;
+	unsigned long max_pages = inode->i_sb->s_bdi->ra_pages;
+	unsigned long max_len = max_pages << PAGE_SHIFT;
+	loff_t end = rreq->start + rreq->len, new_end;
 	u32 blockoff;
 	u64 blockno;

-	/* Expand the start downward */
-	blockno = div_u64_rem(rreq->start, lo->stripe_unit, &blockoff);
-	rreq->start = blockno * lo->stripe_unit;
-	rreq->len += blockoff;
+	/* Readahead is disabled */
+	if (!max_pages)
+		return;
+
+	/* Try to expand the length forward by rounding it up to the next block */
+	new_end = round_up(end, lo->stripe_unit);
+	/* But do not exceed the file size,
+	 * unless the original request already exceeds it. */
+	new_end = min(new_end, rreq->i_size);
+	if (new_end > end && new_end <= rreq->start + max_len)
+		rreq->len = new_end - rreq->start;

-	/* Now, round up the length to the next block */
-	rreq->len = roundup(rreq->len, lo->stripe_unit);
+	/* Try to expand the start downward */
+	blockno = div_u64_rem(rreq->start, lo->stripe_unit, &blockoff);
+	if (rreq->len + blockoff <= max_len) {
+		rreq->start = blockno * lo->stripe_unit;
+		rreq->len += blockoff;
+	}
 }

 static bool ceph_netfs_clamp_length(struct netfs_io_subrequest *subreq)
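
To make the combined bounds concrete, here is a hypothetical userspace
sketch of the same arithmetic (stripe_unit, max_len, i_size and the
request below are all made-up numbers for illustration; this is a
sketch, not the kernel code):

#include <stdio.h>
#include <stdint.h>

#define ROUND_UP(x, y)	((((x) + (y) - 1) / (y)) * (y))
#define MIN(a, b)	((a) < (b) ? (a) : (b))

int main(void)
{
	uint64_t stripe_unit = 4 << 20;		/* 4M CephFS object block */
	uint64_t max_len = 8 << 20;		/* cap derived from ra_pages */
	uint64_t i_size = 3 << 20;		/* small file */
	uint64_t start = 1 << 20;		/* original request [1M, 2M) */
	uint64_t len = 1 << 20;

	/* Expand the end forward to the block boundary, bounded by
	 * i_size and the readahead cap. */
	uint64_t end = start + len;
	uint64_t new_end = ROUND_UP(end, stripe_unit);	/* 4M */
	new_end = MIN(new_end, i_size);			/* clamped to 3M */
	if (new_end > end && new_end <= start + max_len)
		len = new_end - start;			/* 2M */

	/* Then expand the start downward, bounded by the same cap. */
	uint64_t blockoff = start % stripe_unit;	/* 1M */
	if (len + blockoff <= max_len) {
		start -= blockoff;			/* 0 */
		len += blockoff;			/* 3M */
	}

	printf("expanded to [%llu, %llu)\n", (unsigned long long)start,
	       (unsigned long long)(start + len));	/* [0, 3145728) */
	return 0;
}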


On Thu, May 04, 2023 at 01:23:06PM +0800, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
> 
> Blindly expanding the readahead windows will cause unnecessary
> pagecache thrashing and also introduce extra network load.
> We should disable expanding the windows if readahead is disabled,
> and we also shouldn't expand the windows too much.
> 
> Expand the windows forward first, instead of backward, to favor
> possible sequential reads.
> 
> URL: https://www.spinics.net/lists/ceph-users/msg76183.html
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
> 
> V2:
> - fix possible cross-block issue pointed out by Ilya.
> 
> 
>  fs/ceph/addr.c | 24 ++++++++++++++++++------
>  1 file changed, 18 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
> index ca4dc6450887..03a326da8fd8 100644
> --- a/fs/ceph/addr.c
> +++ b/fs/ceph/addr.c
> @@ -188,16 +188,28 @@ static void ceph_netfs_expand_readahead(struct netfs_io_request *rreq)
>  	struct inode *inode = rreq->inode;
>  	struct ceph_inode_info *ci = ceph_inode(inode);
>  	struct ceph_file_layout *lo = &ci->i_layout;
> +	unsigned long max_pages = inode->i_sb->s_bdi->ra_pages;
> +	unsigned long max_len = max_pages << PAGE_SHIFT;
> +	unsigned long len;
>  	u32 blockoff;
>  	u64 blockno;
>  
> -	/* Expand the start downward */
> -	blockno = div_u64_rem(rreq->start, lo->stripe_unit, &blockoff);
> -	rreq->start = blockno * lo->stripe_unit;
> -	rreq->len += blockoff;
> +	/* Readahead is disabled */
> +	if (!max_pages)
> +		return;
> +
> +	/* Try to expand the length forward by rounding it up to the next block */
> +	div_u64_rem(rreq->start + rreq->len, lo->stripe_unit, &blockoff);
> +	len = lo->stripe_unit - blockoff;
This would expand the request by a whole block, if the original request
is already aligned (blockoff == 0).
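For example (hypothetical numbers): with a 4M stripe_unit and an
already-aligned request [0, 4M), blockoff == 0, so len becomes a full
stripe_unit and the request needlessly grows to [0, 8M).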
> +	if (rreq->len + len <= max_len)
> +		rreq->len += len;
>  
> -	/* Now, round up the length to the next block */
> -	rreq->len = roundup(rreq->len, lo->stripe_unit);
> +	/* Try to expand the start downward */
> +	blockno = div_u64_rem(rreq->start, lo->stripe_unit, &blockoff);
> +	if (rreq->len + blockoff <= max_len) {
> +		rreq->start = blockno * lo->stripe_unit;
> +		rreq->len += blockoff;
> +	}
>  }
>  
>  static bool ceph_netfs_clamp_length(struct netfs_io_subrequest *subreq)
> -- 
> 2.40.0
>
Xiubo Li May 4, 2023, 10:58 a.m. UTC | #2
On 5/4/23 18:12, 胡玮文 wrote:
> Hi Xiubo,
>
> I just sent a patch to solve the same issue[1].  My patch bounds the
> request by i_size, which is orthogonal to your changes. To incorporate
> both, I propose the following patch.
>
> Also, since this is a performance regression, I think we should backport
> it to stable versions?
>
> Another comment inline.
>
> Hu Weiwen
>
> [1]: https://lore.kernel.org/ceph-devel/20230504082510.247-1-sehuww@mail.scut.edu.cn/
>
> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
> index 6bb251a4d613..d1d8e2562182 100644
> --- a/fs/ceph/addr.c
> +++ b/fs/ceph/addr.c
> @@ -187,16 +187,30 @@ static void ceph_netfs_expand_readahead(struct netfs_io_request *rreq)
>   	struct inode *inode = rreq->inode;
>   	struct ceph_inode_info *ci = ceph_inode(inode);
>   	struct ceph_file_layout *lo = &ci->i_layout;
> +	unsigned long max_pages = inode->i_sb->s_bdi->ra_pages;
> +	unsigned long max_len = max_pages << PAGE_SHIFT;
> +	loff_t end = rreq->start + rreq->len, new_end;
>   	u32 blockoff;
>   	u64 blockno;
>
> -	/* Expand the start downward */
> -	blockno = div_u64_rem(rreq->start, lo->stripe_unit, &blockoff);
> -	rreq->start = blockno * lo->stripe_unit;
> -	rreq->len += blockoff;
> +	/* Readahead is disabled */
> +	if (!max_pages)
> +		return;
> +
> +	/* Try to expand the length forward by rounding it up to the next block */
> +	new_end = round_up(end, lo->stripe_unit);
> +	/* But do not exceed the file size,
> +	 * unless the original request already exceeds it. */
> +	new_end = min(new_end, rreq->i_size);
> +	if (new_end > end && new_end <= rreq->start + max_len)
> +		rreq->len = new_end - rreq->start;
>
> -	/* Now, round up the length to the next block */
> -	rreq->len = roundup(rreq->len, lo->stripe_unit);
> +	/* Try to expand the start downward */
> +	blockno = div_u64_rem(rreq->start, lo->stripe_unit, &blockoff);
> +	if (rreq->len + blockoff <= max_len) {
> +		rreq->start = blockno * lo->stripe_unit;
> +		rreq->len += blockoff;
> +	}
>   }
>
>   static bool ceph_netfs_clamp_length(struct netfs_io_subrequest *subreq)
>
>
> On Thu, May 04, 2023 at 01:23:06PM +0800, xiubli@redhat.com wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> Blindly expanding the readahead windows will cause unnecessary
>> pagecache thrashing and also introduce extra network load.
>> We should disable expanding the windows if readahead is disabled,
>> and we also shouldn't expand the windows too much.
>>
>> Expand the windows forward first, instead of backward, to favor
>> possible sequential reads.
>>
>> URL: https://www.spinics.net/lists/ceph-users/msg76183.html
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>>
>> V2:
>> - fix possible cross-block issue pointed out by Ilya.
>>
>>
>>   fs/ceph/addr.c | 24 ++++++++++++++++++------
>>   1 file changed, 18 insertions(+), 6 deletions(-)
>>
>> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
>> index ca4dc6450887..03a326da8fd8 100644
>> --- a/fs/ceph/addr.c
>> +++ b/fs/ceph/addr.c
>> @@ -188,16 +188,28 @@ static void ceph_netfs_expand_readahead(struct netfs_io_request *rreq)
>>   	struct inode *inode = rreq->inode;
>>   	struct ceph_inode_info *ci = ceph_inode(inode);
>>   	struct ceph_file_layout *lo = &ci->i_layout;
>> +	unsigned long max_pages = inode->i_sb->s_bdi->ra_pages;
>> +	unsigned long max_len = max_pages << PAGE_SHIFT;
>> +	unsigned long len;
>>   	u32 blockoff;
>>   	u64 blockno;
>>   
>> -	/* Expand the start downward */
>> -	blockno = div_u64_rem(rreq->start, lo->stripe_unit, &blockoff);
>> -	rreq->start = blockno * lo->stripe_unit;
>> -	rreq->len += blockoff;
>> +	/* Readahead is disabled */
>> +	if (!max_pages)
>> +		return;
>> +
>> +	/* Try to expand the length forward by rounding it up to the next block */
>> +	div_u64_rem(rreq->start + rreq->len, lo->stripe_unit, &blockoff);
>> +	len = lo->stripe_unit - blockoff;
> This would expand the request by a whole block, if the original request
> is already aligned (blockoff == 0).

Hi 胡玮文

Good catch.

The above patch looks better; I will revise mine to incorporate it.

Thanks

- Xiubo

>> +	if (rreq->len + len <= max_len)
>> +		rreq->len += len;
>>   
>> -	/* Now, round up the length to the next block */
>> -	rreq->len = roundup(rreq->len, lo->stripe_unit);
>> +	/* Try to expand the start downward */
>> +	blockno = div_u64_rem(rreq->start, lo->stripe_unit, &blockoff);
>> +	if (rreq->len + blockoff <= max_len) {
>> +		rreq->start = blockno * lo->stripe_unit;
>> +		rreq->len += blockoff;
>> +	}
>>   }
>>   
>>   static bool ceph_netfs_clamp_length(struct netfs_io_subrequest *subreq)
>> -- 
>> 2.40.0
>>

Patch

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index ca4dc6450887..03a326da8fd8 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -188,16 +188,28 @@  static void ceph_netfs_expand_readahead(struct netfs_io_request *rreq)
 	struct inode *inode = rreq->inode;
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_file_layout *lo = &ci->i_layout;
+	unsigned long max_pages = inode->i_sb->s_bdi->ra_pages;
+	unsigned long max_len = max_pages << PAGE_SHIFT;
+	unsigned long len;
 	u32 blockoff;
 	u64 blockno;
 
-	/* Expand the start downward */
-	blockno = div_u64_rem(rreq->start, lo->stripe_unit, &blockoff);
-	rreq->start = blockno * lo->stripe_unit;
-	rreq->len += blockoff;
+	/* Readahead is disabled */
+	if (!max_pages)
+		return;
+
+	/* Try to expand the length forward by rounding it up to the next block */
+	div_u64_rem(rreq->start + rreq->len, lo->stripe_unit, &blockoff);
+	len = lo->stripe_unit - blockoff;
+	if (rreq->len + len <= max_len)
+		rreq->len += len;
 
-	/* Now, round up the length to the next block */
-	rreq->len = roundup(rreq->len, lo->stripe_unit);
+	/* Try to expand the start downward */
+	blockno = div_u64_rem(rreq->start, lo->stripe_unit, &blockoff);
+	if (rreq->len + blockoff <= max_len) {
+		rreq->start = blockno * lo->stripe_unit;
+		rreq->len += blockoff;
+	}
 }
 
 static bool ceph_netfs_clamp_length(struct netfs_io_subrequest *subreq)
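
For reference, a hypothetical userspace walk-through of the
forward-expansion arithmetic above (all numbers invented; a sketch,
not the kernel code), showing the already-aligned case flagged in
review:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t stripe_unit = 4 << 20;	/* 4M CephFS object block */
	uint64_t max_len = 16 << 20;	/* cap derived from ra_pages */
	uint64_t start = 0;		/* request [0, 4M), block-aligned */
	uint64_t len = 4 << 20;

	/* Offset of the request end within its block. */
	uint64_t blockoff = (start + len) % stripe_unit;	/* 0 */
	uint64_t extra = stripe_unit - blockoff;	/* a full 4M block */
	if (len + extra <= max_len)
		len += extra;		/* grows to 8M although already aligned */

	printf("len = %llu\n", (unsigned long long)len);	/* 8388608 */
	return 0;
}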