diff mbox series

[v3,3/4] ceph: return the real size readed when hit EOF

Message ID 20211028091438.21402-4-xiubli@redhat.com (mailing list archive)
State New, archived
Headers show
Series ceph: size handling for the fscrypt | expand

Commit Message

Xiubo Li Oct. 28, 2021, 9:14 a.m. UTC
From: Xiubo Li <xiubli@redhat.com>

Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/file.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

Comments

Jeff Layton Oct. 28, 2021, 6:11 p.m. UTC | #1
On Thu, 2021-10-28 at 17:14 +0800, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
> 
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>  fs/ceph/file.c | 14 ++++++++------
>  1 file changed, 8 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index 74db403a4c35..1988e75ad4a2 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -910,6 +910,7 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
>  	ssize_t ret;
>  	u64 off = *ki_pos;
>  	u64 len = iov_iter_count(to);
> +	u64 i_size = i_size_read(inode);
>  
>  	dout("sync_read on inode %p %llu~%u\n", inode, *ki_pos, (unsigned)len);
>  
> @@ -933,7 +934,6 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
>  		struct page **pages;
>  		int num_pages;
>  		size_t page_off;
> -		u64 i_size;
>  		bool more;
>  		int idx;
>  		size_t left;
> @@ -980,7 +980,6 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
>  
>  		ceph_osdc_put_request(req);
>  
> -		i_size = i_size_read(inode);

I wonder a little about removing this i_size fetch. Then again, the
i_size could change any time after we fetch it so it doesn't seem
worthwhile to do so.

>  		dout("sync_read %llu~%llu got %zd i_size %llu%s\n",
>  		     off, len, ret, i_size, (more ? " MORE" : ""));
>  
> @@ -1056,11 +1055,14 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
>  	}
>  
>  	if (off > *ki_pos) {
> -		if (ret >= 0 &&
> -		    iov_iter_count(to) > 0 && off >= i_size_read(inode))
> +		if (off >= i_size) {
>  			*retry_op = CHECK_EOF;
> -		ret = off - *ki_pos;
> -		*ki_pos = off;
> +			ret = i_size - *ki_pos;
> +			*ki_pos = i_size;
> +		} else {
> +			ret = off - *ki_pos;
> +			*ki_pos = off;
> +		}
>  	}
>  out:
>  	dout("sync_read result %zd retry_op %d\n", ret, *retry_op);

I think I'll go ahead and pull this patch into the testing branch, since
it seems to be correct.

Thanks!
Xiubo Li Oct. 30, 2021, 4:35 a.m. UTC | #2
On 10/29/21 2:11 AM, Jeff Layton wrote:
> On Thu, 2021-10-28 at 17:14 +0800, xiubli@redhat.com wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>>   fs/ceph/file.c | 14 ++++++++------
>>   1 file changed, 8 insertions(+), 6 deletions(-)
>>
>> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
>> index 74db403a4c35..1988e75ad4a2 100644
>> --- a/fs/ceph/file.c
>> +++ b/fs/ceph/file.c
>> @@ -910,6 +910,7 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
>>   	ssize_t ret;
>>   	u64 off = *ki_pos;
>>   	u64 len = iov_iter_count(to);
>> +	u64 i_size = i_size_read(inode);
>>   
>>   	dout("sync_read on inode %p %llu~%u\n", inode, *ki_pos, (unsigned)len);
>>   
>> @@ -933,7 +934,6 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
>>   		struct page **pages;
>>   		int num_pages;
>>   		size_t page_off;
>> -		u64 i_size;
>>   		bool more;
>>   		int idx;
>>   		size_t left;
>> @@ -980,7 +980,6 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
>>   
>>   		ceph_osdc_put_request(req);
>>   
>> -		i_size = i_size_read(inode);
> I wonder a little about removing this i_size fetch. Then again, the
> i_size could change any time after we fetch it so it doesn't seem
> worthwhile to do so.

I checked the code again. There has 2 ways to change the inode size, 
which are writing a file and truncating the size. Both this are safe by 
protecting by 'inode->i_rwsem' and FILE caps.

If there has no any other will do this in MDS side, such as when doing a 
file recovery, which I don't think it will.

I will add it back for now.

>
>>   		dout("sync_read %llu~%llu got %zd i_size %llu%s\n",
>>   		     off, len, ret, i_size, (more ? " MORE" : ""));
>>   
>> @@ -1056,11 +1055,14 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
>>   	}
>>   
>>   	if (off > *ki_pos) {
>> -		if (ret >= 0 &&
>> -		    iov_iter_count(to) > 0 && off >= i_size_read(inode))
>> +		if (off >= i_size) {
>>   			*retry_op = CHECK_EOF;
>> -		ret = off - *ki_pos;
>> -		*ki_pos = off;
>> +			ret = i_size - *ki_pos;
>> +			*ki_pos = i_size;
>> +		} else {
>> +			ret = off - *ki_pos;
>> +			*ki_pos = off;
>> +		}
>>   	}
>>   out:
>>   	dout("sync_read result %zd retry_op %d\n", ret, *retry_op);
> I think I'll go ahead and pull this patch into the testing branch, since
> it seems to be correct.
>
> Thanks!
diff mbox series

Patch

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 74db403a4c35..1988e75ad4a2 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -910,6 +910,7 @@  ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
 	ssize_t ret;
 	u64 off = *ki_pos;
 	u64 len = iov_iter_count(to);
+	u64 i_size = i_size_read(inode);
 
 	dout("sync_read on inode %p %llu~%u\n", inode, *ki_pos, (unsigned)len);
 
@@ -933,7 +934,6 @@  ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
 		struct page **pages;
 		int num_pages;
 		size_t page_off;
-		u64 i_size;
 		bool more;
 		int idx;
 		size_t left;
@@ -980,7 +980,6 @@  ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
 
 		ceph_osdc_put_request(req);
 
-		i_size = i_size_read(inode);
 		dout("sync_read %llu~%llu got %zd i_size %llu%s\n",
 		     off, len, ret, i_size, (more ? " MORE" : ""));
 
@@ -1056,11 +1055,14 @@  ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
 	}
 
 	if (off > *ki_pos) {
-		if (ret >= 0 &&
-		    iov_iter_count(to) > 0 && off >= i_size_read(inode))
+		if (off >= i_size) {
 			*retry_op = CHECK_EOF;
-		ret = off - *ki_pos;
-		*ki_pos = off;
+			ret = i_size - *ki_pos;
+			*ki_pos = i_size;
+		} else {
+			ret = off - *ki_pos;
+			*ki_pos = off;
+		}
 	}
 out:
 	dout("sync_read result %zd retry_op %d\n", ret, *retry_op);