diff mbox series

NFS: Fix O_DIRECT read problem when another write is going on

Message ID 1569834678-16117-1-git-send-email-suyj.fnst@cn.fujitsu.com (mailing list archive)
State New, archived
Headers show
Series NFS: Fix O_DIRECT read problem when another write is going on | expand

Commit Message

Su Yanjun Sept. 30, 2019, 9:11 a.m. UTC
xfstests generic/465 fails. Because O_DIRECT reads and writes use
async RPC calls, when read and write RPC calls run concurrently a
read may return partial data, which is wrong.

For example as follows.
 user buffer
/--------\
|    |XXXX|
 rpc0 rpc1

When rpc0 runs it encounters EOF and returns 0; then another writer
writes something. When rpc1 runs it returns some data. The resulting
user buffer contains wrong data.

In this patch we track an EOF mark for each direct request. If a read
encounters EOF, we set the EOF mark in the request; if a later read
completion finds the mark already set, we report an -EAGAIN error. In
nfs_direct_complete we convert -EAGAIN into a result of zero bytes read.
When the reader issues another read, it will read the data correctly.

Signed-off-by: Su Yanjun <suyj.fnst@cn.fujitsu.com>
---
 fs/nfs/direct.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

Comments

Trond Myklebust Sept. 30, 2019, 6:06 p.m. UTC | #1
Hi Su,

On Mon, 2019-09-30 at 17:11 +0800, Su Yanjun wrote:
> In xfstests generic/465 tests failed. Because O_DIRECT r/w use
> async rpc calls, when r/w rpc calls are running concurrently we
> may read partial data which is wrong.
> 
> For example as follows.
>  user buffer
> /--------\
> |    |XXXX|
>  rpc0 rpc1
> 
> When rpc0 runs it encounters eof so return 0, then another writes
> something. When rpc1 runs it returns some data. The total data
> buffer contains wrong data.
> 
> In this patch we check eof mark for each direct request. If
> encounters
> eof then set eof mark in the request, when we meet it again report
> -EAGAIN error. In nfs_direct_complete we convert -EAGAIN as if read
> nothing. When the reader issue another read it will read ok.
> 
> Signed-off-by: Su Yanjun <suyj.fnst@cn.fujitsu.com>
> ---
>  fs/nfs/direct.c | 14 +++++++++++++-
>  1 file changed, 13 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
> index 222d711..7f737a3 100644
> --- a/fs/nfs/direct.c
> +++ b/fs/nfs/direct.c
> @@ -93,6 +93,7 @@ struct nfs_direct_req {
>  				bytes_left,	/* bytes left to be
> sent */
>  				error;		/* any reported error
> */
>  	struct completion	completion;	/* wait for i/o completion */
> +	int			eof;		/* eof mark in the
> req */
>  
>  	/* commit state */
>  	struct nfs_mds_commit_info mds_cinfo;	/* Storage for cinfo
> */
> @@ -380,6 +381,12 @@ static void nfs_direct_complete(struct
> nfs_direct_req *dreq)
>  {
>  	struct inode *inode = dreq->inode;
>  
> +	/* read partial data just as read nothing */
> +	if (dreq->error == -EAGAIN) {
> +		dreq->count = 0;
> +		dreq->error = 0;
> +	}
> +
>  	inode_dio_end(inode);
>  
>  	if (dreq->iocb) {
> @@ -413,8 +420,13 @@ static void nfs_direct_read_completion(struct
> nfs_pgio_header *hdr)
>  	if (hdr->good_bytes != 0)
>  		nfs_direct_good_bytes(dreq, hdr);
>  
> -	if (test_bit(NFS_IOHDR_EOF, &hdr->flags))
> +	if (dreq->eof)
> +		dreq->error = -EAGAIN;
> +
> +	if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
>  		dreq->error = 0;
> +		dreq->eof = 1;
> +	}
>  
>  	spin_unlock(&dreq->lock);
>  

Thanks for looking into this issue. I agree with your analysis of what
is going wrong in generic/465.

However, I think the problem is greater than just EOF. I think we also
need to look at the generic error handling, and ensure that it handles
a truncated RPC call in the middle of a series of calls correctly.

Please see the two patches I sent you just now and check if they fix
the problem for you.
Su Yanjun Oct. 7, 2019, 2:17 a.m. UTC | #2
在 2019/10/1 2:06, Trond Myklebust 写道:
> Hi Su,
>
> On Mon, 2019-09-30 at 17:11 +0800, Su Yanjun wrote:
>> In xfstests generic/465 tests failed. Because O_DIRECT r/w use
>> async rpc calls, when r/w rpc calls are running concurrently we
>> may read partial data which is wrong.
>>
>> For example as follows.
>>   user buffer
>> /--------\
>> |    |XXXX|
>>   rpc0 rpc1
>>
>> When rpc0 runs it encounters eof so return 0, then another writes
>> something. When rpc1 runs it returns some data. The total data
>> buffer contains wrong data.
>>
>> In this patch we check eof mark for each direct request. If
>> encounters
>> eof then set eof mark in the request, when we meet it again report
>> -EAGAIN error. In nfs_direct_complete we convert -EAGAIN as if read
>> nothing. When the reader issue another read it will read ok.
>>
>> Signed-off-by: Su Yanjun <suyj.fnst@cn.fujitsu.com>
>> ---
>>   fs/nfs/direct.c | 14 +++++++++++++-
>>   1 file changed, 13 insertions(+), 1 deletion(-)
>>
>> diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
>> index 222d711..7f737a3 100644
>> --- a/fs/nfs/direct.c
>> +++ b/fs/nfs/direct.c
>> @@ -93,6 +93,7 @@ struct nfs_direct_req {
>>   				bytes_left,	/* bytes left to be
>> sent */
>>   				error;		/* any reported error
>> */
>>   	struct completion	completion;	/* wait for i/o completion */
>> +	int			eof;		/* eof mark in the
>> req */
>>   
>>   	/* commit state */
>>   	struct nfs_mds_commit_info mds_cinfo;	/* Storage for cinfo
>> */
>> @@ -380,6 +381,12 @@ static void nfs_direct_complete(struct
>> nfs_direct_req *dreq)
>>   {
>>   	struct inode *inode = dreq->inode;
>>   
>> +	/* read partial data just as read nothing */
>> +	if (dreq->error == -EAGAIN) {
>> +		dreq->count = 0;
>> +		dreq->error = 0;
>> +	}
>> +
>>   	inode_dio_end(inode);
>>   
>>   	if (dreq->iocb) {
>> @@ -413,8 +420,13 @@ static void nfs_direct_read_completion(struct
>> nfs_pgio_header *hdr)
>>   	if (hdr->good_bytes != 0)
>>   		nfs_direct_good_bytes(dreq, hdr);
>>   
>> -	if (test_bit(NFS_IOHDR_EOF, &hdr->flags))
>> +	if (dreq->eof)
>> +		dreq->error = -EAGAIN;
>> +
>> +	if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
>>   		dreq->error = 0;
>> +		dreq->eof = 1;
>> +	}
>>   
>>   	spin_unlock(&dreq->lock);
>>   
> Thanks for looking into this issue. I agree with your analysis of what
> is going wrong in generic/465.
>
> However, I think the problem is greater than just EOF. I think we also
> need to look at the generic error handling, and ensure that it handles
> a truncated RPC call in the middle of a series of calls correctly.
>
> Please see the two patches I sent you just now and check if they fix
> the problem for you.

The patchset you sent works for generic/465.

Thanks a lot
diff mbox series

Patch

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 222d711..7f737a3 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -93,6 +93,7 @@  struct nfs_direct_req {
 				bytes_left,	/* bytes left to be sent */
 				error;		/* any reported error */
 	struct completion	completion;	/* wait for i/o completion */
+	int			eof;		/* eof mark in the req */
 
 	/* commit state */
 	struct nfs_mds_commit_info mds_cinfo;	/* Storage for cinfo */
@@ -380,6 +381,12 @@  static void nfs_direct_complete(struct nfs_direct_req *dreq)
 {
 	struct inode *inode = dreq->inode;
 
+	/* read partial data just as read nothing */
+	if (dreq->error == -EAGAIN) {
+		dreq->count = 0;
+		dreq->error = 0;
+	}
+
 	inode_dio_end(inode);
 
 	if (dreq->iocb) {
@@ -413,8 +420,13 @@  static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
 	if (hdr->good_bytes != 0)
 		nfs_direct_good_bytes(dreq, hdr);
 
-	if (test_bit(NFS_IOHDR_EOF, &hdr->flags))
+	if (dreq->eof)
+		dreq->error = -EAGAIN;
+
+	if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
 		dreq->error = 0;
+		dreq->eof = 1;
+	}
 
 	spin_unlock(&dreq->lock);