[RFC,v2] ceph: do not execute direct write in parallel if O_APPEND is specified
diff mbox series

Message ID 20200204022825.26538-1-xiubli@redhat.com
State New
Headers show
Series
  • [RFC,v2] ceph: do not execute direct write in parallel if O_APPEND is specified
Related show

Commit Message

Xiubo Li Feb. 4, 2020, 2:28 a.m. UTC
From: Xiubo Li <xiubli@redhat.com>

In O_APPEND & O_DIRECT mode, the data from different writers may
overlap each other when the shared lock is used.

For example, both Writer1 and Writer2 are in O_APPEND and O_DIRECT
mode:

          Writer1                         Writer2

     shared_lock()                   shared_lock()
     getattr(CAP_SIZE)               getattr(CAP_SIZE)
     iocb->ki_pos = EOF              iocb->ki_pos = EOF
     write(data1)
                                     write(data2)
     shared_unlock()                 shared_unlock()

Data2 will overlap data1, because both are written starting at the
same file offset, the old EOF.

Switch to the exclusive lock instead when O_APPEND is specified.

Signed-off-by: Xiubo Li <xiubli@redhat.com>
---

Changed in V2:
- fix the commit comment
- add more detail in the commit comment
- s/direct_lock/shared_lock/g

 fs/ceph/file.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

Comments

Jeff Layton Feb. 4, 2020, 2:35 p.m. UTC | #1
On Mon, 2020-02-03 at 21:28 -0500, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
> 
> In O_APPEND & O_DIRECT mode, the data from different writers will
> be possiblly overlapping each other with shared lock.
> 
> For example, both Writer1 and Writer2 are in O_APPEND and O_DIRECT
> mode:
> 
>           Writer1                         Writer2
> 
>      shared_lock()                   shared_lock()
>      getattr(CAP_SIZE)               getattr(CAP_SIZE)
>      iocb->ki_pos = EOF              iocb->ki_pos = EOF
>      write(data1)
>                                      write(data2)
>      shared_unlock()                 shared_unlock()
> 
> The data2 will overlap the data1 from the same file offset, the
> old EOF.
> 
> Switch to exclusive lock instead when O_APPEND is specified.
> 
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
> 
> Changed in V2:
> - fix the commit comment
> - add more detail in the commit comment
> - s/direct_lock/shared_lock/g
> 
>  fs/ceph/file.c | 17 +++++++++++------
>  1 file changed, 11 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index ac7fe8b8081c..e3e67ef215dd 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -1475,6 +1475,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
>  	struct ceph_cap_flush *prealloc_cf;
>  	ssize_t count, written = 0;
>  	int err, want, got;
> +	bool shared_lock = false;
>  	loff_t pos;
>  	loff_t limit = max(i_size_read(inode), fsc->max_file_size);
>  
> @@ -1485,8 +1486,11 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
>  	if (!prealloc_cf)
>  		return -ENOMEM;
>  
> +	if ((iocb->ki_flags & (IOCB_DIRECT | IOCB_APPEND)) == IOCB_DIRECT)
> +		shared_lock = true;
> +
>  retry_snap:
> -	if (iocb->ki_flags & IOCB_DIRECT)
> +	if (shared_lock)
>  		ceph_start_io_direct(inode);
>  	else
>  		ceph_start_io_write(inode);
> @@ -1576,14 +1580,15 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
>  
>  		/* we might need to revert back to that point */
>  		data = *from;
> -		if (iocb->ki_flags & IOCB_DIRECT) {
> +		if (iocb->ki_flags & IOCB_DIRECT)
>  			written = ceph_direct_read_write(iocb, &data, snapc,
>  							 &prealloc_cf);
> -			ceph_end_io_direct(inode);
> -		} else {
> +		else
>  			written = ceph_sync_write(iocb, &data, pos, snapc);
> +		if (shared_lock)
> +			ceph_end_io_direct(inode);
> +		else
>  			ceph_end_io_write(inode);
> -		}
>  		if (written > 0)
>  			iov_iter_advance(from, written);
>  		ceph_put_snap_context(snapc);
> @@ -1634,7 +1639,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
>  
>  	goto out_unlocked;
>  out:
> -	if (iocb->ki_flags & IOCB_DIRECT)
> +	if (shared_lock)
>  		ceph_end_io_direct(inode);
>  	else
>  		ceph_end_io_write(inode);

Ok, I think this looks reasonable, but I actually preferred the
"direct_lock" name you had before. I'm going to do some testing today
and will probably merge this (with s/shared_lock/direct_lock/) if it
tests out ok.
Xiubo Li Feb. 4, 2020, 2:44 p.m. UTC | #2
On 2020/2/4 22:35, Jeff Layton wrote:
> On Mon, 2020-02-03 at 21:28 -0500, xiubli@redhat.com wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> In O_APPEND & O_DIRECT mode, the data from different writers will
>> be possiblly overlapping each other with shared lock.
>>
>> For example, both Writer1 and Writer2 are in O_APPEND and O_DIRECT
>> mode:
>>
>>            Writer1                         Writer2
>>
>>       shared_lock()                   shared_lock()
>>       getattr(CAP_SIZE)               getattr(CAP_SIZE)
>>       iocb->ki_pos = EOF              iocb->ki_pos = EOF
>>       write(data1)
>>                                       write(data2)
>>       shared_unlock()                 shared_unlock()
>>
>> The data2 will overlap the data1 from the same file offset, the
>> old EOF.
>>
>> Switch to exclusive lock instead when O_APPEND is specified.
>>
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>>
>> Changed in V2:
>> - fix the commit comment
>> - add more detail in the commit comment
>> - s/direct_lock/shared_lock/g
>>
>>   fs/ceph/file.c | 17 +++++++++++------
>>   1 file changed, 11 insertions(+), 6 deletions(-)
>>
>> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
>> index ac7fe8b8081c..e3e67ef215dd 100644
>> --- a/fs/ceph/file.c
>> +++ b/fs/ceph/file.c
>> @@ -1475,6 +1475,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
>>   	struct ceph_cap_flush *prealloc_cf;
>>   	ssize_t count, written = 0;
>>   	int err, want, got;
>> +	bool shared_lock = false;
>>   	loff_t pos;
>>   	loff_t limit = max(i_size_read(inode), fsc->max_file_size);
>>   
>> @@ -1485,8 +1486,11 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
>>   	if (!prealloc_cf)
>>   		return -ENOMEM;
>>   
>> +	if ((iocb->ki_flags & (IOCB_DIRECT | IOCB_APPEND)) == IOCB_DIRECT)
>> +		shared_lock = true;
>> +
>>   retry_snap:
>> -	if (iocb->ki_flags & IOCB_DIRECT)
>> +	if (shared_lock)
>>   		ceph_start_io_direct(inode);
>>   	else
>>   		ceph_start_io_write(inode);
>> @@ -1576,14 +1580,15 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
>>   
>>   		/* we might need to revert back to that point */
>>   		data = *from;
>> -		if (iocb->ki_flags & IOCB_DIRECT) {
>> +		if (iocb->ki_flags & IOCB_DIRECT)
>>   			written = ceph_direct_read_write(iocb, &data, snapc,
>>   							 &prealloc_cf);
>> -			ceph_end_io_direct(inode);
>> -		} else {
>> +		else
>>   			written = ceph_sync_write(iocb, &data, pos, snapc);
>> +		if (shared_lock)
>> +			ceph_end_io_direct(inode);
>> +		else
>>   			ceph_end_io_write(inode);
>> -		}
>>   		if (written > 0)
>>   			iov_iter_advance(from, written);
>>   		ceph_put_snap_context(snapc);
>> @@ -1634,7 +1639,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
>>   
>>   	goto out_unlocked;
>>   out:
>> -	if (iocb->ki_flags & IOCB_DIRECT)
>> +	if (shared_lock)
>>   		ceph_end_io_direct(inode);
>>   	else
>>   		ceph_end_io_write(inode);
> Ok, I think this looks reasonable, but I actually preferred the
> "direct_lock" name you had before. I'm going to do some testing today
> and will probably merge this (with s/shared_lock/direct_lock/) if it
> tests out ok.

Okay :-) Thanks.

>

Patch
diff mbox series

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index ac7fe8b8081c..e3e67ef215dd 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1475,6 +1475,7 @@  static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	struct ceph_cap_flush *prealloc_cf;
 	ssize_t count, written = 0;
 	int err, want, got;
+	bool shared_lock = false;
 	loff_t pos;
 	loff_t limit = max(i_size_read(inode), fsc->max_file_size);
 
@@ -1485,8 +1486,11 @@  static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	if (!prealloc_cf)
 		return -ENOMEM;
 
+	if ((iocb->ki_flags & (IOCB_DIRECT | IOCB_APPEND)) == IOCB_DIRECT)
+		shared_lock = true;
+
 retry_snap:
-	if (iocb->ki_flags & IOCB_DIRECT)
+	if (shared_lock)
 		ceph_start_io_direct(inode);
 	else
 		ceph_start_io_write(inode);
@@ -1576,14 +1580,15 @@  static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
 
 		/* we might need to revert back to that point */
 		data = *from;
-		if (iocb->ki_flags & IOCB_DIRECT) {
+		if (iocb->ki_flags & IOCB_DIRECT)
 			written = ceph_direct_read_write(iocb, &data, snapc,
 							 &prealloc_cf);
-			ceph_end_io_direct(inode);
-		} else {
+		else
 			written = ceph_sync_write(iocb, &data, pos, snapc);
+		if (shared_lock)
+			ceph_end_io_direct(inode);
+		else
 			ceph_end_io_write(inode);
-		}
 		if (written > 0)
 			iov_iter_advance(from, written);
 		ceph_put_snap_context(snapc);
@@ -1634,7 +1639,7 @@  static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
 
 	goto out_unlocked;
 out:
-	if (iocb->ki_flags & IOCB_DIRECT)
+	if (shared_lock)
 		ceph_end_io_direct(inode);
 	else
 		ceph_end_io_write(inode);