diff mbox

[1/2] pnfs/blocklayout: update last_write_offset in prepare_layoutcommit

Message ID 973de15d7e14e23d6945ad03713384c97a6a0087.1469730653.git.bcodding@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Benjamin Coddington July 28, 2016, 6:41 p.m. UTC
Block/SCSI layout write completion may add committable extents to the
extent tree before updating the layout's last-written byte under the inode
lock.  If a sync happens before this value is updated, then
prepare_layoutcommit may find and encode these extents which would produce
a LAYOUTCOMMIT request whose encoded extents are larger than the request's
loca_length.

Fix this by updating the last_write_offset to match the currently encoded
extents.

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
---
 fs/nfs/blocklayout/extent_tree.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

Comments

Trond Myklebust July 28, 2016, 6:47 p.m. UTC | #1
On Thu, 2016-07-28 at 14:41 -0400, Benjamin Coddington wrote:
> Block/SCSI layout write completion may add committable extents to the

> extent tree before updating the layout's last-written byte under the

> inode

> lock.  If a sync happens before this value is updated, then

> prepare_layoutcommit may find and encode these extents which would

> produce

> a LAYOUTCOMMIT request whose encoded extents are larger than the

> request's

> loca_length.

> 

> Fix this by updating the last_write_offset to match the currently

> encoded

> extents.

> 

> Signed-off-by: Benjamin Coddington <bcodding@redhat.com>

> ---

>  fs/nfs/blocklayout/extent_tree.c | 6 ++++--

>  1 file changed, 4 insertions(+), 2 deletions(-)

> 

> diff --git a/fs/nfs/blocklayout/extent_tree.c

> b/fs/nfs/blocklayout/extent_tree.c

> index 992bcb19c11e..18ae1fd2175e 100644

> --- a/fs/nfs/blocklayout/extent_tree.c

> +++ b/fs/nfs/blocklayout/extent_tree.c

> @@ -518,7 +518,7 @@ static __be32 *encode_scsi_range(struct

> pnfs_block_extent *be, __be32 *p)

>  }

>  

>  static int ext_tree_encode_commit(struct pnfs_block_layout *bl,

> __be32 *p,

> -		size_t buffer_size, size_t *count)

> +		size_t buffer_size, size_t *count, __u64 *lastbyte)

>  {

>  	struct pnfs_block_extent *be;

>  	int ret = 0;

> @@ -541,6 +541,8 @@ static int ext_tree_encode_commit(struct

> pnfs_block_layout *bl, __be32 *p,

>  		else

>  			p = encode_block_extent(be, p);

>  		be->be_tag = EXTENT_COMMITTING;

> +		if ((ext_f_end(be) << SECTOR_SHIFT) - 1 > *lastbyte)

> +			*lastbyte = (ext_f_end(be) << SECTOR_SHIFT) 

> - 1;


Won't this cause the file size to be always sector aligned on the
server? I was assuming that we would have to store the lastbyte
atomically with setting up the commit in ext_tree_mark_written().

>  	}

>  	spin_unlock(&bl->bl_ext_lock);

>  

> @@ -564,7 +566,7 @@ ext_tree_prepare_commit(struct

> nfs4_layoutcommit_args *arg)

>  	arg->layoutupdate_pages = &arg->layoutupdate_page;

>  

>  retry:

> -	ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size,

> &count);

> +	ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size,

> &count, &arg->lastbytewritten);

>  	if (unlikely(ret)) {

>  		ext_tree_free_commitdata(arg, buffer_size);

>
Benjamin Coddington July 28, 2016, 8:03 p.m. UTC | #2
On 28 Jul 2016, at 14:47, Trond Myklebust wrote:

> On Thu, 2016-07-28 at 14:41 -0400, Benjamin Coddington wrote:
>> Block/SCSI layout write completion may add committable extents to the
>> extent tree before updating the layout's last-written byte under the
>> inode
>> lock.  If a sync happens before this value is updated, then
>> prepare_layoutcommit may find and encode these extents which would
>> produce
>> a LAYOUTCOMMIT request whose encoded extents are larger than the
>> request's
>> loca_length.
>>
>> Fix this by updating the last_write_offset to match the currently
>> encoded
>> extents.
>>
>> Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
>> ---
>>  fs/nfs/blocklayout/extent_tree.c | 6 ++++--
>>  1 file changed, 4 insertions(+), 2 deletions(-)
>>
>> diff --git a/fs/nfs/blocklayout/extent_tree.c
>> b/fs/nfs/blocklayout/extent_tree.c
>> index 992bcb19c11e..18ae1fd2175e 100644
>> --- a/fs/nfs/blocklayout/extent_tree.c
>> +++ b/fs/nfs/blocklayout/extent_tree.c
>> @@ -518,7 +518,7 @@ static __be32 *encode_scsi_range(struct
>> pnfs_block_extent *be, __be32 *p)
>>  }
>>  
>>  static int ext_tree_encode_commit(struct pnfs_block_layout *bl,
>> __be32 *p,
>> -		size_t buffer_size, size_t *count)
>> +		size_t buffer_size, size_t *count, __u64 *lastbyte)
>>  {
>>  	struct pnfs_block_extent *be;
>>  	int ret = 0;
>> @@ -541,6 +541,8 @@ static int ext_tree_encode_commit(struct
>> pnfs_block_layout *bl, __be32 *p,
>>  		else
>>  			p = encode_block_extent(be, p);
>>  		be->be_tag = EXTENT_COMMITTING;
>> +		if ((ext_f_end(be) << SECTOR_SHIFT) - 1 > *lastbyte)
>> +			*lastbyte = (ext_f_end(be) << SECTOR_SHIFT)
>> - 1;
>
> Won't this cause the file size to be always sector aligned on the
> server? I was assuming that we would have to store the lastbyte
> atomically with setting up the commit in ext_tree_mark_written().

You're right.  It is incorrect.  I'll fix it.

Ben
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Benjamin Coddington July 29, 2016, 2 p.m. UTC | #3
On 28 Jul 2016, at 16:03, Benjamin Coddington wrote:

> On 28 Jul 2016, at 14:47, Trond Myklebust wrote:
>
>> On Thu, 2016-07-28 at 14:41 -0400, Benjamin Coddington wrote:
>>> Block/SCSI layout write completion may add committable extents to the
>>> extent tree before updating the layout's last-written byte under the
>>> inode
>>> lock.  If a sync happens before this value is updated, then
>>> prepare_layoutcommit may find and encode these extents which would
>>> produce
>>> a LAYOUTCOMMIT request whose encoded extents are larger than the
>>> request's
>>> loca_length.
>>>
>>> Fix this by updating the last_write_offset to match the currently
>>> encoded
>>> extents.
>>>
>>> Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
>>> ---
>>>  fs/nfs/blocklayout/extent_tree.c | 6 ++++--
>>>  1 file changed, 4 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/fs/nfs/blocklayout/extent_tree.c
>>> b/fs/nfs/blocklayout/extent_tree.c
>>> index 992bcb19c11e..18ae1fd2175e 100644
>>> --- a/fs/nfs/blocklayout/extent_tree.c
>>> +++ b/fs/nfs/blocklayout/extent_tree.c
>>> @@ -518,7 +518,7 @@ static __be32 *encode_scsi_range(struct
>>> pnfs_block_extent *be, __be32 *p)
>>>  }
>>>  
>>>  static int ext_tree_encode_commit(struct pnfs_block_layout *bl,
>>> __be32 *p,
>>> -		size_t buffer_size, size_t *count)
>>> +		size_t buffer_size, size_t *count, __u64 *lastbyte)
>>>  {
>>>  	struct pnfs_block_extent *be;
>>>  	int ret = 0;
>>> @@ -541,6 +541,8 @@ static int ext_tree_encode_commit(struct
>>> pnfs_block_layout *bl, __be32 *p,
>>>  		else
>>>  			p = encode_block_extent(be, p);
>>>  		be->be_tag = EXTENT_COMMITTING;
>>> +		if ((ext_f_end(be) << SECTOR_SHIFT) - 1 > *lastbyte)
>>> +			*lastbyte = (ext_f_end(be) << SECTOR_SHIFT)
>>> - 1;
>>
>> Won't this cause the file size to be always sector aligned on the
>> server? I was assuming that we would have to store the lastbyte
>> atomically with setting up the commit in ext_tree_mark_written().
>
> You're right.  It is incorrect.  I'll fix it.

This turns out to be a little trickier than I thought, and so is taking me
longer than I have time at the moment.  Due to travel, I'll have to come
back to this week after next.  Thanks for catching my stupid mistake.

Ben
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c
index 992bcb19c11e..18ae1fd2175e 100644
--- a/fs/nfs/blocklayout/extent_tree.c
+++ b/fs/nfs/blocklayout/extent_tree.c
@@ -518,7 +518,7 @@  static __be32 *encode_scsi_range(struct pnfs_block_extent *be, __be32 *p)
 }
 
 static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
-		size_t buffer_size, size_t *count)
+		size_t buffer_size, size_t *count, __u64 *lastbyte)
 {
 	struct pnfs_block_extent *be;
 	int ret = 0;
@@ -541,6 +541,8 @@  static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
 		else
 			p = encode_block_extent(be, p);
 		be->be_tag = EXTENT_COMMITTING;
+		if ((ext_f_end(be) << SECTOR_SHIFT) - 1 > *lastbyte)
+			*lastbyte = (ext_f_end(be) << SECTOR_SHIFT) - 1;
 	}
 	spin_unlock(&bl->bl_ext_lock);
 
@@ -564,7 +566,7 @@  ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg)
 	arg->layoutupdate_pages = &arg->layoutupdate_page;
 
 retry:
-	ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, &count);
+	ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, &count, &arg->lastbytewritten);
 	if (unlikely(ret)) {
 		ext_tree_free_commitdata(arg, buffer_size);