diff mbox series

[v3] ceph: defer flushing the capsnap if the Fb is used

Message ID 20210110020140.141727-1-xiubli@redhat.com (mailing list archive)
State New, archived
Headers show
Series [v3] ceph: defer flushing the capsnap if the Fb is used | expand

Commit Message

Xiubo Li Jan. 10, 2021, 2:01 a.m. UTC
From: Xiubo Li <xiubli@redhat.com>

If the Fb cap is used it means the current inode is flushing the
dirty data to OSD, just defer flushing the capsnap.

URL: https://tracker.ceph.com/issues/48679
URL: https://tracker.ceph.com/issues/48640
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---

V3:
- Add more comments about putting the inode ref
- A small change about the code style

V2:
- Fix inode reference leak bug

 fs/ceph/caps.c | 32 +++++++++++++++++++-------------
 fs/ceph/snap.c |  6 +++---
 2 files changed, 22 insertions(+), 16 deletions(-)

Comments

Jeff Layton Jan. 12, 2021, 9:48 p.m. UTC | #1
On Sun, 2021-01-10 at 10:01 +0800, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
> 
> If the Fb cap is used it means the current inode is flushing the
> dirty data to OSD, just defer flushing the capsnap.
> 
> URL: https://tracker.ceph.com/issues/48679
> URL: https://tracker.ceph.com/issues/48640
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
> 
> V3:
> - Add more comments about putting the inode ref
> - A small change about the code style
> 
> V2:
> - Fix inode reference leak bug
> 
>  fs/ceph/caps.c | 32 +++++++++++++++++++-------------
>  fs/ceph/snap.c |  6 +++---
>  2 files changed, 22 insertions(+), 16 deletions(-)
> 

Hi Xiubo,

This patch seems to cause hangs in some xfstests (generic/013, in
particular). I'll take a closer look when I have a chance, but I'm
dropping this for now.

-- Jeff


> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index abbf48fc6230..b00234cf3b04 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -3047,6 +3047,7 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
>  {
>  	struct inode *inode = &ci->vfs_inode;
>  	int last = 0, put = 0, flushsnaps = 0, wake = 0;
> +	bool check_flushsnaps = false;
>  
> 
> 
> 
>  	spin_lock(&ci->i_ceph_lock);
>  	if (had & CEPH_CAP_PIN)
> @@ -3063,26 +3064,17 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
>  	if (had & CEPH_CAP_FILE_BUFFER) {
>  		if (--ci->i_wb_ref == 0) {
>  			last++;
> +			/* put the ref held by ceph_take_cap_refs() */
>  			put++;
> +			check_flushsnaps = true;
>  		}
>  		dout("put_cap_refs %p wb %d -> %d (?)\n",
>  		     inode, ci->i_wb_ref+1, ci->i_wb_ref);
>  	}
> -	if (had & CEPH_CAP_FILE_WR)
> +	if (had & CEPH_CAP_FILE_WR) {
>  		if (--ci->i_wr_ref == 0) {
>  			last++;
> -			if (__ceph_have_pending_cap_snap(ci)) {
> -				struct ceph_cap_snap *capsnap =
> -					list_last_entry(&ci->i_cap_snaps,
> -							struct ceph_cap_snap,
> -							ci_item);
> -				capsnap->writing = 0;
> -				if (ceph_try_drop_cap_snap(ci, capsnap))
> -					put++;
> -				else if (__ceph_finish_cap_snap(ci, capsnap))
> -					flushsnaps = 1;
> -				wake = 1;
> -			}
> +			check_flushsnaps = true;
>  			if (ci->i_wrbuffer_ref_head == 0 &&
>  			    ci->i_dirty_caps == 0 &&
>  			    ci->i_flushing_caps == 0) {
> @@ -3094,6 +3086,20 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
>  			if (!__ceph_is_any_real_caps(ci) && ci->i_snap_realm)
>  				drop_inode_snap_realm(ci);
>  		}
> +	}
> +	if (check_flushsnaps && __ceph_have_pending_cap_snap(ci)) {
> +		struct ceph_cap_snap *capsnap =
> +			list_last_entry(&ci->i_cap_snaps,
> +					struct ceph_cap_snap,
> +					ci_item);
> +		capsnap->writing = 0;
> +		if (ceph_try_drop_cap_snap(ci, capsnap))
> +		        /* put the ref held by ceph_queue_cap_snap() */
> +			put++;
> +		else if (__ceph_finish_cap_snap(ci, capsnap))
> +			flushsnaps = 1;
> +		wake = 1;
> +	}
>  	spin_unlock(&ci->i_ceph_lock);
>  
> 
> 
> 
>  	dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
> diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
> index b611f829cb61..639fb91cc9db 100644
> --- a/fs/ceph/snap.c
> +++ b/fs/ceph/snap.c
> @@ -561,10 +561,10 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
>  	capsnap->context = old_snapc;
>  	list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps);
>  
> 
> 
> 
> -	if (used & CEPH_CAP_FILE_WR) {
> +	if (used & (CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER)) {
>  		dout("queue_cap_snap %p cap_snap %p snapc %p"
> -		     " seq %llu used WR, now pending\n", inode,
> -		     capsnap, old_snapc, old_snapc->seq);
> +		     " seq %llu used WR | BUFFFER, now pending\n",
> +		     inode, capsnap, old_snapc, old_snapc->seq);
>  		capsnap->writing = 1;
>  	} else {
>  		/* note mtime, size NOW. */
Xiubo Li Jan. 18, 2021, 9:10 a.m. UTC | #2
On 2021/1/13 5:48, Jeff Layton wrote:
> On Sun, 2021-01-10 at 10:01 +0800, xiubli@redhat.com wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> If the Fb cap is used it means the current inode is flushing the
>> dirty data to OSD, just defer flushing the capsnap.
>>
>> URL: https://tracker.ceph.com/issues/48679
>> URL: https://tracker.ceph.com/issues/48640
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>>
>> V3:
>> - Add more comments about putting the inode ref
>> - A small change about the code style
>>
>> V2:
>> - Fix inode reference leak bug
>>
>>   fs/ceph/caps.c | 32 +++++++++++++++++++-------------
>>   fs/ceph/snap.c |  6 +++---
>>   2 files changed, 22 insertions(+), 16 deletions(-)
>>
> Hi Xiubo,
>
> This patch seems to cause hangs in some xfstests (generic/013, in
> particular). I'll take a closer look when I have a chance, but I'm
> dropping this for now.

Okay.

BTW, what are your test commands to reproduce it? I will take a look when
I am free in the next few days or later.

BRs

>
> -- Jeff
>
>
>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
>> index abbf48fc6230..b00234cf3b04 100644
>> --- a/fs/ceph/caps.c
>> +++ b/fs/ceph/caps.c
>> @@ -3047,6 +3047,7 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
>>   {
>>   	struct inode *inode = &ci->vfs_inode;
>>   	int last = 0, put = 0, flushsnaps = 0, wake = 0;
>> +	bool check_flushsnaps = false;
>>   
>>
>>
>>
>>   	spin_lock(&ci->i_ceph_lock);
>>   	if (had & CEPH_CAP_PIN)
>> @@ -3063,26 +3064,17 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
>>   	if (had & CEPH_CAP_FILE_BUFFER) {
>>   		if (--ci->i_wb_ref == 0) {
>>   			last++;
>> +			/* put the ref held by ceph_take_cap_refs() */
>>   			put++;
>> +			check_flushsnaps = true;
>>   		}
>>   		dout("put_cap_refs %p wb %d -> %d (?)\n",
>>   		     inode, ci->i_wb_ref+1, ci->i_wb_ref);
>>   	}
>> -	if (had & CEPH_CAP_FILE_WR)
>> +	if (had & CEPH_CAP_FILE_WR) {
>>   		if (--ci->i_wr_ref == 0) {
>>   			last++;
>> -			if (__ceph_have_pending_cap_snap(ci)) {
>> -				struct ceph_cap_snap *capsnap =
>> -					list_last_entry(&ci->i_cap_snaps,
>> -							struct ceph_cap_snap,
>> -							ci_item);
>> -				capsnap->writing = 0;
>> -				if (ceph_try_drop_cap_snap(ci, capsnap))
>> -					put++;
>> -				else if (__ceph_finish_cap_snap(ci, capsnap))
>> -					flushsnaps = 1;
>> -				wake = 1;
>> -			}
>> +			check_flushsnaps = true;
>>   			if (ci->i_wrbuffer_ref_head == 0 &&
>>   			    ci->i_dirty_caps == 0 &&
>>   			    ci->i_flushing_caps == 0) {
>> @@ -3094,6 +3086,20 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
>>   			if (!__ceph_is_any_real_caps(ci) && ci->i_snap_realm)
>>   				drop_inode_snap_realm(ci);
>>   		}
>> +	}
>> +	if (check_flushsnaps && __ceph_have_pending_cap_snap(ci)) {
>> +		struct ceph_cap_snap *capsnap =
>> +			list_last_entry(&ci->i_cap_snaps,
>> +					struct ceph_cap_snap,
>> +					ci_item);
>> +		capsnap->writing = 0;
>> +		if (ceph_try_drop_cap_snap(ci, capsnap))
>> +		        /* put the ref held by ceph_queue_cap_snap() */
>> +			put++;
>> +		else if (__ceph_finish_cap_snap(ci, capsnap))
>> +			flushsnaps = 1;
>> +		wake = 1;
>> +	}
>>   	spin_unlock(&ci->i_ceph_lock);
>>   
>>
>>
>>
>>   	dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
>> diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
>> index b611f829cb61..639fb91cc9db 100644
>> --- a/fs/ceph/snap.c
>> +++ b/fs/ceph/snap.c
>> @@ -561,10 +561,10 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
>>   	capsnap->context = old_snapc;
>>   	list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps);
>>   
>>
>>
>>
>> -	if (used & CEPH_CAP_FILE_WR) {
>> +	if (used & (CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER)) {
>>   		dout("queue_cap_snap %p cap_snap %p snapc %p"
>> -		     " seq %llu used WR, now pending\n", inode,
>> -		     capsnap, old_snapc, old_snapc->seq);
>> +		     " seq %llu used WR | BUFFFER, now pending\n",
>> +		     inode, capsnap, old_snapc, old_snapc->seq);
>>   		capsnap->writing = 1;
>>   	} else {
>>   		/* note mtime, size NOW. */
Jeff Layton Jan. 18, 2021, 11:08 a.m. UTC | #3
On Mon, 2021-01-18 at 17:10 +0800, Xiubo Li wrote:
> On 2021/1/13 5:48, Jeff Layton wrote:
> > On Sun, 2021-01-10 at 10:01 +0800, xiubli@redhat.com wrote:
> > > From: Xiubo Li <xiubli@redhat.com>
> > > 
> > > If the Fb cap is used it means the current inode is flushing the
> > > dirty data to OSD, just defer flushing the capsnap.
> > > 
> > > URL: https://tracker.ceph.com/issues/48679
> > > URL: https://tracker.ceph.com/issues/48640
> > > Signed-off-by: Xiubo Li <xiubli@redhat.com>
> > > ---
> > > 
> > > V3:
> > > - Add more comments about putting the inode ref
> > > - A small change about the code style
> > > 
> > > V2:
> > > - Fix inode reference leak bug
> > > 
> > >   fs/ceph/caps.c | 32 +++++++++++++++++++-------------
> > >   fs/ceph/snap.c |  6 +++---
> > >   2 files changed, 22 insertions(+), 16 deletions(-)
> > > 
> > Hi Xiubo,
> > 
> > This patch seems to cause hangs in some xfstests (generic/013, in
> > particular). I'll take a closer look when I have a chance, but I'm
> > dropping this for now.
> 
> Okay.
> 
> BTW, what's your test commands to reproduce it ? I will take a look when 
> I am free these days or later.
> 
> BRs
> 

I set up xfstests to run on cephfs, and then just run:

    $ sudo ./check generic/013

It wouldn't reliably complete with this patch in place. Setting up
xfstests is the "hard part". I'll plan to roll up a wiki page on how to
do that soon (that's good info to have out there anyway).
> 

Cheers,
Xiubo Li Jan. 20, 2021, 12:56 a.m. UTC | #4
On 2021/1/18 19:08, Jeff Layton wrote:
> On Mon, 2021-01-18 at 17:10 +0800, Xiubo Li wrote:
>> On 2021/1/13 5:48, Jeff Layton wrote:
>>> On Sun, 2021-01-10 at 10:01 +0800, xiubli@redhat.com wrote:
>>>> From: Xiubo Li <xiubli@redhat.com>
>>>>
>>>> If the Fb cap is used it means the current inode is flushing the
>>>> dirty data to OSD, just defer flushing the capsnap.
>>>>
>>>> URL: https://tracker.ceph.com/issues/48679
>>>> URL: https://tracker.ceph.com/issues/48640
>>>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>>>> ---
>>>>
>>>> V3:
>>>> - Add more comments about putting the inode ref
>>>> - A small change about the code style
>>>>
>>>> V2:
>>>> - Fix inode reference leak bug
>>>>
>>>>   fs/ceph/caps.c | 32 +++++++++++++++++++-------------
>>>>   fs/ceph/snap.c |  6 +++---
>>>>   2 files changed, 22 insertions(+), 16 deletions(-)
>>>>
>>> Hi Xiubo,
>>>
>>> This patch seems to cause hangs in some xfstests (generic/013, in
>>> particular). I'll take a closer look when I have a chance, but I'm
>>> dropping this for now.
>> Okay.
>>
>> BTW, what's your test commands to reproduce it ? I will take a look when
>> I am free these days or later.
>>
>> BRs
>>
> I set up xfstests to run on cephfs, and then just run:
>
>      $ sudo ./check generic/013
>
> It wouldn't reliably complete with this patch in place. Setting up
> xfstests is the "hard part". I'll plan to roll up a wiki page on how to
> do that soon (that's good info to have out there anyway).

Okay, sure.

Thanks.


> Cheers,
Jeff Layton Jan. 20, 2021, 8:04 p.m. UTC | #5
On Wed, 2021-01-20 at 08:56 +0800, Xiubo Li wrote:
> On 2021/1/18 19:08, Jeff Layton wrote:
> > On Mon, 2021-01-18 at 17:10 +0800, Xiubo Li wrote:
> > > On 2021/1/13 5:48, Jeff Layton wrote:
> > > > On Sun, 2021-01-10 at 10:01 +0800, xiubli@redhat.com wrote:
> > > > > From: Xiubo Li <xiubli@redhat.com>
> > > > > 
> > > > > If the Fb cap is used it means the current inode is flushing the
> > > > > dirty data to OSD, just defer flushing the capsnap.
> > > > > 
> > > > > URL: https://tracker.ceph.com/issues/48679
> > > > > URL: https://tracker.ceph.com/issues/48640
> > > > > Signed-off-by: Xiubo Li <xiubli@redhat.com>
> > > > > ---
> > > > > 
> > > > > V3:
> > > > > - Add more comments about putting the inode ref
> > > > > - A small change about the code style
> > > > > 
> > > > > V2:
> > > > > - Fix inode reference leak bug
> > > > > 
> > > > >   fs/ceph/caps.c | 32 +++++++++++++++++++-------------
> > > > >   fs/ceph/snap.c |  6 +++---
> > > > >   2 files changed, 22 insertions(+), 16 deletions(-)
> > > > > 
> > > > Hi Xiubo,
> > > > 
> > > > This patch seems to cause hangs in some xfstests (generic/013, in
> > > > particular). I'll take a closer look when I have a chance, but I'm
> > > > dropping this for now.
> > > Okay.
> > > 
> > > BTW, what's your test commands to reproduce it ? I will take a look when
> > > I am free these days or later.
> > > 
> > > BRs
> > > 
> > I set up xfstests to run on cephfs, and then just run:
> > 
> >      $ sudo ./check generic/013
> > 
> > It wouldn't reliably complete with this patch in place. Setting up
> > xfstests is the "hard part". I'll plan to roll up a wiki page on how to
> > do that soon (that's good info to have out there anyway).
> 
> Okay, sure.
> 

I'm not sure where this should be documented. Still, here's a
local.config that I'm using now (with comments). I'm happy to merge this
somewhere for posterity, but not sure where it should go.
Jeff Layton Jan. 21, 2021, 2:28 p.m. UTC | #6
On Mon, 2021-01-18 at 17:10 +0800, Xiubo Li wrote:
> On 2021/1/13 5:48, Jeff Layton wrote:
> > On Sun, 2021-01-10 at 10:01 +0800, xiubli@redhat.com wrote:
> > > From: Xiubo Li <xiubli@redhat.com>
> > > 
> > > If the Fb cap is used it means the current inode is flushing the
> > > dirty data to OSD, just defer flushing the capsnap.
> > > 
> > > URL: https://tracker.ceph.com/issues/48679
> > > URL: https://tracker.ceph.com/issues/48640
> > > Signed-off-by: Xiubo Li <xiubli@redhat.com>
> > > ---
> > > 
> > > V3:
> > > - Add more comments about putting the inode ref
> > > - A small change about the code style
> > > 
> > > V2:
> > > - Fix inode reference leak bug
> > > 
> > >   fs/ceph/caps.c | 32 +++++++++++++++++++-------------
> > >   fs/ceph/snap.c |  6 +++---
> > >   2 files changed, 22 insertions(+), 16 deletions(-)
> > > 
> > Hi Xiubo,
> > 
> > This patch seems to cause hangs in some xfstests (generic/013, in
> > particular). I'll take a closer look when I have a chance, but I'm
> > dropping this for now.
> 
> Okay.
> 
> BTW, what's your test commands to reproduce it ? I will take a look when 
> I am free these days or later.
> 


FWIW, I was able to trigger a hang with this patch by running one of the
tests that this patch was intended to fix (snaptest-git-ceph.sh). Here's
the stack trace of the hung task:

# cat /proc/1166/stack
[<0>] wait_woken+0x87/0xb0
[<0>] ceph_get_caps+0x405/0x6a0 [ceph]
[<0>] ceph_write_iter+0x2ca/0xd20 [ceph]
[<0>] new_sync_write+0x10b/0x190
[<0>] vfs_write+0x240/0x390
[<0>] ksys_write+0x58/0xd0
[<0>] do_syscall_64+0x33/0x40
[<0>] entry_SYSCALL_64_after_hwframe+0x44/0xa9

Without this patch I could run that test in a loop without issue. This
bug mentions that the original issue occurred during mds thrashing
though, and I haven't tried reproducing that scenario yet:

    https://tracker.ceph.com/issues/48640

Cheers,
Xiubo Li Jan. 22, 2021, 10:07 a.m. UTC | #7
On 2021/1/21 22:28, Jeff Layton wrote:
> On Mon, 2021-01-18 at 17:10 +0800, Xiubo Li wrote:
>> On 2021/1/13 5:48, Jeff Layton wrote:
>>> On Sun, 2021-01-10 at 10:01 +0800, xiubli@redhat.com wrote:
>>>> From: Xiubo Li <xiubli@redhat.com>
>>>>
>>>> If the Fb cap is used it means the current inode is flushing the
>>>> dirty data to OSD, just defer flushing the capsnap.
>>>>
>>>> URL: https://tracker.ceph.com/issues/48679
>>>> URL: https://tracker.ceph.com/issues/48640
>>>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>>>> ---
>>>>
>>>> V3:
>>>> - Add more comments about putting the inode ref
>>>> - A small change about the code style
>>>>
>>>> V2:
>>>> - Fix inode reference leak bug
>>>>
>>>>    fs/ceph/caps.c | 32 +++++++++++++++++++-------------
>>>>    fs/ceph/snap.c |  6 +++---
>>>>    2 files changed, 22 insertions(+), 16 deletions(-)
>>>>
>>> Hi Xiubo,
>>>
>>> This patch seems to cause hangs in some xfstests (generic/013, in
>>> particular). I'll take a closer look when I have a chance, but I'm
>>> dropping this for now.
>> Okay.
>>
>> BTW, what's your test commands to reproduce it ? I will take a look when
>> I am free these days or later.
>>
>
> FWIW, I was able to trigger a hang with this patch by running one of the
> tests that this patch was intended to fix (snaptest-git-ceph.sh). Here's
> the stack trace of the hung task:
>
> # cat /proc/1166/stack
> [<0>] wait_woken+0x87/0xb0
> [<0>] ceph_get_caps+0x405/0x6a0 [ceph]
> [<0>] ceph_write_iter+0x2ca/0xd20 [ceph]
> [<0>] new_sync_write+0x10b/0x190
> [<0>] vfs_write+0x240/0x390
> [<0>] ksys_write+0x58/0xd0
> [<0>] do_syscall_64+0x33/0x40
> [<0>] entry_SYSCALL_64_after_hwframe+0x44/0xa9

Hi Jeff,

I have reproduced it, and also tried libcephfs, which has the same
logic for this issue, and it worked well.

I will take a look at it later.

> Without this patch I could run that test in a loop without issue. This
> bug mentions that the original issue occurred during mds thrashing
> though, and I haven't tried reproducing that scenario yet:
>
>      https://tracker.ceph.com/issues/48640

 From the logs, this issue does not seem to be related to the thrashing operation;
during this test the MDS has already been successfully thrashed.

BRs

Xiubo

> Cheers,
diff mbox series

Patch

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index abbf48fc6230..b00234cf3b04 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -3047,6 +3047,7 @@  static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
 {
 	struct inode *inode = &ci->vfs_inode;
 	int last = 0, put = 0, flushsnaps = 0, wake = 0;
+	bool check_flushsnaps = false;
 
 	spin_lock(&ci->i_ceph_lock);
 	if (had & CEPH_CAP_PIN)
@@ -3063,26 +3064,17 @@  static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
 	if (had & CEPH_CAP_FILE_BUFFER) {
 		if (--ci->i_wb_ref == 0) {
 			last++;
+			/* put the ref held by ceph_take_cap_refs() */
 			put++;
+			check_flushsnaps = true;
 		}
 		dout("put_cap_refs %p wb %d -> %d (?)\n",
 		     inode, ci->i_wb_ref+1, ci->i_wb_ref);
 	}
-	if (had & CEPH_CAP_FILE_WR)
+	if (had & CEPH_CAP_FILE_WR) {
 		if (--ci->i_wr_ref == 0) {
 			last++;
-			if (__ceph_have_pending_cap_snap(ci)) {
-				struct ceph_cap_snap *capsnap =
-					list_last_entry(&ci->i_cap_snaps,
-							struct ceph_cap_snap,
-							ci_item);
-				capsnap->writing = 0;
-				if (ceph_try_drop_cap_snap(ci, capsnap))
-					put++;
-				else if (__ceph_finish_cap_snap(ci, capsnap))
-					flushsnaps = 1;
-				wake = 1;
-			}
+			check_flushsnaps = true;
 			if (ci->i_wrbuffer_ref_head == 0 &&
 			    ci->i_dirty_caps == 0 &&
 			    ci->i_flushing_caps == 0) {
@@ -3094,6 +3086,20 @@  static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
 			if (!__ceph_is_any_real_caps(ci) && ci->i_snap_realm)
 				drop_inode_snap_realm(ci);
 		}
+	}
+	if (check_flushsnaps && __ceph_have_pending_cap_snap(ci)) {
+		struct ceph_cap_snap *capsnap =
+			list_last_entry(&ci->i_cap_snaps,
+					struct ceph_cap_snap,
+					ci_item);
+		capsnap->writing = 0;
+		if (ceph_try_drop_cap_snap(ci, capsnap))
+		        /* put the ref held by ceph_queue_cap_snap() */
+			put++;
+		else if (__ceph_finish_cap_snap(ci, capsnap))
+			flushsnaps = 1;
+		wake = 1;
+	}
 	spin_unlock(&ci->i_ceph_lock);
 
 	dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index b611f829cb61..639fb91cc9db 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -561,10 +561,10 @@  void ceph_queue_cap_snap(struct ceph_inode_info *ci)
 	capsnap->context = old_snapc;
 	list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps);
 
-	if (used & CEPH_CAP_FILE_WR) {
+	if (used & (CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER)) {
 		dout("queue_cap_snap %p cap_snap %p snapc %p"
-		     " seq %llu used WR, now pending\n", inode,
-		     capsnap, old_snapc, old_snapc->seq);
+		     " seq %llu used WR | BUFFFER, now pending\n",
+		     inode, capsnap, old_snapc, old_snapc->seq);
 		capsnap->writing = 1;
 	} else {
 		/* note mtime, size NOW. */