diff mbox series

ceph: try to queue a writeback if revoking fails

Message ID 20220428124852.80682-1-xiubli@redhat.com (mailing list archive)
State New, archived
Headers show
Series ceph: try to queue a writeback if revoking fails | expand

Commit Message

Xiubo Li April 28, 2022, 12:48 p.m. UTC
If the page cache writeback has just finished and i_wrbuffer_ref
reaches zero, it will try to trigger ceph_check_caps(). But if
i_wrbuffer_ref is increased again by an mmap/cache write just before
ceph_check_caps() runs, then the Fwb revoke will fail.

In this case we need to try to queue a writeback ourselves instead of
waiting for the BDI's delayed work, which runs every 5 seconds, to
trigger the writeback.

URL: https://tracker.ceph.com/issues/55377
URL: https://tracker.ceph.com/issues/46904
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/caps.c | 28 ++++++++++++++++++++++++----
 1 file changed, 24 insertions(+), 4 deletions(-)

Comments

Xiubo Li April 28, 2022, 12:49 p.m. UTC | #1
Please ignore this, just forgot to add the v2 tag.

-- Xiubo

On 4/28/22 8:48 PM, Xiubo Li wrote:
> If the pagecaches writeback just finished and the i_wrbuffer_ref
> reaches zero it will try to trigger ceph_check_caps(). But if just
> before ceph_check_caps() the i_wrbuffer_ref could be increased
> again by mmap/cache write, then the Fwb revoke will fail.
>
> We need to try to queue a writeback in this case instead of
> triggering the writeback by BDI's delayed work per 5 seconds.
>
> URL: https://tracker.ceph.com/issues/55377
> URL: https://tracker.ceph.com/issues/46904
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>   fs/ceph/caps.c | 28 ++++++++++++++++++++++++----
>   1 file changed, 24 insertions(+), 4 deletions(-)
>
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index 906c95d2a4ed..22dae29be64d 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -1912,6 +1912,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
>   	struct rb_node *p;
>   	bool queue_invalidate = false;
>   	bool tried_invalidate = false;
> +	bool queue_writeback = false;
>   
>   	if (session)
>   		ceph_get_mds_session(session);
> @@ -2064,10 +2065,27 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
>   		}
>   
>   		/* completed revocation? going down and there are no caps? */
> -		if (revoking && (revoking & cap_used) == 0) {
> -			dout("completed revocation of %s\n",
> -			     ceph_cap_string(cap->implemented & ~cap->issued));
> -			goto ack;
> +		if (revoking) {
> +			if ((revoking & cap_used) == 0) {
> +				dout("completed revocation of %s\n",
> +				      ceph_cap_string(cap->implemented & ~cap->issued));
> +				goto ack;
> +			}
> +
> +			/*
> +			 * If the "i_wrbuffer_ref" was increased by mmap or generic
> +			 * cache write just before the ceph_check_caps() is called,
> +			 * the Fb capability revoking will fail this time. Then we
> +			 * must wait for the BDI's delayed work to flush the dirty
> +			 * pages and to release the "i_wrbuffer_ref", which will cost
> +			 * at most 5 seconds. That means the MDS needs to wait at
> +			 * most 5 seconds to finished the Fb capability's revocation.
> +			 *
> +			 * Let's queue a writeback for it.
> +			 */
> +			if (S_ISREG(inode->i_mode) && ci->i_wrbuffer_ref &&
> +			    (revoking & CEPH_CAP_FILE_BUFFER))
> +				queue_writeback = true;
>   		}
>   
>   		/* want more caps from mds? */
> @@ -2137,6 +2155,8 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
>   	spin_unlock(&ci->i_ceph_lock);
>   
>   	ceph_put_mds_session(session);
> +	if (queue_writeback)
> +		ceph_queue_writeback(inode);
>   	if (queue_invalidate)
>   		ceph_queue_invalidate(inode);
>   }
diff mbox series

Patch

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 906c95d2a4ed..22dae29be64d 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1912,6 +1912,7 @@  void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 	struct rb_node *p;
 	bool queue_invalidate = false;
 	bool tried_invalidate = false;
+	bool queue_writeback = false;
 
 	if (session)
 		ceph_get_mds_session(session);
@@ -2064,10 +2065,27 @@  void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 		}
 
 		/* completed revocation? going down and there are no caps? */
-		if (revoking && (revoking & cap_used) == 0) {
-			dout("completed revocation of %s\n",
-			     ceph_cap_string(cap->implemented & ~cap->issued));
-			goto ack;
+		if (revoking) {
+			if ((revoking & cap_used) == 0) {
+				dout("completed revocation of %s\n",
+				      ceph_cap_string(cap->implemented & ~cap->issued));
+				goto ack;
+			}
+
+			/*
+			 * If the "i_wrbuffer_ref" was increased by mmap or generic
+			 * cache write just before the ceph_check_caps() is called,
+			 * the Fb capability revoking will fail this time. Then we
+			 * must wait for the BDI's delayed work to flush the dirty
+			 * pages and to release the "i_wrbuffer_ref", which will cost
+			 * at most 5 seconds. That means the MDS needs to wait at
+			 * most 5 seconds to finish the Fb capability's revocation.
+			 *
+			 * Let's queue a writeback for it.
+			 */
+			if (S_ISREG(inode->i_mode) && ci->i_wrbuffer_ref &&
+			    (revoking & CEPH_CAP_FILE_BUFFER))
+				queue_writeback = true;
 		}
 
 		/* want more caps from mds? */
@@ -2137,6 +2155,8 @@  void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 	spin_unlock(&ci->i_ceph_lock);
 
 	ceph_put_mds_session(session);
+	if (queue_writeback)
+		ceph_queue_writeback(inode);
 	if (queue_invalidate)
 		ceph_queue_invalidate(inode);
 }