diff mbox series

[v4] ceph: flush the mdlog for filesystem sync

Message ID 20220419005849.802780-1-xiubli@redhat.com (mailing list archive)
State New, archived
Headers show
Series [v4] ceph: flush the mdlog for filesystem sync | expand

Commit Message

Xiubo Li April 19, 2022, 12:58 a.m. UTC
Before waiting for a request's safe reply, send an mdlog flush
request to the relevant MDS. This also flushes the mdlog for all
the other unsafe requests in the same session, so we can record the
last session and skip flushing the mdlog again on the next loop
iteration if the session is unchanged. There are still cases where
the mdlog flush request may be sent twice or more to the same MDS,
but those should be rare.

Rename wait_unsafe_requests() to flush_mdlog_and_wait_mdsc_unsafe_requests()
to make it more descriptive.

URL: https://tracker.ceph.com/issues/55284
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---

V4:
- Fixed the lock inversion bug.



 fs/ceph/mds_client.c | 34 ++++++++++++++++++++++++++++------
 1 file changed, 28 insertions(+), 6 deletions(-)

Comments

Jeff Layton April 19, 2022, 10:16 a.m. UTC | #1
On Tue, 2022-04-19 at 08:58 +0800, Xiubo Li wrote:
> Before waiting for a request's safe reply, send an mdlog flush
> request to the relevant MDS. This also flushes the mdlog for all
> the other unsafe requests in the same session, so we can record the
> last session and skip flushing the mdlog again on the next loop
> iteration if the session is unchanged. There are still cases where
> the mdlog flush request may be sent twice or more to the same MDS,
> but those should be rare.
> 
> Rename wait_unsafe_requests() to flush_mdlog_and_wait_mdsc_unsafe_requests()
> to make it more descriptive.
> 
> URL: https://tracker.ceph.com/issues/55284
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
> 
> V4:
> - Fixed the lock inversion bug.
> 
> 
> 
>  fs/ceph/mds_client.c | 34 ++++++++++++++++++++++++++++------
>  1 file changed, 28 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index 0da85c9ce73a..58827af57b7f 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -5093,15 +5093,17 @@ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
>  }
>  
>  /*
> - * wait for all write mds requests to flush.
> + * flush the mdlog and wait for all write mds requests to flush.
>   */
> -static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid)
> +static void flush_mdlog_and_wait_mdsc_unsafe_requests(struct ceph_mds_client *mdsc,
> +						 u64 want_tid)
>  {
>  	struct ceph_mds_request *req = NULL, *nextreq;
> +	struct ceph_mds_session *last_session = NULL;
>  	struct rb_node *n;
>  
>  	mutex_lock(&mdsc->mutex);
> -	dout("wait_unsafe_requests want %lld\n", want_tid);
> +	dout("%s want %lld\n", __func__, want_tid);
>  restart:
>  	req = __get_oldest_req(mdsc);
>  	while (req && req->r_tid <= want_tid) {
> @@ -5113,14 +5115,33 @@ static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid)
>  			nextreq = NULL;
>  		if (req->r_op != CEPH_MDS_OP_SETFILELOCK &&
>  		    (req->r_op & CEPH_MDS_OP_WRITE)) {
> +			struct ceph_mds_session *s;
> +
>  			/* write op */
>  			ceph_mdsc_get_request(req);
>  			if (nextreq)
>  				ceph_mdsc_get_request(nextreq);
> +
> +			s = req->r_session;
> +			if (!s) {
> +				req = nextreq;
> +				continue;
> +			}
> +			s = ceph_get_mds_session(s);
>  			mutex_unlock(&mdsc->mutex);
> -			dout("wait_unsafe_requests  wait on %llu (want %llu)\n",
> +
> +			/* send flush mdlog request to MDS */
> +			if (last_session != s) {
> +				send_flush_mdlog(s);
> +				ceph_put_mds_session(last_session);
> +				last_session = s;
> +			} else {
> +				ceph_put_mds_session(s);
> +			}
> +			dout("%s wait on %llu (want %llu)\n", __func__,
>  			     req->r_tid, want_tid);
>  			wait_for_completion(&req->r_safe_completion);
> +
>  			mutex_lock(&mdsc->mutex);
>  			ceph_mdsc_put_request(req);
>  			if (!nextreq)
> @@ -5135,7 +5156,8 @@ static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid)
>  		req = nextreq;
>  	}
>  	mutex_unlock(&mdsc->mutex);
> -	dout("wait_unsafe_requests done\n");
> +	ceph_put_mds_session(last_session);
> +	dout("%s done\n", __func__);
>  }
>  
>  void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
> @@ -5164,7 +5186,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
>  	dout("sync want tid %lld flush_seq %lld\n",
>  	     want_tid, want_flush);
>  
> -	wait_unsafe_requests(mdsc, want_tid);
> +	flush_mdlog_and_wait_mdsc_unsafe_requests(mdsc, want_tid);
>  	wait_caps_flush(mdsc, want_flush);
>  }
>  

Reviewed-by: Jeff Layton <jlayton@kernel.org>
diff mbox series

Patch

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 0da85c9ce73a..58827af57b7f 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -5093,15 +5093,17 @@  void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
 }
 
 /*
- * wait for all write mds requests to flush.
+ * flush the mdlog and wait for all write mds requests to flush.
  */
-static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid)
+static void flush_mdlog_and_wait_mdsc_unsafe_requests(struct ceph_mds_client *mdsc,
+						 u64 want_tid)
 {
 	struct ceph_mds_request *req = NULL, *nextreq;
+	struct ceph_mds_session *last_session = NULL;
 	struct rb_node *n;
 
 	mutex_lock(&mdsc->mutex);
-	dout("wait_unsafe_requests want %lld\n", want_tid);
+	dout("%s want %lld\n", __func__, want_tid);
 restart:
 	req = __get_oldest_req(mdsc);
 	while (req && req->r_tid <= want_tid) {
@@ -5113,14 +5115,33 @@  static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid)
 			nextreq = NULL;
 		if (req->r_op != CEPH_MDS_OP_SETFILELOCK &&
 		    (req->r_op & CEPH_MDS_OP_WRITE)) {
+			struct ceph_mds_session *s;
+
 			/* write op */
 			ceph_mdsc_get_request(req);
 			if (nextreq)
 				ceph_mdsc_get_request(nextreq);
+
+			s = req->r_session;
+			if (!s) {
+				req = nextreq;
+				continue;
+			}
+			s = ceph_get_mds_session(s);
 			mutex_unlock(&mdsc->mutex);
-			dout("wait_unsafe_requests  wait on %llu (want %llu)\n",
+
+			/* send flush mdlog request to MDS */
+			if (last_session != s) {
+				send_flush_mdlog(s);
+				ceph_put_mds_session(last_session);
+				last_session = s;
+			} else {
+				ceph_put_mds_session(s);
+			}
+			dout("%s wait on %llu (want %llu)\n", __func__,
 			     req->r_tid, want_tid);
 			wait_for_completion(&req->r_safe_completion);
+
 			mutex_lock(&mdsc->mutex);
 			ceph_mdsc_put_request(req);
 			if (!nextreq)
@@ -5135,7 +5156,8 @@  static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid)
 		req = nextreq;
 	}
 	mutex_unlock(&mdsc->mutex);
-	dout("wait_unsafe_requests done\n");
+	ceph_put_mds_session(last_session);
+	dout("%s done\n", __func__);
 }
 
 void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
@@ -5164,7 +5186,7 @@  void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
 	dout("sync want tid %lld flush_seq %lld\n",
 	     want_tid, want_flush);
 
-	wait_unsafe_requests(mdsc, want_tid);
+	flush_mdlog_and_wait_mdsc_unsafe_requests(mdsc, want_tid);
 	wait_caps_flush(mdsc, want_flush);
 }