diff mbox series

[v3,3/3] mdsmap: only choose one MDS who is in up:active state without laggy

Message ID 20191126122422.12396-4-xiubli@redhat.com (mailing list archive)
State New, archived
Headers show
Series mdsmap: fix mds choosing | expand

Commit Message

Xiubo Li Nov. 26, 2019, 12:24 p.m. UTC
From: Xiubo Li <xiubli@redhat.com>

Even when an MDS is in the up:active state, it may still be laggy.
Skip laggy MDSs when choosing one.

Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/mds_client.c | 13 +++++++++----
 fs/ceph/mdsmap.c     | 30 +++++++++++++++++++++++-------
 2 files changed, 32 insertions(+), 11 deletions(-)
diff mbox series

Patch

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 0444288fe87e..2c92a1452876 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -972,14 +972,14 @@  static int __choose_mds(struct ceph_mds_client *mdsc,
 				     frag.frag, mds,
 				     (int)r, frag.ndist);
 				if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
-				    CEPH_MDS_STATE_ACTIVE)
+				    CEPH_MDS_STATE_ACTIVE &&
+				    !ceph_mdsmap_is_laggy(mdsc->mdsmap, mds))
 					goto out;
 			}
 
 			/* since this file/dir wasn't known to be
 			 * replicated, then we want to look for the
 			 * authoritative mds. */
-			mode = USE_AUTH_MDS;
 			if (frag.mds >= 0) {
 				/* choose auth mds */
 				mds = frag.mds;
@@ -987,9 +987,14 @@  static int __choose_mds(struct ceph_mds_client *mdsc,
 				     "frag %u mds%d (auth)\n",
 				     inode, ceph_vinop(inode), frag.frag, mds);
 				if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
-				    CEPH_MDS_STATE_ACTIVE)
-					goto out;
+				    CEPH_MDS_STATE_ACTIVE) {
+					if (mode == USE_ANY_MDS &&
+					    !ceph_mdsmap_is_laggy(mdsc->mdsmap,
+								  mds))
+						goto out;
+				}
 			}
+			mode = USE_AUTH_MDS;
 		}
 	}
 
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index 3418cf2c6a12..284d68646c40 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -13,22 +13,24 @@ 
 
 #include "super.h"
 
+#define CEPH_MDS_IS_READY(i, ignore_laggy) \
+	(m->m_info[i].state > 0 && (ignore_laggy ? true : !m->m_info[i].laggy))
 
-/*
- * choose a random mds that is "up" (i.e. has a state > 0), or -1.
- */
-int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
+static int __mdsmap_get_random_mds(struct ceph_mdsmap *m, bool ignore_laggy)
 {
 	int n = 0;
 	int i, j;
 
-	/* special case for one mds */
+	/*
+	 * special case for one mds: whether or not it is laggy,
+	 * we have no other choice
+	 */
 	if (1 == m->m_num_mds && m->m_info[0].state > 0)
 		return 0;
 
 	/* count */
 	for (i = 0; i < m->m_num_mds; i++)
-		if (m->m_info[i].state > 0)
+		if (CEPH_MDS_IS_READY(i, ignore_laggy))
 			n++;
 	if (n == 0)
 		return -1;
@@ -36,7 +38,7 @@  int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
 	/* pick */
 	n = prandom_u32() % n;
 	for (j = 0, i = 0; i < m->m_num_mds; i++) {
-		if (m->m_info[i].state > 0)
+		if (CEPH_MDS_IS_READY(i, ignore_laggy))
 			j++;
 		if (j > n)
 			break;
@@ -45,6 +47,20 @@  int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
 	return i;
 }
 
+/*
+ * choose a random mds that is "up" (i.e. has a state > 0), or -1.
+ */
+int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
+{
+	int mds;
+
+	mds = __mdsmap_get_random_mds(m, false);
+	if (mds == m->m_num_mds || mds == -1)
+		mds = __mdsmap_get_random_mds(m, true);
+
+	return mds == m->m_num_mds ? -1 : mds;
+}
+
 #define __decode_and_drop_type(p, end, type, bad)		\
 	do {							\
 		if (*p + sizeof(type) > end)			\