diff mbox series

ceph: fix mdsmap_decode got incorrect mds(X)

Message ID 20191203142949.34910-1-xiubli@redhat.com (mailing list archive)
State New, archived
Headers show
Series ceph: fix mdsmap_decode got incorrect mds(X) | expand

Commit Message

Xiubo Li Dec. 3, 2019, 2:29 p.m. UTC
From: Xiubo Li <xiubli@redhat.com>

The possible max rank, it maybe larger than the m->m_num_mds,
for example if the mds_max == 2 in the cluster, when the MDS(0)
was laggy and being replaced by a new MDS, we will temporarily
receive a new mds map with n_num_mds == 1 and the active MDS(1),
and the mds rank >= m->m_num_mds.

Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/mdsmap.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

Comments

Xiubo Li Dec. 3, 2019, 2:47 p.m. UTC | #1
Hi Jeff,

This is the follow up of the previous series about the mdsmap.

The following the logs, we can see the first line there has only one MDS 
with the rank number is mds(1):


<7>[116366.493705] ceph: mdsmap_decode 1/1 4343 mds1.17 
(1)192.168.195.165:6814 up:active
<7>[116366.493711] ceph:  mdsmap_decode m_enabled: 1, m_damaged: 0, 
m_num_laggy: 0
<7>[116366.493712] ceph:  mdsmap_decode success epoch 40
<7>[116366.567941] ceph:  mdsmap_decode 1/2 4343 mds1.17 
(1)192.168.195.165:6814 up:active
<7>[116366.567947] ceph:  mdsmap_decode 2/2 4540 mds0.41 
(1)192.168.195.165:6813 up:replay
<7>[116366.567950] ceph:  mdsmap_decode m_enabled: 1, m_damaged: 0, 
m_num_laggy: 0
<7>[116366.567952] ceph:  mdsmap_decode success epoch 41
<6>[116366.567955] ceph: mds1 recovery completed
<7>[116367.576211] ceph:  mdsmap_decode 1/2 4343 mds1.17 
(1)192.168.195.165:6814 up:active
<7>[116367.576215] ceph:  mdsmap_decode 2/2 4540 mds0.41 
(1)192.168.195.165:6813 up:resolve
<7>[116367.576218] ceph:  mdsmap_decode m_enabled: 1, m_damaged: 0, 
m_num_laggy: 0
<7>[116367.576219] ceph:  mdsmap_decode success epoch 42
<7>[116368.702161] ceph:  mdsmap_decode 1/2 4343 mds1.17 
(1)192.168.195.165:6814 up:active
<7>[116368.702171] ceph:  mdsmap_decode 2/2 4540 mds0.41 
(1)192.168.195.165:6813 up:reconnect
<7>[116368.702177] ceph:  mdsmap_decode m_enabled: 1, m_damaged: 0, 
m_num_laggy: 0
<7>[116368.702180] ceph:  mdsmap_decode success epoch 43
<7>[116369.829235] ceph:  mdsmap_decode 1/2 4343 mds1.17 
(1)192.168.195.165:6814 up:active
<7>[116369.829245] ceph:  mdsmap_decode 2/2 4540 mds0.41 
(1)192.168.195.165:6813 up:rejoin
<7>[116369.829253] ceph:  mdsmap_decode m_enabled: 1, m_damaged: 0, 
m_num_laggy: 0
<7>[116369.829255] ceph:  mdsmap_decode success epoch 44
<7>[116370.982653] ceph:  mdsmap_decode 1/2 4343 mds1.17 
(1)192.168.195.165:6814 up:active
<7>[116370.982696] ceph:  mdsmap_decode 2/2 4540 mds0.41 
(1)192.168.195.165:6813 up:active
<7>[116370.982785] ceph:  mdsmap_decode m_enabled: 1, m_damaged: 0, 
m_num_laggy: 0
<7>[116370.982787] ceph:  mdsmap_decode success epoch 45

Thanks.
BRs

On 2019/12/3 22:29, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
>
> The possible max rank, it maybe larger than the m->m_num_mds,
> for example if the mds_max == 2 in the cluster, when the MDS(0)
> was laggy and being replaced by a new MDS, we will temporarily
> receive a new mds map with n_num_mds == 1 and the active MDS(1),
> and the mds rank >= m->m_num_mds.
>
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>   fs/ceph/mdsmap.c | 12 +++++++++++-
>   1 file changed, 11 insertions(+), 1 deletion(-)
>
> diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
> index 284d68646c40..a77e0ecb9a6b 100644
> --- a/fs/ceph/mdsmap.c
> +++ b/fs/ceph/mdsmap.c
> @@ -129,6 +129,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
>   	int err;
>   	u8 mdsmap_v, mdsmap_cv;
>   	u16 mdsmap_ev;
> +	u32 possible_max_rank;
>   
>   	m = kzalloc(sizeof(*m), GFP_NOFS);
>   	if (!m)
> @@ -164,6 +165,15 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
>   	m->m_num_mds = n = ceph_decode_32(p);
>   	m->m_num_active_mds = m->m_num_mds;
>   
> +	/*
> +	 * the possible max rank, it maybe larger than the m->m_num_mds,
> +	 * for example if the mds_max == 2 in the cluster, when the MDS(0)
> +	 * was laggy and being replaced by a new MDS, we will temporarily
> +	 * receive a new mds map with n_num_mds == 1 and the active MDS(1),
> +	 * and the mds rank >= m->m_num_mds.
> +	 */
> +	possible_max_rank = max((u32)m->m_num_mds, m->m_max_mds);
> +
>   	m->m_info = kcalloc(m->m_num_mds, sizeof(*m->m_info), GFP_NOFS);
>   	if (!m->m_info)
>   		goto nomem;
> @@ -238,7 +248,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
>   		     ceph_mds_state_name(state),
>   		     laggy ? "(laggy)" : "");
>   
> -		if (mds < 0 || mds >= m->m_num_mds) {
> +		if (mds < 0 || mds >= possible_max_rank) {
>   			pr_warn("mdsmap_decode got incorrect mds(%d)\n", mds);
>   			continue;
>   		}
Jeff Layton Dec. 3, 2019, 6:42 p.m. UTC | #2
On Tue, 2019-12-03 at 09:29 -0500, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
> 
> The possible max rank, it maybe larger than the m->m_num_mds,
> for example if the mds_max == 2 in the cluster, when the MDS(0)
> was laggy and being replaced by a new MDS, we will temporarily
> receive a new mds map with n_num_mds == 1 and the active MDS(1),
> and the mds rank >= m->m_num_mds.
> 
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>  fs/ceph/mdsmap.c | 12 +++++++++++-
>  1 file changed, 11 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
> index 284d68646c40..a77e0ecb9a6b 100644
> --- a/fs/ceph/mdsmap.c
> +++ b/fs/ceph/mdsmap.c
> @@ -129,6 +129,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
>  	int err;
>  	u8 mdsmap_v, mdsmap_cv;
>  	u16 mdsmap_ev;
> +	u32 possible_max_rank;
>  
>  	m = kzalloc(sizeof(*m), GFP_NOFS);
>  	if (!m)
> @@ -164,6 +165,15 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
>  	m->m_num_mds = n = ceph_decode_32(p);
>  	m->m_num_active_mds = m->m_num_mds;
>  
> +	/*
> +	 * the possible max rank, it maybe larger than the m->m_num_mds,
> +	 * for example if the mds_max == 2 in the cluster, when the MDS(0)
> +	 * was laggy and being replaced by a new MDS, we will temporarily
> +	 * receive a new mds map with n_num_mds == 1 and the active MDS(1),
> +	 * and the mds rank >= m->m_num_mds.
> +	 */
> +	possible_max_rank = max((u32)m->m_num_mds, m->m_max_mds);
> +
>  	m->m_info = kcalloc(m->m_num_mds, sizeof(*m->m_info), GFP_NOFS);
>  	if (!m->m_info)
>  		goto nomem;
> @@ -238,7 +248,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
>  		     ceph_mds_state_name(state),
>  		     laggy ? "(laggy)" : "");
>  
> -		if (mds < 0 || mds >= m->m_num_mds) {
> +		if (mds < 0 || mds >= possible_max_rank) {
>  			pr_warn("mdsmap_decode got incorrect mds(%d)\n", mds);
>  			continue;
>  		}

Thanks, Xiubo. I'll squash this one into your earlier ceph_mdsmap_decode
patch, since it's fixing that logic up.
Ilya Dryomov Dec. 3, 2019, 7:17 p.m. UTC | #3
On Tue, Dec 3, 2019 at 3:30 PM <xiubli@redhat.com> wrote:
>
> From: Xiubo Li <xiubli@redhat.com>
>
> The possible max rank, it maybe larger than the m->m_num_mds,
> for example if the mds_max == 2 in the cluster, when the MDS(0)
> was laggy and being replaced by a new MDS, we will temporarily
> receive a new mds map with n_num_mds == 1 and the active MDS(1),
> and the mds rank >= m->m_num_mds.
>
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>  fs/ceph/mdsmap.c | 12 +++++++++++-
>  1 file changed, 11 insertions(+), 1 deletion(-)
>
> diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
> index 284d68646c40..a77e0ecb9a6b 100644
> --- a/fs/ceph/mdsmap.c
> +++ b/fs/ceph/mdsmap.c
> @@ -129,6 +129,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
>         int err;
>         u8 mdsmap_v, mdsmap_cv;
>         u16 mdsmap_ev;
> +       u32 possible_max_rank;

I think this should be an int, like mds and m_num_mds,

>
>         m = kzalloc(sizeof(*m), GFP_NOFS);
>         if (!m)
> @@ -164,6 +165,15 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
>         m->m_num_mds = n = ceph_decode_32(p);
>         m->m_num_active_mds = m->m_num_mds;
>
> +       /*
> +        * the possible max rank, it maybe larger than the m->m_num_mds,
> +        * for example if the mds_max == 2 in the cluster, when the MDS(0)
> +        * was laggy and being replaced by a new MDS, we will temporarily
> +        * receive a new mds map with n_num_mds == 1 and the active MDS(1),
> +        * and the mds rank >= m->m_num_mds.
> +        */
> +       possible_max_rank = max((u32)m->m_num_mds, m->m_max_mds);

... making this cast unnecessary,

> +
>         m->m_info = kcalloc(m->m_num_mds, sizeof(*m->m_info), GFP_NOFS);
>         if (!m->m_info)
>                 goto nomem;
> @@ -238,7 +248,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
>                      ceph_mds_state_name(state),
>                      laggy ? "(laggy)" : "");
>
> -               if (mds < 0 || mds >= m->m_num_mds) {
> +               if (mds < 0 || mds >= possible_max_rank) {

... and avoiding sign mismatch here.

Thanks,

                Ilya
diff mbox series

Patch

diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index 284d68646c40..a77e0ecb9a6b 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -129,6 +129,7 @@  struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
 	int err;
 	u8 mdsmap_v, mdsmap_cv;
 	u16 mdsmap_ev;
+	u32 possible_max_rank;
 
 	m = kzalloc(sizeof(*m), GFP_NOFS);
 	if (!m)
@@ -164,6 +165,15 @@  struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
 	m->m_num_mds = n = ceph_decode_32(p);
 	m->m_num_active_mds = m->m_num_mds;
 
+	/*
+	 * the possible max rank, it maybe larger than the m->m_num_mds,
+	 * for example if the mds_max == 2 in the cluster, when the MDS(0)
+	 * was laggy and being replaced by a new MDS, we will temporarily
+	 * receive a new mds map with n_num_mds == 1 and the active MDS(1),
+	 * and the mds rank >= m->m_num_mds.
+	 */
+	possible_max_rank = max((u32)m->m_num_mds, m->m_max_mds);
+
 	m->m_info = kcalloc(m->m_num_mds, sizeof(*m->m_info), GFP_NOFS);
 	if (!m->m_info)
 		goto nomem;
@@ -238,7 +248,7 @@  struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
 		     ceph_mds_state_name(state),
 		     laggy ? "(laggy)" : "");
 
-		if (mds < 0 || mds >= m->m_num_mds) {
+		if (mds < 0 || mds >= possible_max_rank) {
 			pr_warn("mdsmap_decode got incorrect mds(%d)\n", mds);
 			continue;
 		}