[3/3] mdsmap: only choose an MDS that is in up:active state and not laggy
diff mbox series

Message ID 20191120082902.38666-4-xiubli@redhat.com
State New
Headers show
Series
  • mdsmap: fix mds choosing
Related show

Commit Message

Xiubo Li Nov. 20, 2019, 8:29 a.m. UTC
From: Xiubo Li <xiubli@redhat.com>

Even when an MDS is in the up:active state, it may still be laggy.
Skip laggy MDSs when choosing one.

Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/mds_client.c |  6 ++++--
 fs/ceph/mdsmap.c     | 13 +++++++++----
 2 files changed, 13 insertions(+), 6 deletions(-)

Comments

Yan, Zheng Nov. 21, 2019, 2:46 a.m. UTC | #1
On 11/20/19 4:29 PM, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
> 
> Even the MDS is in up:active state, but it also maybe laggy. Here
> will skip the laggy MDSs.
> 
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>   fs/ceph/mds_client.c |  6 ++++--
>   fs/ceph/mdsmap.c     | 13 +++++++++----
>   2 files changed, 13 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index 82a929084671..a4e7026aaec9 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -972,7 +972,8 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
>   				     frag.frag, mds,
>   				     (int)r, frag.ndist);
>   				if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
> -				    CEPH_MDS_STATE_ACTIVE)
> +				    CEPH_MDS_STATE_ACTIVE &&
> +				    !ceph_mdsmap_is_laggy(mdsc->mdsmap, mds))
>   					goto out;
>   			}
>   
> @@ -987,7 +988,8 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
>   				     "frag %u mds%d (auth)\n",
>   				     inode, ceph_vinop(inode), frag.frag, mds);
>   				if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
> -				    CEPH_MDS_STATE_ACTIVE)
> +				    CEPH_MDS_STATE_ACTIVE &&
> +				    !ceph_mdsmap_is_laggy(mdsc->mdsmap, mds))
>   					goto out;
>   			}
>   		}
In the USE_AUTH_MDS case, the request can only be handled by the auth MDS.
The client should send the request to the auth MDS even if it seems laggy.


> diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
> index 8b4f93e5b468..098669e6f1e4 100644
> --- a/fs/ceph/mdsmap.c
> +++ b/fs/ceph/mdsmap.c
> @@ -13,6 +13,7 @@
>   
>   #include "super.h"
>   
> +#define CEPH_MDS_IS_READY(i) (m->m_info[i].state > 0 && !m->m_info[i].laggy)
>   
>   /*
>    * choose a random mds that is "up" (i.e. has a state > 0), or -1.
> @@ -23,12 +24,16 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
>   	int i, j;
>   
>   	/* special case for one mds */
> -	if (1 == m->m_num_mds && m->m_info[0].state > 0)
> -		return 0;
> +	if (1 == m->m_num_mds && m->m_info[0].state > 0) {
> +		if (m->m_info[0].laggy)
> +			return -1;
> +		else
> +			return 0;
> +	}
>   
>   	/* count */
>   	for (i = 0; i < m->m_num_mds; i++)
> -		if (m->m_info[i].state > 0)
> +		if (CEPH_MDS_IS_READY(i))
>   			n++;
>   	if (n == 0)
>   		return -1;
> @@ -36,7 +41,7 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
>   	/* pick */
>   	n = prandom_u32() % n;
>   	for (j = 0, i = 0; i < m->m_num_mds; i++) {
> -		if (m->m_info[i].state > 0)
> +		if (CEPH_MDS_IS_READY(i))
>   			j++;
>   		if (j > n)
>   			break;
>
Xiubo Li Nov. 21, 2019, 5:24 a.m. UTC | #2
On 2019/11/21 10:46, Yan, Zheng wrote:
> On 11/20/19 4:29 PM, xiubli@redhat.com wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> Even the MDS is in up:active state, but it also maybe laggy. Here
>> will skip the laggy MDSs.
>>
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>>   fs/ceph/mds_client.c |  6 ++++--
>>   fs/ceph/mdsmap.c     | 13 +++++++++----
>>   2 files changed, 13 insertions(+), 6 deletions(-)
>>
>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>> index 82a929084671..a4e7026aaec9 100644
>> --- a/fs/ceph/mds_client.c
>> +++ b/fs/ceph/mds_client.c
>> @@ -972,7 +972,8 @@ static int __choose_mds(struct ceph_mds_client 
>> *mdsc,
>>                        frag.frag, mds,
>>                        (int)r, frag.ndist);
>>                   if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
>> -                    CEPH_MDS_STATE_ACTIVE)
>> +                    CEPH_MDS_STATE_ACTIVE &&
>> +                    !ceph_mdsmap_is_laggy(mdsc->mdsmap, mds))
>>                       goto out;
>>               }
>>   @@ -987,7 +988,8 @@ static int __choose_mds(struct ceph_mds_client 
>> *mdsc,
>>                        "frag %u mds%d (auth)\n",
>>                        inode, ceph_vinop(inode), frag.frag, mds);
>>                   if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
>> -                    CEPH_MDS_STATE_ACTIVE)
>> +                    CEPH_MDS_STATE_ACTIVE &&
>> +                    !ceph_mdsmap_is_laggy(mdsc->mdsmap, mds))
>>                       goto out;
>>               }
>>           }
> for use USE_AUTH_MDS case, request can only be handled by auth mds. 
> client should send request to auth mds even it seems laggy.
>
BTW, what if the corresponding auth MDS is down: is the client allowed to
choose another MDS? From the current code it seems it might. Or, as long as
the corresponding auth MDS is in the up:active state, can the requests
only be handled by it?

Thanks.


>
>> diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
>> index 8b4f93e5b468..098669e6f1e4 100644
>> --- a/fs/ceph/mdsmap.c
>> +++ b/fs/ceph/mdsmap.c
>> @@ -13,6 +13,7 @@
>>     #include "super.h"
>>   +#define CEPH_MDS_IS_READY(i) (m->m_info[i].state > 0 && 
>> !m->m_info[i].laggy)
>>     /*
>>    * choose a random mds that is "up" (i.e. has a state > 0), or -1.
>> @@ -23,12 +24,16 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap 
>> *m)
>>       int i, j;
>>         /* special case for one mds */
>> -    if (1 == m->m_num_mds && m->m_info[0].state > 0)
>> -        return 0;
>> +    if (1 == m->m_num_mds && m->m_info[0].state > 0) {
>> +        if (m->m_info[0].laggy)
>> +            return -1;
>> +        else
>> +            return 0;
>> +    }
>>         /* count */
>>       for (i = 0; i < m->m_num_mds; i++)
>> -        if (m->m_info[i].state > 0)
>> +        if (CEPH_MDS_IS_READY(i))
>>               n++;
>>       if (n == 0)
>>           return -1;
>> @@ -36,7 +41,7 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
>>       /* pick */
>>       n = prandom_u32() % n;
>>       for (j = 0, i = 0; i < m->m_num_mds; i++) {
>> -        if (m->m_info[i].state > 0)
>> +        if (CEPH_MDS_IS_READY(i))
>>               j++;
>>           if (j > n)
>>               break;
>>
>
Yan, Zheng Nov. 21, 2019, 8:19 a.m. UTC | #3
On 11/21/19 1:24 PM, Xiubo Li wrote:
> On 2019/11/21 10:46, Yan, Zheng wrote:
>> On 11/20/19 4:29 PM, xiubli@redhat.com wrote:
>>> From: Xiubo Li <xiubli@redhat.com>
>>>
>>> Even the MDS is in up:active state, but it also maybe laggy. Here
>>> will skip the laggy MDSs.
>>>
>>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>>> ---
>>>   fs/ceph/mds_client.c |  6 ++++--
>>>   fs/ceph/mdsmap.c     | 13 +++++++++----
>>>   2 files changed, 13 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>>> index 82a929084671..a4e7026aaec9 100644
>>> --- a/fs/ceph/mds_client.c
>>> +++ b/fs/ceph/mds_client.c
>>> @@ -972,7 +972,8 @@ static int __choose_mds(struct ceph_mds_client 
>>> *mdsc,
>>>                        frag.frag, mds,
>>>                        (int)r, frag.ndist);
>>>                   if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
>>> -                    CEPH_MDS_STATE_ACTIVE)
>>> +                    CEPH_MDS_STATE_ACTIVE &&
>>> +                    !ceph_mdsmap_is_laggy(mdsc->mdsmap, mds))
>>>                       goto out;
>>>               }
>>>   @@ -987,7 +988,8 @@ static int __choose_mds(struct ceph_mds_client 
>>> *mdsc,
>>>                        "frag %u mds%d (auth)\n",
>>>                        inode, ceph_vinop(inode), frag.frag, mds);
>>>                   if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
>>> -                    CEPH_MDS_STATE_ACTIVE)
>>> +                    CEPH_MDS_STATE_ACTIVE &&
>>> +                    !ceph_mdsmap_is_laggy(mdsc->mdsmap, mds))
>>>                       goto out;
>>>               }
>>>           }
>> for use USE_AUTH_MDS case, request can only be handled by auth mds. 
>> client should send request to auth mds even it seems laggy.
>>
> BTW, what if the coreesponding auth mds was down, will it allow to 
> choose other mds ? From the current code it seems might. Or as long as 
> when the corresponding auth mds is in up:active state will the requests 
> only could to be handled by it ?
> 

Some requests can only be handled by a given MDS. Choosing another MDS just
wastes resources.



> Thanks.
> 
> 
>>
>>> diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
>>> index 8b4f93e5b468..098669e6f1e4 100644
>>> --- a/fs/ceph/mdsmap.c
>>> +++ b/fs/ceph/mdsmap.c
>>> @@ -13,6 +13,7 @@
>>>     #include "super.h"
>>>   +#define CEPH_MDS_IS_READY(i) (m->m_info[i].state > 0 && 
>>> !m->m_info[i].laggy)
>>>     /*
>>>    * choose a random mds that is "up" (i.e. has a state > 0), or -1.
>>> @@ -23,12 +24,16 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap 
>>> *m)
>>>       int i, j;
>>>         /* special case for one mds */
>>> -    if (1 == m->m_num_mds && m->m_info[0].state > 0)
>>> -        return 0;
>>> +    if (1 == m->m_num_mds && m->m_info[0].state > 0) {
>>> +        if (m->m_info[0].laggy)
>>> +            return -1;
>>> +        else
>>> +            return 0;
>>> +    }
>>>         /* count */
>>>       for (i = 0; i < m->m_num_mds; i++)
>>> -        if (m->m_info[i].state > 0)
>>> +        if (CEPH_MDS_IS_READY(i))
>>>               n++;
>>>       if (n == 0)
>>>           return -1;
>>> @@ -36,7 +41,7 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
>>>       /* pick */
>>>       n = prandom_u32() % n;
>>>       for (j = 0, i = 0; i < m->m_num_mds; i++) {
>>> -        if (m->m_info[i].state > 0)
>>> +        if (CEPH_MDS_IS_READY(i))
>>>               j++;
>>>           if (j > n)
>>>               break;
>>>
>>
>
Xiubo Li Nov. 21, 2019, 9:47 a.m. UTC | #4
On 2019/11/21 16:19, Yan, Zheng wrote:
> On 11/21/19 1:24 PM, Xiubo Li wrote:
>> On 2019/11/21 10:46, Yan, Zheng wrote:
>>> On 11/20/19 4:29 PM, xiubli@redhat.com wrote:
>>>> From: Xiubo Li <xiubli@redhat.com>
>>>>
>>>> Even the MDS is in up:active state, but it also maybe laggy. Here
>>>> will skip the laggy MDSs.
>>>>
>>>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>>>> ---
>>>>   fs/ceph/mds_client.c |  6 ++++--
>>>>   fs/ceph/mdsmap.c     | 13 +++++++++----
>>>>   2 files changed, 13 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>>>> index 82a929084671..a4e7026aaec9 100644
>>>> --- a/fs/ceph/mds_client.c
>>>> +++ b/fs/ceph/mds_client.c
>>>> @@ -972,7 +972,8 @@ static int __choose_mds(struct ceph_mds_client 
>>>> *mdsc,
>>>>                        frag.frag, mds,
>>>>                        (int)r, frag.ndist);
>>>>                   if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
>>>> -                    CEPH_MDS_STATE_ACTIVE)
>>>> +                    CEPH_MDS_STATE_ACTIVE &&
>>>> +                    !ceph_mdsmap_is_laggy(mdsc->mdsmap, mds))
>>>>                       goto out;
>>>>               }
>>>>   @@ -987,7 +988,8 @@ static int __choose_mds(struct 
>>>> ceph_mds_client *mdsc,
>>>>                        "frag %u mds%d (auth)\n",
>>>>                        inode, ceph_vinop(inode), frag.frag, mds);
>>>>                   if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
>>>> -                    CEPH_MDS_STATE_ACTIVE)
>>>> +                    CEPH_MDS_STATE_ACTIVE &&
>>>> +                    !ceph_mdsmap_is_laggy(mdsc->mdsmap, mds))
>>>>                       goto out;
>>>>               }
>>>>           }
>>> for use USE_AUTH_MDS case, request can only be handled by auth mds. 
>>> client should send request to auth mds even it seems laggy.
>>>
>> BTW, what if the coreesponding auth mds was down, will it allow to 
>> choose other mds ? From the current code it seems might. Or as long 
>> as when the corresponding auth mds is in up:active state will the 
>> requests only could to be handled by it ?
>>
>
> Some requests can only be handled by given MDS. Choosing other mds 
> just wastes resource.
>
>
Okay, will check it again.

Thanks Yan.

BRs


>
>> Thanks.
>>
>>
>>>
>>>> diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
>>>> index 8b4f93e5b468..098669e6f1e4 100644
>>>> --- a/fs/ceph/mdsmap.c
>>>> +++ b/fs/ceph/mdsmap.c
>>>> @@ -13,6 +13,7 @@
>>>>     #include "super.h"
>>>>   +#define CEPH_MDS_IS_READY(i) (m->m_info[i].state > 0 && 
>>>> !m->m_info[i].laggy)
>>>>     /*
>>>>    * choose a random mds that is "up" (i.e. has a state > 0), or -1.
>>>> @@ -23,12 +24,16 @@ int ceph_mdsmap_get_random_mds(struct 
>>>> ceph_mdsmap *m)
>>>>       int i, j;
>>>>         /* special case for one mds */
>>>> -    if (1 == m->m_num_mds && m->m_info[0].state > 0)
>>>> -        return 0;
>>>> +    if (1 == m->m_num_mds && m->m_info[0].state > 0) {
>>>> +        if (m->m_info[0].laggy)
>>>> +            return -1;
>>>> +        else
>>>> +            return 0;
>>>> +    }
>>>>         /* count */
>>>>       for (i = 0; i < m->m_num_mds; i++)
>>>> -        if (m->m_info[i].state > 0)
>>>> +        if (CEPH_MDS_IS_READY(i))
>>>>               n++;
>>>>       if (n == 0)
>>>>           return -1;
>>>> @@ -36,7 +41,7 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap 
>>>> *m)
>>>>       /* pick */
>>>>       n = prandom_u32() % n;
>>>>       for (j = 0, i = 0; i < m->m_num_mds; i++) {
>>>> -        if (m->m_info[i].state > 0)
>>>> +        if (CEPH_MDS_IS_READY(i))
>>>>               j++;
>>>>           if (j > n)
>>>>               break;
>>>>
>>>
>>
>

Patch
diff mbox series

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 82a929084671..a4e7026aaec9 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -972,7 +972,8 @@  static int __choose_mds(struct ceph_mds_client *mdsc,
 				     frag.frag, mds,
 				     (int)r, frag.ndist);
 				if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
-				    CEPH_MDS_STATE_ACTIVE)
+				    CEPH_MDS_STATE_ACTIVE &&
+				    !ceph_mdsmap_is_laggy(mdsc->mdsmap, mds))
 					goto out;
 			}
 
@@ -987,7 +988,8 @@  static int __choose_mds(struct ceph_mds_client *mdsc,
 				     "frag %u mds%d (auth)\n",
 				     inode, ceph_vinop(inode), frag.frag, mds);
 				if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
-				    CEPH_MDS_STATE_ACTIVE)
+				    CEPH_MDS_STATE_ACTIVE &&
+				    !ceph_mdsmap_is_laggy(mdsc->mdsmap, mds))
 					goto out;
 			}
 		}
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index 8b4f93e5b468..098669e6f1e4 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -13,6 +13,7 @@ 
 
 #include "super.h"
 
+#define CEPH_MDS_IS_READY(i) (m->m_info[i].state > 0 && !m->m_info[i].laggy)
 
 /*
  * choose a random mds that is "up" (i.e. has a state > 0), or -1.
@@ -23,12 +24,16 @@  int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
 	int i, j;
 
 	/* special case for one mds */
-	if (1 == m->m_num_mds && m->m_info[0].state > 0)
-		return 0;
+	if (1 == m->m_num_mds && m->m_info[0].state > 0) {
+		if (m->m_info[0].laggy)
+			return -1;
+		else
+			return 0;
+	}
 
 	/* count */
 	for (i = 0; i < m->m_num_mds; i++)
-		if (m->m_info[i].state > 0)
+		if (CEPH_MDS_IS_READY(i))
 			n++;
 	if (n == 0)
 		return -1;
@@ -36,7 +41,7 @@  int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
 	/* pick */
 	n = prandom_u32() % n;
 	for (j = 0, i = 0; i < m->m_num_mds; i++) {
-		if (m->m_info[i].state > 0)
+		if (CEPH_MDS_IS_READY(i))
 			j++;
 		if (j > n)
 			break;