diff mbox

[1/2] No MDS mount error fix

Message ID 1b8c4b9c49deb956e9d065b0677bfeaf17c968d2.1386442053.git.plageat90@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Mikhail Campos Guadamuz Dec. 7, 2013, 6:59 p.m. UTC
For http://tracker.ceph.com/issues/4386

It determines situation, when a user is trying to mount CephFS
with no MDS present. Return ECOMM from
open_root_dentry which can be analyzed then by ceph.mount

Signed-off-by: Mikhail Campos Guadamuz <plageat90@gmail.com>
---
 fs/ceph/mdsmap.c            | 19 ++++++++++++++++---
 fs/ceph/super.c             | 10 +++++++++-
 include/linux/ceph/mdsmap.h |  1 +
 3 files changed, 26 insertions(+), 4 deletions(-)

Comments

Yan, Zheng Dec. 7, 2013, 1:59 p.m. UTC | #1
On Sun, Dec 8, 2013 at 2:59 AM, Mikhail Campos Guadamuz
<plageat90@gmail.com> wrote:
> For http://tracker.ceph.com/issues/4386
>
> It determines situation, when a user is trying to mount CephFS
> with no MDS present. Return ECOMM from
> open_root_dentry which can be analyzed then by ceph.mount
>
> Signed-off-by: Mikhail Campos Guadamuz <plageat90@gmail.com>
> ---
>  fs/ceph/mdsmap.c            | 19 ++++++++++++++++---
>  fs/ceph/super.c             | 10 +++++++++-
>  include/linux/ceph/mdsmap.h |  1 +
>  3 files changed, 26 insertions(+), 4 deletions(-)
>
> diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
> index 132b64e..3a6ba8a 100644
> --- a/fs/ceph/mdsmap.c
> +++ b/fs/ceph/mdsmap.c
> @@ -12,6 +12,20 @@
>
>  #include "super.h"
>
> +/*
> + * count active mds's
> + */
> +int ceph_mdsmap_active_mds_count(struct ceph_mdsmap *m)
> +{
> +    int n = 0;
> +    int i;
> +
> +    for(i = 0; i < m->m_max_mds; ++i)
> +       if(m->m_info[i].state > 0)
> +           ++n;
> +
> +    return  n;
> +}
>
>  /*
>   * choose a random mds that is "up" (i.e. has a state > 0), or -1.
> @@ -26,9 +40,8 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
>                 return 0;
>
>         /* count */
> -       for (i = 0; i < m->m_max_mds; i++)
> -               if (m->m_info[i].state > 0)
> -                       n++;
> +       n = ceph_mdsmap_active_mds_count(m);
> +
>         if (n == 0)
>                 return -1;
>
> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
> index 6627b26..4d33d68 100644
> --- a/fs/ceph/super.c
> +++ b/fs/ceph/super.c
> @@ -674,7 +674,15 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
>         struct ceph_mds_request *req = NULL;
>         int err;
>         struct dentry *root;
> -
> +
> +       /* check for mds*/
> +       if( 0 == ceph_mdsmap_active_mds_count(mdsc->mdsmap) )
> +       {
> +           pr_info("active mds not found, possible not exist\n");
> +           root = ERR_PTR( -ECOMM );
> +           return root;
> +       }

I think we should return error immediately only if there is no mds
(mdsmap->m_max_mds == 0).  If there is inactive mds, we should stick
to current behavior (wait until timeout). Furthermore please don't use
uncommon error code ECOMM, use common error code such as EINVAL.

Regards
Yan, Zheng


> +
>         /* open dir */
>         dout("open_root_inode opening '%s'\n", path);
>         req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
> diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h
> index 87ed09f..4d7d502 100644
> --- a/include/linux/ceph/mdsmap.h
> +++ b/include/linux/ceph/mdsmap.h
> @@ -56,6 +56,7 @@ static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w)
>         return false;
>  }
>
> +extern int ceph_mdsmap_active_mds_count(struct ceph_mdsmap *m);
>  extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m);
>  extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end);
>  extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m);
> --
> 1.8.3.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Dzianis Huznou Dec. 9, 2013, 12:02 p.m. UTC | #2
On Sat, 2013-12-07 at 21:59 +0800, Yan, Zheng wrote:
> On Sun, Dec 8, 2013 at 2:59 AM, Mikhail Campos Guadamuz
> <plageat90@gmail.com> wrote:
> > For http://tracker.ceph.com/issues/4386
> >
> > It determines situation, when a user is trying to mount CephFS
> > with no MDS present. Return ECOMM from
> > open_root_dentry which can be analyzed then by ceph.mount
> >
> > Signed-off-by: Mikhail Campos Guadamuz <plageat90@gmail.com>
> > ---
> >  fs/ceph/mdsmap.c            | 19 ++++++++++++++++---
> >  fs/ceph/super.c             | 10 +++++++++-
> >  include/linux/ceph/mdsmap.h |  1 +
> >  3 files changed, 26 insertions(+), 4 deletions(-)
> >
> > diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
> > index 132b64e..3a6ba8a 100644
> > --- a/fs/ceph/mdsmap.c
> > +++ b/fs/ceph/mdsmap.c
> > @@ -12,6 +12,20 @@
> >
> >  #include "super.h"
> >
> > +/*
> > + * count active mds's
> > + */
> > +int ceph_mdsmap_active_mds_count(struct ceph_mdsmap *m)
> > +{
> > +    int n = 0;
> > +    int i;
> > +
> > +    for(i = 0; i < m->m_max_mds; ++i)
> > +       if(m->m_info[i].state > 0)
> > +           ++n;
> > +
> > +    return  n;
> > +}
> >
> >  /*
> >   * choose a random mds that is "up" (i.e. has a state > 0), or -1.
> > @@ -26,9 +40,8 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
> >                 return 0;
> >
> >         /* count */
> > -       for (i = 0; i < m->m_max_mds; i++)
> > -               if (m->m_info[i].state > 0)
> > -                       n++;
> > +       n = ceph_mdsmap_active_mds_count(m);
> > +
> >         if (n == 0)
> >                 return -1;
> >
> > diff --git a/fs/ceph/super.c b/fs/ceph/super.c
> > index 6627b26..4d33d68 100644
> > --- a/fs/ceph/super.c
> > +++ b/fs/ceph/super.c
> > @@ -674,7 +674,15 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
> >         struct ceph_mds_request *req = NULL;
> >         int err;
> >         struct dentry *root;
> > -
> > +
> > +       /* check for mds*/
> > +       if( 0 == ceph_mdsmap_active_mds_count(mdsc->mdsmap) )
> > +       {
> > +           pr_info("active mds not found, possible not exist\n");
> > +           root = ERR_PTR( -ECOMM );
> > +           return root;
> > +       }
> 
> I think we should return error immediately only if there is no mds
> (mdsmap->m_max_mds == 0).  If there is inactive mds, we should stick
> to current behavior (wait until timeout). Furthermore please don't use
> uncommon error code ECOMM, use common error code such as EINVAL.
> 
> Regards
> Yan, Zheng
> 
> 
> > +
> >         /* open dir */
> >         dout("open_root_inode opening '%s'\n", path);
> >         req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
> > diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h
> > index 87ed09f..4d7d502 100644
> > --- a/include/linux/ceph/mdsmap.h
> > +++ b/include/linux/ceph/mdsmap.h
> > @@ -56,6 +56,7 @@ static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w)
> >         return false;
> >  }
> >
> > +extern int ceph_mdsmap_active_mds_count(struct ceph_mdsmap *m);
> >  extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m);
> >  extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end);
> >  extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m);
> > --
> > 1.8.3.1
> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

I think we can't check mds presence by (mdsmap->m_max_mds == 0)
statement, because max_mds value does not strictly represent the number
of mds. For example, we have max_mds = 1 (by default) no matter have we
one mds or haven't. Is this a expected behavior?


--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yan, Zheng Dec. 9, 2013, 1:13 p.m. UTC | #3
On Mon, Dec 9, 2013 at 8:02 PM, Dzianis Huznou <dzianis_huznou@epam.com> wrote:
> On Sat, 2013-12-07 at 21:59 +0800, Yan, Zheng wrote:
>> On Sun, Dec 8, 2013 at 2:59 AM, Mikhail Campos Guadamuz
>> <plageat90@gmail.com> wrote:
>> > For http://tracker.ceph.com/issues/4386
>> >
>> > It determines situation, when a user is trying to mount CephFS
>> > with no MDS present. Return ECOMM from
>> > open_root_dentry which can be analyzed then by ceph.mount
>> >
>> > Signed-off-by: Mikhail Campos Guadamuz <plageat90@gmail.com>
>> > ---
>> >  fs/ceph/mdsmap.c            | 19 ++++++++++++++++---
>> >  fs/ceph/super.c             | 10 +++++++++-
>> >  include/linux/ceph/mdsmap.h |  1 +
>> >  3 files changed, 26 insertions(+), 4 deletions(-)
>> >
>> > diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
>> > index 132b64e..3a6ba8a 100644
>> > --- a/fs/ceph/mdsmap.c
>> > +++ b/fs/ceph/mdsmap.c
>> > @@ -12,6 +12,20 @@
>> >
>> >  #include "super.h"
>> >
>> > +/*
>> > + * count active mds's
>> > + */
>> > +int ceph_mdsmap_active_mds_count(struct ceph_mdsmap *m)
>> > +{
>> > +    int n = 0;
>> > +    int i;
>> > +
>> > +    for(i = 0; i < m->m_max_mds; ++i)
>> > +       if(m->m_info[i].state > 0)
>> > +           ++n;
>> > +
>> > +    return  n;
>> > +}
>> >
>> >  /*
>> >   * choose a random mds that is "up" (i.e. has a state > 0), or -1.
>> > @@ -26,9 +40,8 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
>> >                 return 0;
>> >
>> >         /* count */
>> > -       for (i = 0; i < m->m_max_mds; i++)
>> > -               if (m->m_info[i].state > 0)
>> > -                       n++;
>> > +       n = ceph_mdsmap_active_mds_count(m);
>> > +
>> >         if (n == 0)
>> >                 return -1;
>> >
>> > diff --git a/fs/ceph/super.c b/fs/ceph/super.c
>> > index 6627b26..4d33d68 100644
>> > --- a/fs/ceph/super.c
>> > +++ b/fs/ceph/super.c
>> > @@ -674,7 +674,15 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
>> >         struct ceph_mds_request *req = NULL;
>> >         int err;
>> >         struct dentry *root;
>> > -
>> > +
>> > +       /* check for mds*/
>> > +       if( 0 == ceph_mdsmap_active_mds_count(mdsc->mdsmap) )
>> > +       {
>> > +           pr_info("active mds not found, possible not exist\n");
>> > +           root = ERR_PTR( -ECOMM );
>> > +           return root;
>> > +       }
>>
>> I think we should return error immediately only if there is no mds
>> (mdsmap->m_max_mds == 0).  If there is inactive mds, we should stick
>> to current behavior (wait until timeout). Furthermore please don't use
>> uncommon error code ECOMM, use common error code such as EINVAL.
>>
>> Regards
>> Yan, Zheng
>>
>>
>> > +
>> >         /* open dir */
>> >         dout("open_root_inode opening '%s'\n", path);
>> >         req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
>> > diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h
>> > index 87ed09f..4d7d502 100644
>> > --- a/include/linux/ceph/mdsmap.h
>> > +++ b/include/linux/ceph/mdsmap.h
>> > @@ -56,6 +56,7 @@ static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w)
>> >         return false;
>> >  }
>> >
>> > +extern int ceph_mdsmap_active_mds_count(struct ceph_mdsmap *m);
>> >  extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m);
>> >  extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end);
>> >  extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m);
>> > --
>> > 1.8.3.1
>> >
>> > --
>> > To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
>> > the body of a message to majordomo@vger.kernel.org
>> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> --
>> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
> I think we can't check mds presence by (mdsmap->m_max_mds == 0)
> statement, because max_mds value does not strictly represent the number
> of mds. For example, we have max_mds = 1 (by default) no matter have we
> one mds or haven't. Is this a expected behavior?
>

I think so. mds may start later, so we should wait until timeout. nfs
has the similar behavior.

Regards
Yan, Zheng
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Li Wang Dec. 9, 2013, 2:26 p.m. UTC | #4
Personally, I don't think there is issue for current implementation, 
either. If no ACTIVE mds, the mount process put to wait, until updated 
MDS map received and with active mds present indicated in the map, it 
will be waked up and go on the mount process, otherwise, EIO returned if 
timeout. If it is boring to hang for a long time, you can specify a 
shorter mount timeout.

On 2013/12/8 2:59, Mikhail Campos Guadamuz wrote:
> For http://tracker.ceph.com/issues/4386
>
> It determines situation, when a user is trying to mount CephFS
> with no MDS present. Return ECOMM from
> open_root_dentry which can be analyzed then by ceph.mount
>
> Signed-off-by: Mikhail Campos Guadamuz <plageat90@gmail.com>
> ---
>   fs/ceph/mdsmap.c            | 19 ++++++++++++++++---
>   fs/ceph/super.c             | 10 +++++++++-
>   include/linux/ceph/mdsmap.h |  1 +
>   3 files changed, 26 insertions(+), 4 deletions(-)
>
> diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
> index 132b64e..3a6ba8a 100644
> --- a/fs/ceph/mdsmap.c
> +++ b/fs/ceph/mdsmap.c
> @@ -12,6 +12,20 @@
>
>   #include "super.h"
>
> +/*
> + * count active mds's
> + */
> +int ceph_mdsmap_active_mds_count(struct ceph_mdsmap *m)
> +{
> +    int n = 0;
> +    int i;
> +
> +    for(i = 0; i < m->m_max_mds; ++i)
> +	if(m->m_info[i].state > 0)
> +	    ++n;
> +
> +    return  n;
> +}
>
>   /*
>    * choose a random mds that is "up" (i.e. has a state > 0), or -1.
> @@ -26,9 +40,8 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
>   		return 0;
>
>   	/* count */
> -	for (i = 0; i < m->m_max_mds; i++)
> -		if (m->m_info[i].state > 0)
> -			n++;
> +	n = ceph_mdsmap_active_mds_count(m);
> +	
>   	if (n == 0)
>   		return -1;
>
> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
> index 6627b26..4d33d68 100644
> --- a/fs/ceph/super.c
> +++ b/fs/ceph/super.c
> @@ -674,7 +674,15 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
>   	struct ceph_mds_request *req = NULL;
>   	int err;
>   	struct dentry *root;
> -
> +	
> +	/* check for mds*/
> +	if( 0 == ceph_mdsmap_active_mds_count(mdsc->mdsmap) )
> +	{
> +	    pr_info("active mds not found, possible not exist\n");
> +	    root = ERR_PTR( -ECOMM );
> +	    return root;
> +	}
> +	
>   	/* open dir */
>   	dout("open_root_inode opening '%s'\n", path);
>   	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
> diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h
> index 87ed09f..4d7d502 100644
> --- a/include/linux/ceph/mdsmap.h
> +++ b/include/linux/ceph/mdsmap.h
> @@ -56,6 +56,7 @@ static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w)
>   	return false;
>   }
>
> +extern int ceph_mdsmap_active_mds_count(struct ceph_mdsmap *m);
>   extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m);
>   extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end);
>   extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m);
>
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Li Wang Dec. 9, 2013, 2:41 p.m. UTC | #5
Well, after double-checking the code, it seems the wait process will be 
unconditionally waked up if new MDS map received. Is there a situation 
that the client is pushed a new MDS map, but still no mds active. If so,
maybe worth a little bit optimization such as calling check_new_map() to 
avoid the client be uselessly waked up ...

On 2013/12/9 22:26, Li Wang wrote:
> Personally, I don't think there is issue for current implementation,
> either. If no ACTIVE mds, the mount process put to wait, until updated
> MDS map received and with active mds present indicated in the map, it
> will be waked up and go on the mount process, otherwise, EIO returned if
> timeout. If it is boring to hang for a long time, you can specify a
> shorter mount timeout.
>
> On 2013/12/8 2:59, Mikhail Campos Guadamuz wrote:
>> For http://tracker.ceph.com/issues/4386
>>
>> It determines situation, when a user is trying to mount CephFS
>> with no MDS present. Return ECOMM from
>> open_root_dentry which can be analyzed then by ceph.mount
>>
>> Signed-off-by: Mikhail Campos Guadamuz <plageat90@gmail.com>
>> ---
>>   fs/ceph/mdsmap.c            | 19 ++++++++++++++++---
>>   fs/ceph/super.c             | 10 +++++++++-
>>   include/linux/ceph/mdsmap.h |  1 +
>>   3 files changed, 26 insertions(+), 4 deletions(-)
>>
>> diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
>> index 132b64e..3a6ba8a 100644
>> --- a/fs/ceph/mdsmap.c
>> +++ b/fs/ceph/mdsmap.c
>> @@ -12,6 +12,20 @@
>>
>>   #include "super.h"
>>
>> +/*
>> + * count active mds's
>> + */
>> +int ceph_mdsmap_active_mds_count(struct ceph_mdsmap *m)
>> +{
>> +    int n = 0;
>> +    int i;
>> +
>> +    for(i = 0; i < m->m_max_mds; ++i)
>> +    if(m->m_info[i].state > 0)
>> +        ++n;
>> +
>> +    return  n;
>> +}
>>
>>   /*
>>    * choose a random mds that is "up" (i.e. has a state > 0), or -1.
>> @@ -26,9 +40,8 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
>>           return 0;
>>
>>       /* count */
>> -    for (i = 0; i < m->m_max_mds; i++)
>> -        if (m->m_info[i].state > 0)
>> -            n++;
>> +    n = ceph_mdsmap_active_mds_count(m);
>> +
>>       if (n == 0)
>>           return -1;
>>
>> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
>> index 6627b26..4d33d68 100644
>> --- a/fs/ceph/super.c
>> +++ b/fs/ceph/super.c
>> @@ -674,7 +674,15 @@ static struct dentry *open_root_dentry(struct
>> ceph_fs_client *fsc,
>>       struct ceph_mds_request *req = NULL;
>>       int err;
>>       struct dentry *root;
>> -
>> +
>> +    /* check for mds*/
>> +    if( 0 == ceph_mdsmap_active_mds_count(mdsc->mdsmap) )
>> +    {
>> +        pr_info("active mds not found, possible not exist\n");
>> +        root = ERR_PTR( -ECOMM );
>> +        return root;
>> +    }
>> +
>>       /* open dir */
>>       dout("open_root_inode opening '%s'\n", path);
>>       req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR,
>> USE_ANY_MDS);
>> diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h
>> index 87ed09f..4d7d502 100644
>> --- a/include/linux/ceph/mdsmap.h
>> +++ b/include/linux/ceph/mdsmap.h
>> @@ -56,6 +56,7 @@ static inline bool ceph_mdsmap_is_laggy(struct
>> ceph_mdsmap *m, int w)
>>       return false;
>>   }
>>
>> +extern int ceph_mdsmap_active_mds_count(struct ceph_mdsmap *m);
>>   extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m);
>>   extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end);
>>   extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m);
>>
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Li Wang Dec. 10, 2013, 1:25 a.m. UTC | #6
Then we have to make a choice between immediately returning with error 
and patiently waiting for mds joining. My suggestion is
(1) Leave an error message from the kernel using 'printk(KERN_WARN"no 
active mds")' something in __choose_mds()
(2) Add a return value 'E_WAITING_FOR_MAP' to __choose_mds(), and 
capture it in ceph_mdsc_do_request(), if the user feel boring to CRTL+C 
to kill the mount process (user should at least know how to interrupt 
the mount :) ), then ceph_mdsc_do_request() know that it is interrupted 
while waiting for new map, then return good error message to user.

On 2013/12/9 22:50, ?????? ?????? wrote:
> I agree with some points. But this patches originally created to fix
> "confusing for new users for hard-understandable return messages" (based
> on http://tracker.ceph.com/issues/4386). The idea was to return a
> different error code, which can be than handled by ceph.mount client for
> printing simple message about "what's going on".
>
>
> 2013/12/9 Li Wang <liwang@ubuntukylin.com <mailto:liwang@ubuntukylin.com>>
>
>     Personally, I don't think there is issue for current implementation,
>     either. If no ACTIVE mds, the mount process put to wait, until
>     updated MDS map received and with active mds present indicated in
>     the map, it will be waked up and go on the mount process, otherwise,
>     EIO returned if timeout. If it is boring to hang for a long time,
>     you can specify a shorter mount timeout.
>
>
>     On 2013/12/8 2:59, Mikhail Campos Guadamuz wrote:
>
>         For http://tracker.ceph.com/__issues/4386
>         <http://tracker.ceph.com/issues/4386>
>
>         It determines situation, when a user is trying to mount CephFS
>         with no MDS present. Return ECOMM from
>         open_root_dentry which can be analyzed then by ceph.mount
>
>         Signed-off-by: Mikhail Campos Guadamuz <plageat90@gmail.com
>         <mailto:plageat90@gmail.com>>
>         ---
>            fs/ceph/mdsmap.c            | 19 ++++++++++++++++---
>            fs/ceph/super.c             | 10 +++++++++-
>            include/linux/ceph/mdsmap.h |  1 +
>            3 files changed, 26 insertions(+), 4 deletions(-)
>
>         diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
>         index 132b64e..3a6ba8a 100644
>         --- a/fs/ceph/mdsmap.c
>         +++ b/fs/ceph/mdsmap.c
>         @@ -12,6 +12,20 @@
>
>            #include "super.h"
>
>         +/*
>         + * count active mds's
>         + */
>         +int ceph_mdsmap_active_mds_count(__struct ceph_mdsmap *m)
>         +{
>         +    int n = 0;
>         +    int i;
>         +
>         +    for(i = 0; i < m->m_max_mds; ++i)
>         +       if(m->m_info[i].state > 0)
>         +           ++n;
>         +
>         +    return  n;
>         +}
>
>            /*
>             * choose a random mds that is "up" (i.e. has a state > 0),
>         or -1.
>         @@ -26,9 +40,8 @@ int ceph_mdsmap_get_random_mds(__struct
>         ceph_mdsmap *m)
>                          return 0;
>
>                  /* count */
>         -       for (i = 0; i < m->m_max_mds; i++)
>         -               if (m->m_info[i].state > 0)
>         -                       n++;
>         +       n = ceph_mdsmap_active_mds_count(__m);
>         +
>                  if (n == 0)
>                          return -1;
>
>         diff --git a/fs/ceph/super.c b/fs/ceph/super.c
>         index 6627b26..4d33d68 100644
>         --- a/fs/ceph/super.c
>         +++ b/fs/ceph/super.c
>         @@ -674,7 +674,15 @@ static struct dentry
>         *open_root_dentry(struct ceph_fs_client *fsc,
>                  struct ceph_mds_request *req = NULL;
>                  int err;
>                  struct dentry *root;
>         -
>         +
>         +       /* check for mds*/
>         +       if( 0 == ceph_mdsmap_active_mds_count(__mdsc->mdsmap) )
>         +       {
>         +           pr_info("active mds not found, possible not exist\n");
>         +           root = ERR_PTR( -ECOMM );
>         +           return root;
>         +       }
>         +
>                  /* open dir */
>                  dout("open_root_inode opening '%s'\n", path);
>                  req = ceph_mdsc_create_request(mdsc,
>         CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
>         diff --git a/include/linux/ceph/mdsmap.h
>         b/include/linux/ceph/mdsmap.h
>         index 87ed09f..4d7d502 100644
>         --- a/include/linux/ceph/mdsmap.h
>         +++ b/include/linux/ceph/mdsmap.h
>         @@ -56,6 +56,7 @@ static inline bool ceph_mdsmap_is_laggy(struct
>         ceph_mdsmap *m, int w)
>                  return false;
>            }
>
>         +extern int ceph_mdsmap_active_mds_count(__struct ceph_mdsmap *m);
>            extern int ceph_mdsmap_get_random_mds(__struct ceph_mdsmap *m);
>            extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void
>         *end);
>            extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m);
>
>
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mikhail_Campos-Guadamuz Dec. 11, 2013, 2:23 p.m. UTC | #7
On 12/10/2013 04:25 AM, Li Wang wrote:
> Then we have to make a choice between immediately returning with error 
> and patiently waiting for mds joining. My suggestion is
> (1) Leave an error message from the kernel using 'printk(KERN_WARN"no 
> active mds")' something in __choose_mds()
> (2) Add a return value 'E_WAITING_FOR_MAP' to __choose_mds(), and 
> capture it in ceph_mdsc_do_request(), if the user feel boring to 
> CRTL+C to kill the mount process (user should at least know how to 
> interrupt the mount :) ), then ceph_mdsc_do_request() know that it is 
> interrupted while waiting for new map, then return good error message 
> to user.
>
> On 2013/12/9 22:50, ?????? ?????? wrote:
>> I agree with some points. But this patches originally created to fix
>> "confusing for new users for hard-understandable return messages" (based
>> on http://tracker.ceph.com/issues/4386). The idea was to return a
>> different error code, which can be than handled by ceph.mount client for
>> printing simple message about "what's going on".
>>
>>
>> 2013/12/9 Li Wang <liwang@ubuntukylin.com 
>> <mailto:liwang@ubuntukylin.com>>
>>
>>     Personally, I don't think there is issue for current implementation,
>>     either. If no ACTIVE mds, the mount process put to wait, until
>>     updated MDS map received and with active mds present indicated in
>>     the map, it will be waked up and go on the mount process, otherwise,
>>     EIO returned if timeout. If it is boring to hang for a long time,
>>     you can specify a shorter mount timeout.
>>
>>
>>     On 2013/12/8 2:59, Mikhail Campos Guadamuz wrote:
>>
>>         For http://tracker.ceph.com/__issues/4386
>>         <http://tracker.ceph.com/issues/4386>
>>
>>         It determines situation, when a user is trying to mount CephFS
>>         with no MDS present. Return ECOMM from
>>         open_root_dentry which can be analyzed then by ceph.mount
>>
>>         Signed-off-by: Mikhail Campos Guadamuz <plageat90@gmail.com
>>         <mailto:plageat90@gmail.com>>
>>         ---
>>            fs/ceph/mdsmap.c            | 19 ++++++++++++++++---
>>            fs/ceph/super.c             | 10 +++++++++-
>>            include/linux/ceph/mdsmap.h |  1 +
>>            3 files changed, 26 insertions(+), 4 deletions(-)
>>
>>         diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
>>         index 132b64e..3a6ba8a 100644
>>         --- a/fs/ceph/mdsmap.c
>>         +++ b/fs/ceph/mdsmap.c
>>         @@ -12,6 +12,20 @@
>>
>>            #include "super.h"
>>
>>         +/*
>>         + * count active mds's
>>         + */
>>         +int ceph_mdsmap_active_mds_count(__struct ceph_mdsmap *m)
>>         +{
>>         +    int n = 0;
>>         +    int i;
>>         +
>>         +    for(i = 0; i < m->m_max_mds; ++i)
>>         +       if(m->m_info[i].state > 0)
>>         +           ++n;
>>         +
>>         +    return  n;
>>         +}
>>
>>            /*
>>             * choose a random mds that is "up" (i.e. has a state > 0),
>>         or -1.
>>         @@ -26,9 +40,8 @@ int ceph_mdsmap_get_random_mds(__struct
>>         ceph_mdsmap *m)
>>                          return 0;
>>
>>                  /* count */
>>         -       for (i = 0; i < m->m_max_mds; i++)
>>         -               if (m->m_info[i].state > 0)
>>         -                       n++;
>>         +       n = ceph_mdsmap_active_mds_count(__m);
>>         +
>>                  if (n == 0)
>>                          return -1;
>>
>>         diff --git a/fs/ceph/super.c b/fs/ceph/super.c
>>         index 6627b26..4d33d68 100644
>>         --- a/fs/ceph/super.c
>>         +++ b/fs/ceph/super.c
>>         @@ -674,7 +674,15 @@ static struct dentry
>>         *open_root_dentry(struct ceph_fs_client *fsc,
>>                  struct ceph_mds_request *req = NULL;
>>                  int err;
>>                  struct dentry *root;
>>         -
>>         +
>>         +       /* check for mds*/
>>         +       if( 0 == ceph_mdsmap_active_mds_count(__mdsc->mdsmap) )
>>         +       {
>>         +           pr_info("active mds not found, possible not 
>> exist\n");
>>         +           root = ERR_PTR( -ECOMM );
>>         +           return root;
>>         +       }
>>         +
>>                  /* open dir */
>>                  dout("open_root_inode opening '%s'\n", path);
>>                  req = ceph_mdsc_create_request(mdsc,
>>         CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
>>         diff --git a/include/linux/ceph/mdsmap.h
>>         b/include/linux/ceph/mdsmap.h
>>         index 87ed09f..4d7d502 100644
>>         --- a/include/linux/ceph/mdsmap.h
>>         +++ b/include/linux/ceph/mdsmap.h
>>         @@ -56,6 +56,7 @@ static inline bool ceph_mdsmap_is_laggy(struct
>>         ceph_mdsmap *m, int w)
>>                  return false;
>>            }
>>
>>         +extern int ceph_mdsmap_active_mds_count(__struct ceph_mdsmap 
>> *m);
>>            extern int ceph_mdsmap_get_random_mds(__struct ceph_mdsmap 
>> *m);
>>            extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void
>>         *end);
>>            extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m);
>>
>>
> -- 
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

Thanks for suggestion. But I cannot understand one thing. We need to 
print message to console about no active mds. Since we can not print 
messages to console from kernel client, we need to return unique 
external error from kernel mount, which can be handled then by 
ceph.mount (and printed to console). All suitable errors are already in 
use (EIO, EINVAL etc.) for other error notification. Can you explain, 
which suitable "common errors" can we use for this purpose? Is there 
another solution for printing error to console?


--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Dzianis Huznou Dec. 17, 2013, 11:28 a.m. UTC | #8
Just a one question. If you are saying about "common error", what
exactly do you mean? Do you mean some error number range that we can
only use? Which unique(!) error can we return from ceph kernel client if
no mds found?

On Sat, 2013-12-07 at 21:59 +0800, Yan, Zheng wrote:
> On Sun, Dec 8, 2013 at 2:59 AM, Mikhail Campos Guadamuz
> <plageat90@gmail.com> wrote:
> > For http://tracker.ceph.com/issues/4386
> >
> > It determines situation, when a user is trying to mount CephFS
> > with no MDS present. Return ECOMM from
> > open_root_dentry which can be analyzed then by ceph.mount
> >
> > Signed-off-by: Mikhail Campos Guadamuz <plageat90@gmail.com>
> > ---
> >  fs/ceph/mdsmap.c            | 19 ++++++++++++++++---
> >  fs/ceph/super.c             | 10 +++++++++-
> >  include/linux/ceph/mdsmap.h |  1 +
> >  3 files changed, 26 insertions(+), 4 deletions(-)
> >
> > diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
> > index 132b64e..3a6ba8a 100644
> > --- a/fs/ceph/mdsmap.c
> > +++ b/fs/ceph/mdsmap.c
> > @@ -12,6 +12,20 @@
> >
> >  #include "super.h"
> >
> > +/*
> > + * count active mds's
> > + */
> > +int ceph_mdsmap_active_mds_count(struct ceph_mdsmap *m)
> > +{
> > +    int n = 0;
> > +    int i;
> > +
> > +    for(i = 0; i < m->m_max_mds; ++i)
> > +       if(m->m_info[i].state > 0)
> > +           ++n;
> > +
> > +    return  n;
> > +}
> >
> >  /*
> >   * choose a random mds that is "up" (i.e. has a state > 0), or -1.
> > @@ -26,9 +40,8 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
> >                 return 0;
> >
> >         /* count */
> > -       for (i = 0; i < m->m_max_mds; i++)
> > -               if (m->m_info[i].state > 0)
> > -                       n++;
> > +       n = ceph_mdsmap_active_mds_count(m);
> > +
> >         if (n == 0)
> >                 return -1;
> >
> > diff --git a/fs/ceph/super.c b/fs/ceph/super.c
> > index 6627b26..4d33d68 100644
> > --- a/fs/ceph/super.c
> > +++ b/fs/ceph/super.c
> > @@ -674,7 +674,15 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
> >         struct ceph_mds_request *req = NULL;
> >         int err;
> >         struct dentry *root;
> > -
> > +
> > +       /* check for mds*/
> > +       if( 0 == ceph_mdsmap_active_mds_count(mdsc->mdsmap) )
> > +       {
> > +           pr_info("active mds not found, possible not exist\n");
> > +           root = ERR_PTR( -ECOMM );
> > +           return root;
> > +       }
> 
> I think we should return error immediately only if there is no mds
> (mdsmap->m_max_mds == 0).  If there is inactive mds, we should stick
> to current behavior (wait until timeout). Furthermore please don't use
> uncommon error code ECOMM, use common error code such as EINVAL.
> 
> Regards
> Yan, Zheng
> 
> 
> > +
> >         /* open dir */
> >         dout("open_root_inode opening '%s'\n", path);
> >         req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
> > diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h
> > index 87ed09f..4d7d502 100644
> > --- a/include/linux/ceph/mdsmap.h
> > +++ b/include/linux/ceph/mdsmap.h
> > @@ -56,6 +56,7 @@ static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w)
> >         return false;
> >  }
> >
> > +extern int ceph_mdsmap_active_mds_count(struct ceph_mdsmap *m);
> >  extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m);
> >  extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end);
> >  extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m);
> > --
> > 1.8.3.1
> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index 132b64e..3a6ba8a 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -12,6 +12,20 @@ 
 
 #include "super.h"
 
+/*
+ * count active mds's
+ */
+int ceph_mdsmap_active_mds_count(struct ceph_mdsmap *m)
+{
+    int n = 0;
+    int i;
+    
+    for(i = 0; i < m->m_max_mds; ++i)
+	if(m->m_info[i].state > 0)
+	    ++n;
+    
+    return  n;
+}
 
 /*
  * choose a random mds that is "up" (i.e. has a state > 0), or -1.
@@ -26,9 +40,8 @@  int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
 		return 0;
 
 	/* count */
-	for (i = 0; i < m->m_max_mds; i++)
-		if (m->m_info[i].state > 0)
-			n++;
+	n = ceph_mdsmap_active_mds_count(m);
+	
 	if (n == 0)
 		return -1;
 
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 6627b26..4d33d68 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -674,7 +674,15 @@  static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
 	struct ceph_mds_request *req = NULL;
 	int err;
 	struct dentry *root;
-
+	
+	/* check for mds*/
+	if( 0 == ceph_mdsmap_active_mds_count(mdsc->mdsmap) )
+	{    
+	    pr_info("active mds not found, possible not exist\n");
+	    root = ERR_PTR( -ECOMM );
+	    return root;
+	}
+	
 	/* open dir */
 	dout("open_root_inode opening '%s'\n", path);
 	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h
index 87ed09f..4d7d502 100644
--- a/include/linux/ceph/mdsmap.h
+++ b/include/linux/ceph/mdsmap.h
@@ -56,6 +56,7 @@  static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w)
 	return false;
 }
 
+extern int ceph_mdsmap_active_mds_count(struct ceph_mdsmap *m);
 extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m);
 extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end);
 extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m);