ceph: abort osd requests on force umount
diff mbox

Message ID 20180511091202.65250-1-zyan@redhat.com
State New
Headers show

Commit Message

Yan, Zheng May 11, 2018, 9:12 a.m. UTC
this avoid force umount getting stuck at ceph_osdc_sync()

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
---
 fs/ceph/super.c                 |  1 +
 include/linux/ceph/osd_client.h |  5 ++++-
 net/ceph/osd_client.c           | 43 ++++++++++++++++++++++++++++++++++++-----
 3 files changed, 43 insertions(+), 6 deletions(-)

Comments

Luis Henriques May 11, 2018, 10:32 a.m. UTC | #1
"Yan, Zheng" <zyan@redhat.com> writes:

> this avoid force umount getting stuck at ceph_osdc_sync()
>
> Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
> ---
>  fs/ceph/super.c                 |  1 +
>  include/linux/ceph/osd_client.h |  5 ++++-
>  net/ceph/osd_client.c           | 43 ++++++++++++++++++++++++++++++++++++-----
>  3 files changed, 43 insertions(+), 6 deletions(-)
>
> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
> index 3c1155803444..40664e13cc0f 100644
> --- a/fs/ceph/super.c
> +++ b/fs/ceph/super.c
> @@ -793,6 +793,7 @@ static void ceph_umount_begin(struct super_block *sb)
>  	if (!fsc)
>  		return;
>  	fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
> +	ceph_osdc_abort_requests(&fsc->client->osdc, -EIO);
>  	ceph_mdsc_force_umount(fsc->mdsc);
>  	return;
>  }
> diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
> index b73dd7ebe585..f61736963236 100644
> --- a/include/linux/ceph/osd_client.h
> +++ b/include/linux/ceph/osd_client.h
> @@ -347,6 +347,7 @@ struct ceph_osd_client {
>  	struct rb_root         linger_map_checks;
>  	atomic_t               num_requests;
>  	atomic_t               num_homeless;
> +	int		       abort_code;
>  	struct delayed_work    timeout_work;
>  	struct delayed_work    osds_timeout_work;
>  #ifdef CONFIG_DEBUG_FS
> @@ -377,7 +378,9 @@ extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
>  				   struct ceph_msg *msg);
>  extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
>  				 struct ceph_msg *msg);
> -void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb);
> +extern void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32);

nit: Why did you drop the arg name 'eb'?

> +extern void ceph_osdc_abort_requests(struct ceph_osd_client *osdc, +
> int err_code);
>  
>  extern void osd_req_op_init(struct ceph_osd_request *osd_req,
>  			    unsigned int which, u16 opcode, u32 flags);
> diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
> index 08b5fc1f90cc..959af0e165ba 100644
> --- a/net/ceph/osd_client.c
> +++ b/net/ceph/osd_client.c
> @@ -2165,9 +2165,9 @@ static void __submit_request(struct ceph_osd_request *req, bool wrlocked)
>  	struct ceph_osd_client *osdc = req->r_osdc;
>  	struct ceph_osd *osd;
>  	enum calc_target_result ct_res;
> +	bool abort_code = 0;

This should be an int, not a bool.

Cheers,
David Disseldorp May 11, 2018, noon UTC | #2
On Fri, 11 May 2018 17:12:02 +0800, Yan, Zheng wrote:

> this avoid force umount getting stuck at ceph_osdc_sync()
> 
> Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
> ---
>  fs/ceph/super.c                 |  1 +
>  include/linux/ceph/osd_client.h |  5 ++++-
>  net/ceph/osd_client.c           | 43 ++++++++++++++++++++++++++++++++++++-----
>  3 files changed, 43 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
> index 3c1155803444..40664e13cc0f 100644
> --- a/fs/ceph/super.c
> +++ b/fs/ceph/super.c
> @@ -793,6 +793,7 @@ static void ceph_umount_begin(struct super_block *sb)
>  	if (!fsc)
>  		return;
>  	fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
> +	ceph_osdc_abort_requests(&fsc->client->osdc, -EIO);

Does the unmount need to guarantee that the failed requests won't be
processed by any OSDs, or is local cancellation fine here?

Cheers, David
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ilya Dryomov May 11, 2018, 12:06 p.m. UTC | #3
On Fri, May 11, 2018 at 11:12 AM, Yan, Zheng <zyan@redhat.com> wrote:
> this avoid force umount getting stuck at ceph_osdc_sync()
>
> Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
> ---
>  fs/ceph/super.c                 |  1 +
>  include/linux/ceph/osd_client.h |  5 ++++-
>  net/ceph/osd_client.c           | 43 ++++++++++++++++++++++++++++++++++++-----
>  3 files changed, 43 insertions(+), 6 deletions(-)
>
> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
> index 3c1155803444..40664e13cc0f 100644
> --- a/fs/ceph/super.c
> +++ b/fs/ceph/super.c
> @@ -793,6 +793,7 @@ static void ceph_umount_begin(struct super_block *sb)
>         if (!fsc)
>                 return;
>         fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
> +       ceph_osdc_abort_requests(&fsc->client->osdc, -EIO);
>         ceph_mdsc_force_umount(fsc->mdsc);
>         return;
>  }
> diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
> index b73dd7ebe585..f61736963236 100644
> --- a/include/linux/ceph/osd_client.h
> +++ b/include/linux/ceph/osd_client.h
> @@ -347,6 +347,7 @@ struct ceph_osd_client {
>         struct rb_root         linger_map_checks;
>         atomic_t               num_requests;
>         atomic_t               num_homeless;
> +       int                    abort_code;

Why osdc->abort_code and all __submit_request() hunks are needed?
If we are in a forced umount situation, no new I/Os should be accepted
anyway.

osdc->lock taken for write in ceph_osdc_abort_requests() is enough to
lock out __submit_request().

Thanks,

                Ilya
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yan, Zheng May 11, 2018, 11:24 p.m. UTC | #4
> On May 11, 2018, at 20:00, David Disseldorp <ddiss@suse.de> wrote:
> 
> On Fri, 11 May 2018 17:12:02 +0800, Yan, Zheng wrote:
> 
>> this avoid force umount getting stuck at ceph_osdc_sync()
>> 
>> Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
>> ---
>> fs/ceph/super.c                 |  1 +
>> include/linux/ceph/osd_client.h |  5 ++++-
>> net/ceph/osd_client.c           | 43 ++++++++++++++++++++++++++++++++++++-----
>> 3 files changed, 43 insertions(+), 6 deletions(-)
>> 
>> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
>> index 3c1155803444..40664e13cc0f 100644
>> --- a/fs/ceph/super.c
>> +++ b/fs/ceph/super.c
>> @@ -793,6 +793,7 @@ static void ceph_umount_begin(struct super_block *sb)
>> 	if (!fsc)
>> 		return;
>> 	fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
>> +	ceph_osdc_abort_requests(&fsc->client->osdc, -EIO);
> 
> Does the unmount need to guarantee that the failed requests won't be
> processed by any OSDs, or is local cancellation fine here?
> 

umount -f does not need to  guarantee that

Regards
Yan, Zheng
> Cheers, David

--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yan, Zheng May 11, 2018, 11:38 p.m. UTC | #5
> On May 11, 2018, at 20:06, Ilya Dryomov <idryomov@gmail.com> wrote:
> 
> On Fri, May 11, 2018 at 11:12 AM, Yan, Zheng <zyan@redhat.com> wrote:
>> this avoid force umount getting stuck at ceph_osdc_sync()
>> 
>> Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
>> ---
>> fs/ceph/super.c                 |  1 +
>> include/linux/ceph/osd_client.h |  5 ++++-
>> net/ceph/osd_client.c           | 43 ++++++++++++++++++++++++++++++++++++-----
>> 3 files changed, 43 insertions(+), 6 deletions(-)
>> 
>> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
>> index 3c1155803444..40664e13cc0f 100644
>> --- a/fs/ceph/super.c
>> +++ b/fs/ceph/super.c
>> @@ -793,6 +793,7 @@ static void ceph_umount_begin(struct super_block *sb)
>>        if (!fsc)
>>                return;
>>        fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
>> +       ceph_osdc_abort_requests(&fsc->client->osdc, -EIO);
>>        ceph_mdsc_force_umount(fsc->mdsc);
>>        return;
>> }
>> diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
>> index b73dd7ebe585..f61736963236 100644
>> --- a/include/linux/ceph/osd_client.h
>> +++ b/include/linux/ceph/osd_client.h
>> @@ -347,6 +347,7 @@ struct ceph_osd_client {
>>        struct rb_root         linger_map_checks;
>>        atomic_t               num_requests;
>>        atomic_t               num_homeless;
>> +       int                    abort_code;
> 
> Why osdc->abort_code and all __submit_request() hunks are needed?
> If we are in a forced umount situation, no new I/Os should be accepted
> anyway.

No code guarantees that ceph_writepages_start()/writepage_nounlock() are
not being executed when user does forced umount.  They may start new
osd requests after forced umount.

Regards
Yan, Zheng



> 
> osdc->lock taken for write in ceph_osdc_abort_requests() is enough to
> lock out __submit_request().
> 
> Thanks,
> 
>                Ilya

--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ilya Dryomov May 14, 2018, 8:37 a.m. UTC | #6
On Sat, May 12, 2018 at 1:38 AM, Yan, Zheng <zyan@redhat.com> wrote:
>
>
>> On May 11, 2018, at 20:06, Ilya Dryomov <idryomov@gmail.com> wrote:
>>
>> On Fri, May 11, 2018 at 11:12 AM, Yan, Zheng <zyan@redhat.com> wrote:
>>> this avoid force umount getting stuck at ceph_osdc_sync()
>>>
>>> Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
>>> ---
>>> fs/ceph/super.c                 |  1 +
>>> include/linux/ceph/osd_client.h |  5 ++++-
>>> net/ceph/osd_client.c           | 43 ++++++++++++++++++++++++++++++++++++-----
>>> 3 files changed, 43 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
>>> index 3c1155803444..40664e13cc0f 100644
>>> --- a/fs/ceph/super.c
>>> +++ b/fs/ceph/super.c
>>> @@ -793,6 +793,7 @@ static void ceph_umount_begin(struct super_block *sb)
>>>        if (!fsc)
>>>                return;
>>>        fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
>>> +       ceph_osdc_abort_requests(&fsc->client->osdc, -EIO);
>>>        ceph_mdsc_force_umount(fsc->mdsc);
>>>        return;
>>> }
>>> diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
>>> index b73dd7ebe585..f61736963236 100644
>>> --- a/include/linux/ceph/osd_client.h
>>> +++ b/include/linux/ceph/osd_client.h
>>> @@ -347,6 +347,7 @@ struct ceph_osd_client {
>>>        struct rb_root         linger_map_checks;
>>>        atomic_t               num_requests;
>>>        atomic_t               num_homeless;
>>> +       int                    abort_code;
>>
>> Why osdc->abort_code and all __submit_request() hunks are needed?
>> If we are in a forced umount situation, no new I/Os should be accepted
>> anyway.
>
> No code guarantees that ceph_writepages_start()/writepage_nounlock() are
> not being executed when user does forced umount.  They may start new
> osd requests after forced umount.

I haven't traced through forced umount steps, but it seems like there
must be a point where we stop accepting requests and attempt to quiesce
the state.

The patch talks about avoiding getting stuck in ceph_osdc_sync().
Is it guaranteed that no new OSD requests can be started after it
completes?

Thanks,

                Ilya
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yan, Zheng May 14, 2018, 9:05 a.m. UTC | #7
> On May 14, 2018, at 16:37, Ilya Dryomov <idryomov@gmail.com> wrote:
> 
> On Sat, May 12, 2018 at 1:38 AM, Yan, Zheng <zyan@redhat.com> wrote:
>> 
>> 
>>> On May 11, 2018, at 20:06, Ilya Dryomov <idryomov@gmail.com> wrote:
>>> 
>>> On Fri, May 11, 2018 at 11:12 AM, Yan, Zheng <zyan@redhat.com> wrote:
>>>> this avoid force umount getting stuck at ceph_osdc_sync()
>>>> 
>>>> Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
>>>> ---
>>>> fs/ceph/super.c                 |  1 +
>>>> include/linux/ceph/osd_client.h |  5 ++++-
>>>> net/ceph/osd_client.c           | 43 ++++++++++++++++++++++++++++++++++++-----
>>>> 3 files changed, 43 insertions(+), 6 deletions(-)
>>>> 
>>>> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
>>>> index 3c1155803444..40664e13cc0f 100644
>>>> --- a/fs/ceph/super.c
>>>> +++ b/fs/ceph/super.c
>>>> @@ -793,6 +793,7 @@ static void ceph_umount_begin(struct super_block *sb)
>>>>       if (!fsc)
>>>>               return;
>>>>       fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
>>>> +       ceph_osdc_abort_requests(&fsc->client->osdc, -EIO);
>>>>       ceph_mdsc_force_umount(fsc->mdsc);
>>>>       return;
>>>> }
>>>> diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
>>>> index b73dd7ebe585..f61736963236 100644
>>>> --- a/include/linux/ceph/osd_client.h
>>>> +++ b/include/linux/ceph/osd_client.h
>>>> @@ -347,6 +347,7 @@ struct ceph_osd_client {
>>>>       struct rb_root         linger_map_checks;
>>>>       atomic_t               num_requests;
>>>>       atomic_t               num_homeless;
>>>> +       int                    abort_code;
>>> 
>>> Why osdc->abort_code and all __submit_request() hunks are needed?
>>> If we are in a forced umount situation, no new I/Os should be accepted
>>> anyway.
>> 
>> No code guarantees that ceph_writepages_start()/writepage_nounlock() are
>> not being executed when user does forced umount.  They may start new
>> osd requests after forced umount.
> 
> I haven't traced through forced umount steps, but it seems like there
> must be a point where we stop accepting requests and attempt to quiesce
> the state.
> 

To support this,  cephfs code needs to introduce a rwsem, read lock the rwsem before calling ceph_osdc_wait_request(). Besides, the rwsem can not be used in the cases of blocking osdc functions (such as ceph_osdc_readpages). So I think it’s better to implement this in libceph.


> The patch talks about avoiding getting stuck in ceph_osdc_sync().
> Is it guaranteed that no new OSD requests can be started after it
> completes?
> 

No, it doesn’t.

Regards
Yan, Zheng

> Thanks,
> 
>                Ilya

--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ilya Dryomov May 14, 2018, 9:14 a.m. UTC | #8
On Mon, May 14, 2018 at 11:05 AM, Yan, Zheng <zyan@redhat.com> wrote:
>
>> On May 14, 2018, at 16:37, Ilya Dryomov <idryomov@gmail.com> wrote:
>>
>> On Sat, May 12, 2018 at 1:38 AM, Yan, Zheng <zyan@redhat.com> wrote:
>>>
>>>
>>>> On May 11, 2018, at 20:06, Ilya Dryomov <idryomov@gmail.com> wrote:
>>>>
>>>> On Fri, May 11, 2018 at 11:12 AM, Yan, Zheng <zyan@redhat.com> wrote:
>>>>> this avoid force umount getting stuck at ceph_osdc_sync()
>>>>>
>>>>> Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
>>>>> ---
>>>>> fs/ceph/super.c                 |  1 +
>>>>> include/linux/ceph/osd_client.h |  5 ++++-
>>>>> net/ceph/osd_client.c           | 43 ++++++++++++++++++++++++++++++++++++-----
>>>>> 3 files changed, 43 insertions(+), 6 deletions(-)
>>>>>
>>>>> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
>>>>> index 3c1155803444..40664e13cc0f 100644
>>>>> --- a/fs/ceph/super.c
>>>>> +++ b/fs/ceph/super.c
>>>>> @@ -793,6 +793,7 @@ static void ceph_umount_begin(struct super_block *sb)
>>>>>       if (!fsc)
>>>>>               return;
>>>>>       fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
>>>>> +       ceph_osdc_abort_requests(&fsc->client->osdc, -EIO);
>>>>>       ceph_mdsc_force_umount(fsc->mdsc);
>>>>>       return;
>>>>> }
>>>>> diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
>>>>> index b73dd7ebe585..f61736963236 100644
>>>>> --- a/include/linux/ceph/osd_client.h
>>>>> +++ b/include/linux/ceph/osd_client.h
>>>>> @@ -347,6 +347,7 @@ struct ceph_osd_client {
>>>>>       struct rb_root         linger_map_checks;
>>>>>       atomic_t               num_requests;
>>>>>       atomic_t               num_homeless;
>>>>> +       int                    abort_code;
>>>>
>>>> Why osdc->abort_code and all __submit_request() hunks are needed?
>>>> If we are in a forced umount situation, no new I/Os should be accepted
>>>> anyway.
>>>
>>> No code guarantees that ceph_writepages_start()/writepage_nounlock() are
>>> not being executed when user does forced umount.  They may start new
>>> osd requests after forced umount.
>>
>> I haven't traced through forced umount steps, but it seems like there
>> must be a point where we stop accepting requests and attempt to quiesce
>> the state.
>>
>
> To support this,  cephfs code needs to introduce a rwsem, read lock the rwsem before calling ceph_osdc_wait_request(). Besides, the rwsem can not be used in the cases of blocking osdc functions (such as ceph_osdc_readpages). So I think it’s better to implement this in libceph.
>
>
>> The patch talks about avoiding getting stuck in ceph_osdc_sync().
>> Is it guaranteed that no new OSD requests can be started after it
>> completes?
>>
>
> No, it doesn’t.

What is the point of calling ceph_osdc_sync() in the first place then?

Thanks,

                Ilya
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yan, Zheng May 14, 2018, 9:51 a.m. UTC | #9
> On May 14, 2018, at 17:14, Ilya Dryomov <idryomov@gmail.com> wrote:
> 
> On Mon, May 14, 2018 at 11:05 AM, Yan, Zheng <zyan@redhat.com> wrote:
>> 
>>> On May 14, 2018, at 16:37, Ilya Dryomov <idryomov@gmail.com> wrote:
>>> 
>>> On Sat, May 12, 2018 at 1:38 AM, Yan, Zheng <zyan@redhat.com> wrote:
>>>> 
>>>> 
>>>>> On May 11, 2018, at 20:06, Ilya Dryomov <idryomov@gmail.com> wrote:
>>>>> 
>>>>> On Fri, May 11, 2018 at 11:12 AM, Yan, Zheng <zyan@redhat.com> wrote:
>>>>>> this avoid force umount getting stuck at ceph_osdc_sync()
>>>>>> 
>>>>>> Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
>>>>>> ---
>>>>>> fs/ceph/super.c                 |  1 +
>>>>>> include/linux/ceph/osd_client.h |  5 ++++-
>>>>>> net/ceph/osd_client.c           | 43 ++++++++++++++++++++++++++++++++++++-----
>>>>>> 3 files changed, 43 insertions(+), 6 deletions(-)
>>>>>> 
>>>>>> diff --git a/fs/ceph/super.c b/fs/ceph/super.c
>>>>>> index 3c1155803444..40664e13cc0f 100644
>>>>>> --- a/fs/ceph/super.c
>>>>>> +++ b/fs/ceph/super.c
>>>>>> @@ -793,6 +793,7 @@ static void ceph_umount_begin(struct super_block *sb)
>>>>>>      if (!fsc)
>>>>>>              return;
>>>>>>      fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
>>>>>> +       ceph_osdc_abort_requests(&fsc->client->osdc, -EIO);
>>>>>>      ceph_mdsc_force_umount(fsc->mdsc);
>>>>>>      return;
>>>>>> }
>>>>>> diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
>>>>>> index b73dd7ebe585..f61736963236 100644
>>>>>> --- a/include/linux/ceph/osd_client.h
>>>>>> +++ b/include/linux/ceph/osd_client.h
>>>>>> @@ -347,6 +347,7 @@ struct ceph_osd_client {
>>>>>>      struct rb_root         linger_map_checks;
>>>>>>      atomic_t               num_requests;
>>>>>>      atomic_t               num_homeless;
>>>>>> +       int                    abort_code;
>>>>> 
>>>>> Why osdc->abort_code and all __submit_request() hunks are needed?
>>>>> If we are in a forced umount situation, no new I/Os should be accepted
>>>>> anyway.
>>>> 
>>>> No code guarantees that ceph_writepages_start()/writepage_nounlock() are
>>>> not being executed when user does forced umount.  They may start new
>>>> osd requests after forced umount.
>>> 
>>> I haven't traced through forced umount steps, but it seems like there
>>> must be a point where we stop accepting requests and attempt to quiesce
>>> the state.
>>> 
>> 
>> To support this,  cephfs code needs to introduce a rwsem, read lock the rwsem before calling ceph_osdc_wait_request(). Besides, the rwsem can not be used in the cases of blocking osdc functions (such as ceph_osdc_readpages). So I think it’s better to implement this in libceph.
>> 
>> 
>>> The patch talks about avoiding getting stuck in ceph_osdc_sync().
>>> Is it guaranteed that no new OSD requests can be started after it
>>> completes?
>>> 
>> 
>> No, it doesn’t.
> 
> What is the point of calling ceph_osdc_sync() in the first place then?

Sorry, I was wrong about where he hang occurs.  It’s at


[<0>] io_schedule+0xd/0x30
[<0>] wait_on_page_bit_common+0xc6/0x130
[<0>] __filemap_fdatawait_range+0xbd/0x100
[<0>] filemap_fdatawait_keep_errors+0x15/0x40
[<0>] sync_inodes_sb+0x1cf/0x240
[<0>] sync_filesystem+0x52/0x90
[<0>] generic_shutdown_super+0x1d/0x110
[<0>] ceph_kill_sb+0x28/0x80 [ceph]
[<0>] deactivate_locked_super+0x35/0x60
[<0>] cleanup_mnt+0x36/0x70
[<0>] task_work_run+0x79/0xa0
[<0>] exit_to_usermode_loop+0x62/0x70
[<0>] do_syscall_64+0xdb/0xf0
[<0>] entry_SYSCALL_64_after_hwframe+0x44/0xa9
[<0>] 0xffffffffffffffff




> 
> Thanks,
> 
>                Ilya

--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ilya Dryomov May 16, 2018, 5:37 p.m. UTC | #10
On Mon, May 14, 2018 at 11:51 AM, Yan, Zheng <zyan@redhat.com> wrote:
>
> [...]
>
> Sorry, I was wrong about where he hang occurs.  It’s at
>
> [<0>] io_schedule+0xd/0x30
> [<0>] wait_on_page_bit_common+0xc6/0x130
> [<0>] __filemap_fdatawait_range+0xbd/0x100
> [<0>] filemap_fdatawait_keep_errors+0x15/0x40
> [<0>] sync_inodes_sb+0x1cf/0x240
> [<0>] sync_filesystem+0x52/0x90
> [<0>] generic_shutdown_super+0x1d/0x110
> [<0>] ceph_kill_sb+0x28/0x80 [ceph]
> [<0>] deactivate_locked_super+0x35/0x60
> [<0>] cleanup_mnt+0x36/0x70
> [<0>] task_work_run+0x79/0xa0
> [<0>] exit_to_usermode_loop+0x62/0x70
> [<0>] do_syscall_64+0xdb/0xf0
> [<0>] entry_SYSCALL_64_after_hwframe+0x44/0xa9
> [<0>] 0xffffffffffffffff

Makes sense, please make sure to update the commit message.

I pushed wip-umount-force and gave it a quick test.  The filesystem
part appears to need improvement -- lots of "VFS: Busy inodes after
unmount of ceph", more often than not with various crashes.  See the
attached.

Thanks,

                Ilya
[ 1239.752113] ceph: writepage_start 000000004280d596 1099511627776 forced umount
[ 1239.753045] ceph: writepage_start 000000004280d596 1099511627776 forced umount
[ 1239.753925] ceph: writepage_start 000000004280d596 1099511627776 forced umount
[ 1239.754828] ceph: writepage_start 000000004280d596 1099511627776 forced umount
[ 1239.755728] ceph: writepage_start 000000004280d596 1099511627776 forced umount
[ 1239.756633] ceph: writepage_start 000000004280d596 1099511627776 forced umount
[ 1239.757534] ceph: writepage_start 000000004280d596 1099511627776 forced umount
[ 1239.758481] ceph: writepage_start 000000004280d596 1099511627776 forced umount
[ 1239.759464] ceph: writepage_start 000000004280d596 1099511627776 forced umount
[ 1239.760857] ceph: writepage_start 000000004280d596 1099511627776 forced umount
[ 1239.816191] ceph:  dropping dirty Fw state for 000000004280d596 1099511627776
[ 1239.817209] ceph:  dropping dirty+flushing Fw state for 000000004280d596 1099511627776
[ 1239.819095] ceph: invalidate_pages 000000004280d596 1099511627776 forced umount
[ 1239.823689] VFS: Busy inodes after unmount of ceph. Self-destruct in 5 seconds.  Have a nice day...
[ 1239.995759] BUG: unable to handle kernel NULL pointer dereference at 0000000000000150
[ 1239.996454] PGD 0 P4D 0 
[ 1239.996672] Oops: 0000 [#1] PREEMPT SMP PTI
[ 1239.997014] Modules linked in:
[ 1239.997268] CPU: 1 PID: 19 Comm: kworker/1:0 Not tainted 4.17.0-rc3-vm+ #112
[ 1239.997839] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.8.1-20150318_183358- 04/01/2014
[ 1239.998612] Workqueue: ceph-pg-invalid ceph_invalidate_work
[ 1239.999067] RIP: 0010:ceph_check_caps+0x41/0xa00
[ 1239.999443] RSP: 0018:ffffc900000a3b00 EFLAGS: 00010286
[ 1239.999869] RAX: 0000000000000001 RBX: 0000000000000002 RCX: 0000000000000001
[ 1240.000444] RDX: 0000000000000000 RSI: 0000000000000002 RDI: ffff880073ce0898
[ 1240.001020] RBP: ffff880073ce0d88 R08: 0000000000000000 R09: 0000000000000000
[ 1240.001593] R10: ffffc900000a3b50 R11: 47dbc355d3ae36f2 R12: ffff880073ce08a8
[ 1240.002168] R13: ffff880073ce0898 R14: 0000000000000000 R15: 0000000000000000
[ 1240.002744] FS:  0000000000000000(0000) GS:ffff88007fd00000(0000) knlGS:0000000000000000
[ 1240.003414] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1240.003882] CR2: 0000000000000150 CR3: 0000000076418000 CR4: 00000000000006a0
[ 1240.004456] Call Trace:
[ 1240.004664]  ? find_held_lock+0x2d/0x90
[ 1240.004979]  ? ceph_put_wrbuffer_cap_refs+0x393/0x3c0
[ 1240.005386]  ceph_put_wrbuffer_cap_refs+0x20f/0x3c0
[ 1240.005784]  ceph_invalidatepage+0xcb/0xf0
[ 1240.006119]  truncate_cleanup_page+0x64/0xa0
[ 1240.006468]  truncate_inode_pages_range+0x18e/0x6c0
[ 1240.006866]  truncate_pagecache+0x43/0x60
[ 1240.007193]  ceph_invalidate_work+0x9d/0x200
[ 1240.007541]  process_one_work+0x2ee/0x5b0
[ 1240.007870]  worker_thread+0x20a/0x390
[ 1240.008176]  ? process_one_work+0x5b0/0x5b0
[ 1240.008516]  kthread+0x122/0x130
[ 1240.008784]  ? kthread_create_worker_on_cpu+0x50/0x50
[ 1240.009193]  ret_from_fork+0x3a/0x50
[ 1240.009485] Code: f3 48 81 ec 98 00 00 00 48 8b 87 18 05 00 00 48 8b 80 78 06 00 00 4c 8b 70 28 48 8d 87 f0 04 00 00 48 89 44 24 38 b8 01 00 00 00 <41> 8b ae 50 01 00 00 85 ed 0f 44 c6 83 e0 01 88 44 24 33 48 8d 
[ 1240.011020] RIP: ceph_check_caps+0x41/0xa00 RSP: ffffc900000a3b00
[ 1240.011509] CR2: 0000000000000150
[ 1240.011784] ---[ end trace 39e8d9f1dfbee59c ]---
Yan, Zheng May 18, 2018, 8:14 a.m. UTC | #11
> On May 17, 2018, at 01:37, Ilya Dryomov <idryomov@gmail.com> wrote:
> 
> On Mon, May 14, 2018 at 11:51 AM, Yan, Zheng <zyan@redhat.com> wrote:
>> 
>> [...]
>> 
>> Sorry, I was wrong about where he hang occurs.  It’s at
>> 
>> [<0>] io_schedule+0xd/0x30
>> [<0>] wait_on_page_bit_common+0xc6/0x130
>> [<0>] __filemap_fdatawait_range+0xbd/0x100
>> [<0>] filemap_fdatawait_keep_errors+0x15/0x40
>> [<0>] sync_inodes_sb+0x1cf/0x240
>> [<0>] sync_filesystem+0x52/0x90
>> [<0>] generic_shutdown_super+0x1d/0x110
>> [<0>] ceph_kill_sb+0x28/0x80 [ceph]
>> [<0>] deactivate_locked_super+0x35/0x60
>> [<0>] cleanup_mnt+0x36/0x70
>> [<0>] task_work_run+0x79/0xa0
>> [<0>] exit_to_usermode_loop+0x62/0x70
>> [<0>] do_syscall_64+0xdb/0xf0
>> [<0>] entry_SYSCALL_64_after_hwframe+0x44/0xa9
>> [<0>] 0xffffffffffffffff
> 
> Makes sense, please make sure to update the commit message.
> 
> I pushed wip-umount-force and gave it a quick test.  The filesystem
> part appears to need improvement -- lots of "VFS: Busy inodes after
> unmount of ceph", more often than not with various crashes.  See the
> attached.
> 

I push a new commit to wip-umount-force, could you have a try.

Regards
Yan, Zheng


> Thanks,
> 
>                Ilya
> <busy-inodes.txt>

--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ilya Dryomov May 18, 2018, 3:21 p.m. UTC | #12
On Fri, May 18, 2018 at 10:14 AM, Yan, Zheng <zyan@redhat.com> wrote:
>
>> On May 17, 2018, at 01:37, Ilya Dryomov <idryomov@gmail.com> wrote:
>>
>> On Mon, May 14, 2018 at 11:51 AM, Yan, Zheng <zyan@redhat.com> wrote:
>>>
>>> [...]
>>>
>>> Sorry, I was wrong about where he hang occurs.  It’s at
>>>
>>> [<0>] io_schedule+0xd/0x30
>>> [<0>] wait_on_page_bit_common+0xc6/0x130
>>> [<0>] __filemap_fdatawait_range+0xbd/0x100
>>> [<0>] filemap_fdatawait_keep_errors+0x15/0x40
>>> [<0>] sync_inodes_sb+0x1cf/0x240
>>> [<0>] sync_filesystem+0x52/0x90
>>> [<0>] generic_shutdown_super+0x1d/0x110
>>> [<0>] ceph_kill_sb+0x28/0x80 [ceph]
>>> [<0>] deactivate_locked_super+0x35/0x60
>>> [<0>] cleanup_mnt+0x36/0x70
>>> [<0>] task_work_run+0x79/0xa0
>>> [<0>] exit_to_usermode_loop+0x62/0x70
>>> [<0>] do_syscall_64+0xdb/0xf0
>>> [<0>] entry_SYSCALL_64_after_hwframe+0x44/0xa9
>>> [<0>] 0xffffffffffffffff
>>
>> Makes sense, please make sure to update the commit message.
>>
>> I pushed wip-umount-force and gave it a quick test.  The filesystem
>> part appears to need improvement -- lots of "VFS: Busy inodes after
>> unmount of ceph", more often than not with various crashes.  See the
>> attached.
>>
>
> I push a new commit to wip-umount-force, could you have a try.

Seems to help -- my (very simple) test case now passes.

Could you repush wip-umount-force with an updated commit message for
"ceph: abort osd requests on force umount" and I'll add it to testing?

Thanks,

                Ilya
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch
diff mbox

diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 3c1155803444..40664e13cc0f 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -793,6 +793,7 @@  static void ceph_umount_begin(struct super_block *sb)
 	if (!fsc)
 		return;
 	fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
+	ceph_osdc_abort_requests(&fsc->client->osdc, -EIO);
 	ceph_mdsc_force_umount(fsc->mdsc);
 	return;
 }
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index b73dd7ebe585..f61736963236 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -347,6 +347,7 @@  struct ceph_osd_client {
 	struct rb_root         linger_map_checks;
 	atomic_t               num_requests;
 	atomic_t               num_homeless;
+	int		       abort_code;
 	struct delayed_work    timeout_work;
 	struct delayed_work    osds_timeout_work;
 #ifdef CONFIG_DEBUG_FS
@@ -377,7 +378,9 @@  extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
 				   struct ceph_msg *msg);
 extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
 				 struct ceph_msg *msg);
-void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb);
+extern void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32);
+extern void ceph_osdc_abort_requests(struct ceph_osd_client *osdc,
+				     int err_code);
 
 extern void osd_req_op_init(struct ceph_osd_request *osd_req,
 			    unsigned int which, u16 opcode, u32 flags);
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 08b5fc1f90cc..959af0e165ba 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -2165,9 +2165,9 @@  static void __submit_request(struct ceph_osd_request *req, bool wrlocked)
 	struct ceph_osd_client *osdc = req->r_osdc;
 	struct ceph_osd *osd;
 	enum calc_target_result ct_res;
+	bool abort_code = 0;
 	bool need_send = false;
 	bool promoted = false;
-	bool need_abort = false;
 
 	WARN_ON(req->r_tid);
 	dout("%s req %p wrlocked %d\n", __func__, req, wrlocked);
@@ -2183,7 +2183,9 @@  static void __submit_request(struct ceph_osd_request *req, bool wrlocked)
 		goto promote;
 	}
 
-	if (osdc->osdmap->epoch < osdc->epoch_barrier) {
+	if (osdc->abort_code) {
+		abort_code = osdc->abort_code;
+	} else if (osdc->osdmap->epoch < osdc->epoch_barrier) {
 		dout("req %p epoch %u barrier %u\n", req, osdc->osdmap->epoch,
 		     osdc->epoch_barrier);
 		req->r_t.paused = true;
@@ -2208,7 +2210,7 @@  static void __submit_request(struct ceph_osd_request *req, bool wrlocked)
 		req->r_t.paused = true;
 		maybe_request_map(osdc);
 		if (req->r_abort_on_full)
-			need_abort = true;
+			abort_code = -ENOSPC;
 	} else if (!osd_homeless(osd)) {
 		need_send = true;
 	} else {
@@ -2225,8 +2227,8 @@  static void __submit_request(struct ceph_osd_request *req, bool wrlocked)
 	link_request(osd, req);
 	if (need_send)
 		send_request(req);
-	else if (need_abort)
-		complete_request(req, -ENOSPC);
+	else if (abort_code)
+		complete_request(req, abort_code);
 	mutex_unlock(&osd->lock);
 
 	if (ct_res == CALC_TARGET_POOL_DNE)
@@ -2366,6 +2368,37 @@  void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb)
 }
 EXPORT_SYMBOL(ceph_osdc_update_epoch_barrier);
 
+void ceph_osdc_abort_requests(struct ceph_osd_client *osdc, int err_code)
+{
+	struct rb_node *n, *m;
+	struct ceph_osd *osd;
+	struct ceph_osd_request *req;
+
+	dout("%s err_code %d\n", __func__, err_code);
+
+	down_write(&osdc->lock);
+	osdc->abort_code = err_code;
+
+	for (n = rb_first(&osdc->osds); n; n = rb_next(n)) {
+		osd = rb_entry(n, struct ceph_osd, o_node);
+		m = rb_first(&osd->o_requests);
+		while (m) {
+			req = rb_entry(m, struct ceph_osd_request, r_node);
+			m = rb_next(m);
+			abort_request(req, err_code);
+		}
+	}
+
+	m = rb_first(&osdc->homeless_osd.o_requests);
+	while (m) {
+		req = rb_entry(m, struct ceph_osd_request, r_node);
+		m = rb_next(m);
+		abort_request(req, err_code);
+	}
+	up_write(&osdc->lock);
+}
+EXPORT_SYMBOL(ceph_osdc_abort_requests);
+
 /*
  * Drop all pending requests that are stalled waiting on a full condition to
  * clear, and complete them with ENOSPC as the return code. Set the