Message ID | 20180511091202.65250-1-zyan@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
"Yan, Zheng" <zyan@redhat.com> writes: > this avoid force umount getting stuck at ceph_osdc_sync() > > Signed-off-by: "Yan, Zheng" <zyan@redhat.com> > --- > fs/ceph/super.c | 1 + > include/linux/ceph/osd_client.h | 5 ++++- > net/ceph/osd_client.c | 43 ++++++++++++++++++++++++++++++++++++----- > 3 files changed, 43 insertions(+), 6 deletions(-) > > diff --git a/fs/ceph/super.c b/fs/ceph/super.c > index 3c1155803444..40664e13cc0f 100644 > --- a/fs/ceph/super.c > +++ b/fs/ceph/super.c > @@ -793,6 +793,7 @@ static void ceph_umount_begin(struct super_block *sb) > if (!fsc) > return; > fsc->mount_state = CEPH_MOUNT_SHUTDOWN; > + ceph_osdc_abort_requests(&fsc->client->osdc, -EIO); > ceph_mdsc_force_umount(fsc->mdsc); > return; > } > diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h > index b73dd7ebe585..f61736963236 100644 > --- a/include/linux/ceph/osd_client.h > +++ b/include/linux/ceph/osd_client.h > @@ -347,6 +347,7 @@ struct ceph_osd_client { > struct rb_root linger_map_checks; > atomic_t num_requests; > atomic_t num_homeless; > + int abort_code; > struct delayed_work timeout_work; > struct delayed_work osds_timeout_work; > #ifdef CONFIG_DEBUG_FS > @@ -377,7 +378,9 @@ extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc, > struct ceph_msg *msg); > extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, > struct ceph_msg *msg); > -void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb); > +extern void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32); nit: Why did you drop the arg name 'eb'? 
> +extern void ceph_osdc_abort_requests(struct ceph_osd_client *osdc, + > int err_code); > > extern void osd_req_op_init(struct ceph_osd_request *osd_req, > unsigned int which, u16 opcode, u32 flags); > diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c > index 08b5fc1f90cc..959af0e165ba 100644 > --- a/net/ceph/osd_client.c > +++ b/net/ceph/osd_client.c > @@ -2165,9 +2165,9 @@ static void __submit_request(struct ceph_osd_request *req, bool wrlocked) > struct ceph_osd_client *osdc = req->r_osdc; > struct ceph_osd *osd; > enum calc_target_result ct_res; > + bool abort_code = 0; This should be an int, not a bool. Cheers,
On Fri, 11 May 2018 17:12:02 +0800, Yan, Zheng wrote: > this avoid force umount getting stuck at ceph_osdc_sync() > > Signed-off-by: "Yan, Zheng" <zyan@redhat.com> > --- > fs/ceph/super.c | 1 + > include/linux/ceph/osd_client.h | 5 ++++- > net/ceph/osd_client.c | 43 ++++++++++++++++++++++++++++++++++++----- > 3 files changed, 43 insertions(+), 6 deletions(-) > > diff --git a/fs/ceph/super.c b/fs/ceph/super.c > index 3c1155803444..40664e13cc0f 100644 > --- a/fs/ceph/super.c > +++ b/fs/ceph/super.c > @@ -793,6 +793,7 @@ static void ceph_umount_begin(struct super_block *sb) > if (!fsc) > return; > fsc->mount_state = CEPH_MOUNT_SHUTDOWN; > + ceph_osdc_abort_requests(&fsc->client->osdc, -EIO); Does the unmount need to guarantee that the failed requests won't be processed by any OSDs, or is local cancellation fine here? Cheers, David -- To unsubscribe from this list: send the line "unsubscribe ceph-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, May 11, 2018 at 11:12 AM, Yan, Zheng <zyan@redhat.com> wrote: > this avoid force umount getting stuck at ceph_osdc_sync() > > Signed-off-by: "Yan, Zheng" <zyan@redhat.com> > --- > fs/ceph/super.c | 1 + > include/linux/ceph/osd_client.h | 5 ++++- > net/ceph/osd_client.c | 43 ++++++++++++++++++++++++++++++++++++----- > 3 files changed, 43 insertions(+), 6 deletions(-) > > diff --git a/fs/ceph/super.c b/fs/ceph/super.c > index 3c1155803444..40664e13cc0f 100644 > --- a/fs/ceph/super.c > +++ b/fs/ceph/super.c > @@ -793,6 +793,7 @@ static void ceph_umount_begin(struct super_block *sb) > if (!fsc) > return; > fsc->mount_state = CEPH_MOUNT_SHUTDOWN; > + ceph_osdc_abort_requests(&fsc->client->osdc, -EIO); > ceph_mdsc_force_umount(fsc->mdsc); > return; > } > diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h > index b73dd7ebe585..f61736963236 100644 > --- a/include/linux/ceph/osd_client.h > +++ b/include/linux/ceph/osd_client.h > @@ -347,6 +347,7 @@ struct ceph_osd_client { > struct rb_root linger_map_checks; > atomic_t num_requests; > atomic_t num_homeless; > + int abort_code; Why osdc->abort_code and all __submit_request() hunks are needed? If we are in a forced umount situation, no new I/Os should be accepted anyway. osdc->lock taken for write in ceph_osdc_abort_requests() is enough to lock out __submit_request(). Thanks, Ilya -- To unsubscribe from this list: send the line "unsubscribe ceph-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> On May 11, 2018, at 20:00, David Disseldorp <ddiss@suse.de> wrote: > > On Fri, 11 May 2018 17:12:02 +0800, Yan, Zheng wrote: > >> this avoid force umount getting stuck at ceph_osdc_sync() >> >> Signed-off-by: "Yan, Zheng" <zyan@redhat.com> >> --- >> fs/ceph/super.c | 1 + >> include/linux/ceph/osd_client.h | 5 ++++- >> net/ceph/osd_client.c | 43 ++++++++++++++++++++++++++++++++++++----- >> 3 files changed, 43 insertions(+), 6 deletions(-) >> >> diff --git a/fs/ceph/super.c b/fs/ceph/super.c >> index 3c1155803444..40664e13cc0f 100644 >> --- a/fs/ceph/super.c >> +++ b/fs/ceph/super.c >> @@ -793,6 +793,7 @@ static void ceph_umount_begin(struct super_block *sb) >> if (!fsc) >> return; >> fsc->mount_state = CEPH_MOUNT_SHUTDOWN; >> + ceph_osdc_abort_requests(&fsc->client->osdc, -EIO); > > Does the unmount need to guarantee that the failed requests won't be > processed by any OSDs, or is local cancellation fine here? > umount -f does not need to guarantee that Regards Yan, Zheng > Cheers, David -- To unsubscribe from this list: send the line "unsubscribe ceph-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> On May 11, 2018, at 20:06, Ilya Dryomov <idryomov@gmail.com> wrote: > > On Fri, May 11, 2018 at 11:12 AM, Yan, Zheng <zyan@redhat.com> wrote: >> this avoid force umount getting stuck at ceph_osdc_sync() >> >> Signed-off-by: "Yan, Zheng" <zyan@redhat.com> >> --- >> fs/ceph/super.c | 1 + >> include/linux/ceph/osd_client.h | 5 ++++- >> net/ceph/osd_client.c | 43 ++++++++++++++++++++++++++++++++++++----- >> 3 files changed, 43 insertions(+), 6 deletions(-) >> >> diff --git a/fs/ceph/super.c b/fs/ceph/super.c >> index 3c1155803444..40664e13cc0f 100644 >> --- a/fs/ceph/super.c >> +++ b/fs/ceph/super.c >> @@ -793,6 +793,7 @@ static void ceph_umount_begin(struct super_block *sb) >> if (!fsc) >> return; >> fsc->mount_state = CEPH_MOUNT_SHUTDOWN; >> + ceph_osdc_abort_requests(&fsc->client->osdc, -EIO); >> ceph_mdsc_force_umount(fsc->mdsc); >> return; >> } >> diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h >> index b73dd7ebe585..f61736963236 100644 >> --- a/include/linux/ceph/osd_client.h >> +++ b/include/linux/ceph/osd_client.h >> @@ -347,6 +347,7 @@ struct ceph_osd_client { >> struct rb_root linger_map_checks; >> atomic_t num_requests; >> atomic_t num_homeless; >> + int abort_code; > > Why osdc->abort_code and all __submit_request() hunks are needed? > If we are in a forced umount situation, no new I/Os should be accepted > anyway. No code guarantees that ceph_writepages_start()/writepage_nounlock() are not being executed when user does forced umount. They may start new osd requests after forced umount. Regards Yan, Zheng > > osdc->lock taken for write in ceph_osdc_abort_requests() is enough to > lock out __submit_request(). > > Thanks, > > Ilya -- To unsubscribe from this list: send the line "unsubscribe ceph-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sat, May 12, 2018 at 1:38 AM, Yan, Zheng <zyan@redhat.com> wrote: > > >> On May 11, 2018, at 20:06, Ilya Dryomov <idryomov@gmail.com> wrote: >> >> On Fri, May 11, 2018 at 11:12 AM, Yan, Zheng <zyan@redhat.com> wrote: >>> this avoid force umount getting stuck at ceph_osdc_sync() >>> >>> Signed-off-by: "Yan, Zheng" <zyan@redhat.com> >>> --- >>> fs/ceph/super.c | 1 + >>> include/linux/ceph/osd_client.h | 5 ++++- >>> net/ceph/osd_client.c | 43 ++++++++++++++++++++++++++++++++++++----- >>> 3 files changed, 43 insertions(+), 6 deletions(-) >>> >>> diff --git a/fs/ceph/super.c b/fs/ceph/super.c >>> index 3c1155803444..40664e13cc0f 100644 >>> --- a/fs/ceph/super.c >>> +++ b/fs/ceph/super.c >>> @@ -793,6 +793,7 @@ static void ceph_umount_begin(struct super_block *sb) >>> if (!fsc) >>> return; >>> fsc->mount_state = CEPH_MOUNT_SHUTDOWN; >>> + ceph_osdc_abort_requests(&fsc->client->osdc, -EIO); >>> ceph_mdsc_force_umount(fsc->mdsc); >>> return; >>> } >>> diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h >>> index b73dd7ebe585..f61736963236 100644 >>> --- a/include/linux/ceph/osd_client.h >>> +++ b/include/linux/ceph/osd_client.h >>> @@ -347,6 +347,7 @@ struct ceph_osd_client { >>> struct rb_root linger_map_checks; >>> atomic_t num_requests; >>> atomic_t num_homeless; >>> + int abort_code; >> >> Why osdc->abort_code and all __submit_request() hunks are needed? >> If we are in a forced umount situation, no new I/Os should be accepted >> anyway. > > No code guarantees that ceph_writepages_start()/writepage_nounlock() are > not being executed when user does forced umount. They may start new > osd requests after forced umount. I haven't traced through forced umount steps, but it seems like there must be a point where we stop accepting requests and attempt to quiesce the state. The patch talks about avoiding getting stuck in ceph_osdc_sync(). Is it guaranteed that no new OSD requests can be started after it completes? 
Thanks, Ilya -- To unsubscribe from this list: send the line "unsubscribe ceph-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> On May 14, 2018, at 16:37, Ilya Dryomov <idryomov@gmail.com> wrote: > > On Sat, May 12, 2018 at 1:38 AM, Yan, Zheng <zyan@redhat.com> wrote: >> >> >>> On May 11, 2018, at 20:06, Ilya Dryomov <idryomov@gmail.com> wrote: >>> >>> On Fri, May 11, 2018 at 11:12 AM, Yan, Zheng <zyan@redhat.com> wrote: >>>> this avoid force umount getting stuck at ceph_osdc_sync() >>>> >>>> Signed-off-by: "Yan, Zheng" <zyan@redhat.com> >>>> --- >>>> fs/ceph/super.c | 1 + >>>> include/linux/ceph/osd_client.h | 5 ++++- >>>> net/ceph/osd_client.c | 43 ++++++++++++++++++++++++++++++++++++----- >>>> 3 files changed, 43 insertions(+), 6 deletions(-) >>>> >>>> diff --git a/fs/ceph/super.c b/fs/ceph/super.c >>>> index 3c1155803444..40664e13cc0f 100644 >>>> --- a/fs/ceph/super.c >>>> +++ b/fs/ceph/super.c >>>> @@ -793,6 +793,7 @@ static void ceph_umount_begin(struct super_block *sb) >>>> if (!fsc) >>>> return; >>>> fsc->mount_state = CEPH_MOUNT_SHUTDOWN; >>>> + ceph_osdc_abort_requests(&fsc->client->osdc, -EIO); >>>> ceph_mdsc_force_umount(fsc->mdsc); >>>> return; >>>> } >>>> diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h >>>> index b73dd7ebe585..f61736963236 100644 >>>> --- a/include/linux/ceph/osd_client.h >>>> +++ b/include/linux/ceph/osd_client.h >>>> @@ -347,6 +347,7 @@ struct ceph_osd_client { >>>> struct rb_root linger_map_checks; >>>> atomic_t num_requests; >>>> atomic_t num_homeless; >>>> + int abort_code; >>> >>> Why osdc->abort_code and all __submit_request() hunks are needed? >>> If we are in a forced umount situation, no new I/Os should be accepted >>> anyway. >> >> No code guarantees that ceph_writepages_start()/writepage_nounlock() are >> not being executed when user does forced umount. They may start new >> osd requests after forced umount. > > I haven't traced through forced umount steps, but it seems like there > must be a point where we stop accepting requests and attempt to quiesce > the state. 
> To support this, cephfs code needs to introduce a rwsem, read lock the rwsem before calling ceph_osdc_wait_request(). Besides, the rwsem can not be used in the cases of blocking osdc functions (such as ceph_osdc_readpages). So I think it’s better to implement this in libceph. > The patch talks about avoiding getting stuck in ceph_osdc_sync(). > Is it guaranteed that no new OSD requests can be started after it > completes? > No, it doesn’t. Regards Yan, Zheng > Thanks, > > Ilya -- To unsubscribe from this list: send the line "unsubscribe ceph-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, May 14, 2018 at 11:05 AM, Yan, Zheng <zyan@redhat.com> wrote: > >> On May 14, 2018, at 16:37, Ilya Dryomov <idryomov@gmail.com> wrote: >> >> On Sat, May 12, 2018 at 1:38 AM, Yan, Zheng <zyan@redhat.com> wrote: >>> >>> >>>> On May 11, 2018, at 20:06, Ilya Dryomov <idryomov@gmail.com> wrote: >>>> >>>> On Fri, May 11, 2018 at 11:12 AM, Yan, Zheng <zyan@redhat.com> wrote: >>>>> this avoid force umount getting stuck at ceph_osdc_sync() >>>>> >>>>> Signed-off-by: "Yan, Zheng" <zyan@redhat.com> >>>>> --- >>>>> fs/ceph/super.c | 1 + >>>>> include/linux/ceph/osd_client.h | 5 ++++- >>>>> net/ceph/osd_client.c | 43 ++++++++++++++++++++++++++++++++++++----- >>>>> 3 files changed, 43 insertions(+), 6 deletions(-) >>>>> >>>>> diff --git a/fs/ceph/super.c b/fs/ceph/super.c >>>>> index 3c1155803444..40664e13cc0f 100644 >>>>> --- a/fs/ceph/super.c >>>>> +++ b/fs/ceph/super.c >>>>> @@ -793,6 +793,7 @@ static void ceph_umount_begin(struct super_block *sb) >>>>> if (!fsc) >>>>> return; >>>>> fsc->mount_state = CEPH_MOUNT_SHUTDOWN; >>>>> + ceph_osdc_abort_requests(&fsc->client->osdc, -EIO); >>>>> ceph_mdsc_force_umount(fsc->mdsc); >>>>> return; >>>>> } >>>>> diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h >>>>> index b73dd7ebe585..f61736963236 100644 >>>>> --- a/include/linux/ceph/osd_client.h >>>>> +++ b/include/linux/ceph/osd_client.h >>>>> @@ -347,6 +347,7 @@ struct ceph_osd_client { >>>>> struct rb_root linger_map_checks; >>>>> atomic_t num_requests; >>>>> atomic_t num_homeless; >>>>> + int abort_code; >>>> >>>> Why osdc->abort_code and all __submit_request() hunks are needed? >>>> If we are in a forced umount situation, no new I/Os should be accepted >>>> anyway. >>> >>> No code guarantees that ceph_writepages_start()/writepage_nounlock() are >>> not being executed when user does forced umount. They may start new >>> osd requests after forced umount. 
>> >> I haven't traced through forced umount steps, but it seems like there >> must be a point where we stop accepting requests and attempt to quiesce >> the state. >> > > To support this, cephfs code needs to introduce a rwsem, read lock the rwsem before calling ceph_osdc_wait_request(). Besides, the rwsem can not be used in the cases of blocking osdc functions (such as ceph_osdc_readpages). So I think it’s better to implement this in libceph. > > >> The patch talks about avoiding getting stuck in ceph_osdc_sync(). >> Is it guaranteed that no new OSD requests can be started after it >> completes? >> > > No, it doesn’t. What is the point of calling ceph_osdc_sync() in the first place then? Thanks, Ilya -- To unsubscribe from this list: send the line "unsubscribe ceph-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> On May 14, 2018, at 17:14, Ilya Dryomov <idryomov@gmail.com> wrote: > > On Mon, May 14, 2018 at 11:05 AM, Yan, Zheng <zyan@redhat.com> wrote: >> >>> On May 14, 2018, at 16:37, Ilya Dryomov <idryomov@gmail.com> wrote: >>> >>> On Sat, May 12, 2018 at 1:38 AM, Yan, Zheng <zyan@redhat.com> wrote: >>>> >>>> >>>>> On May 11, 2018, at 20:06, Ilya Dryomov <idryomov@gmail.com> wrote: >>>>> >>>>> On Fri, May 11, 2018 at 11:12 AM, Yan, Zheng <zyan@redhat.com> wrote: >>>>>> this avoid force umount getting stuck at ceph_osdc_sync() >>>>>> >>>>>> Signed-off-by: "Yan, Zheng" <zyan@redhat.com> >>>>>> --- >>>>>> fs/ceph/super.c | 1 + >>>>>> include/linux/ceph/osd_client.h | 5 ++++- >>>>>> net/ceph/osd_client.c | 43 ++++++++++++++++++++++++++++++++++++----- >>>>>> 3 files changed, 43 insertions(+), 6 deletions(-) >>>>>> >>>>>> diff --git a/fs/ceph/super.c b/fs/ceph/super.c >>>>>> index 3c1155803444..40664e13cc0f 100644 >>>>>> --- a/fs/ceph/super.c >>>>>> +++ b/fs/ceph/super.c >>>>>> @@ -793,6 +793,7 @@ static void ceph_umount_begin(struct super_block *sb) >>>>>> if (!fsc) >>>>>> return; >>>>>> fsc->mount_state = CEPH_MOUNT_SHUTDOWN; >>>>>> + ceph_osdc_abort_requests(&fsc->client->osdc, -EIO); >>>>>> ceph_mdsc_force_umount(fsc->mdsc); >>>>>> return; >>>>>> } >>>>>> diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h >>>>>> index b73dd7ebe585..f61736963236 100644 >>>>>> --- a/include/linux/ceph/osd_client.h >>>>>> +++ b/include/linux/ceph/osd_client.h >>>>>> @@ -347,6 +347,7 @@ struct ceph_osd_client { >>>>>> struct rb_root linger_map_checks; >>>>>> atomic_t num_requests; >>>>>> atomic_t num_homeless; >>>>>> + int abort_code; >>>>> >>>>> Why osdc->abort_code and all __submit_request() hunks are needed? >>>>> If we are in a forced umount situation, no new I/Os should be accepted >>>>> anyway. >>>> >>>> No code guarantees that ceph_writepages_start()/writepage_nounlock() are >>>> not being executed when user does forced umount. 
They may start new >>>> osd requests after forced umount. >>> >>> I haven't traced through forced umount steps, but it seems like there >>> must be a point where we stop accepting requests and attempt to quiesce >>> the state. >>> >> >> To support this, cephfs code needs to introduce a rwsem, read lock the rwsem before calling ceph_osdc_wait_request(). Besides, the rwsem can not be used in the cases of blocking osdc functions (such as ceph_osdc_readpages). So I think it’s better to implement this in libceph. >> >> >>> The patch talks about avoiding getting stuck in ceph_osdc_sync(). >>> Is it guaranteed that no new OSD requests can be started after it >>> completes? >>> >> >> No, it doesn’t. > > What is the point of calling ceph_osdc_sync() in the first place then? Sorry, I was wrong about where he hang occurs. It’s at [<0>] io_schedule+0xd/0x30 [<0>] wait_on_page_bit_common+0xc6/0x130 [<0>] __filemap_fdatawait_range+0xbd/0x100 [<0>] filemap_fdatawait_keep_errors+0x15/0x40 [<0>] sync_inodes_sb+0x1cf/0x240 [<0>] sync_filesystem+0x52/0x90 [<0>] generic_shutdown_super+0x1d/0x110 [<0>] ceph_kill_sb+0x28/0x80 [ceph] [<0>] deactivate_locked_super+0x35/0x60 [<0>] cleanup_mnt+0x36/0x70 [<0>] task_work_run+0x79/0xa0 [<0>] exit_to_usermode_loop+0x62/0x70 [<0>] do_syscall_64+0xdb/0xf0 [<0>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [<0>] 0xffffffffffffffff > > Thanks, > > Ilya -- To unsubscribe from this list: send the line "unsubscribe ceph-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, May 14, 2018 at 11:51 AM, Yan, Zheng <zyan@redhat.com> wrote: > > [...] > > Sorry, I was wrong about where he hang occurs. It’s at > > [<0>] io_schedule+0xd/0x30 > [<0>] wait_on_page_bit_common+0xc6/0x130 > [<0>] __filemap_fdatawait_range+0xbd/0x100 > [<0>] filemap_fdatawait_keep_errors+0x15/0x40 > [<0>] sync_inodes_sb+0x1cf/0x240 > [<0>] sync_filesystem+0x52/0x90 > [<0>] generic_shutdown_super+0x1d/0x110 > [<0>] ceph_kill_sb+0x28/0x80 [ceph] > [<0>] deactivate_locked_super+0x35/0x60 > [<0>] cleanup_mnt+0x36/0x70 > [<0>] task_work_run+0x79/0xa0 > [<0>] exit_to_usermode_loop+0x62/0x70 > [<0>] do_syscall_64+0xdb/0xf0 > [<0>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 > [<0>] 0xffffffffffffffff Makes sense, please make sure to update the commit message. I pushed wip-umount-force and gave it a quick test. The filesystem part appears to need improvement -- lots of "VFS: Busy inodes after unmount of ceph", more often than not with various crashes. See the attached. Thanks, Ilya [ 1239.752113] ceph: writepage_start 000000004280d596 1099511627776 forced umount [ 1239.753045] ceph: writepage_start 000000004280d596 1099511627776 forced umount [ 1239.753925] ceph: writepage_start 000000004280d596 1099511627776 forced umount [ 1239.754828] ceph: writepage_start 000000004280d596 1099511627776 forced umount [ 1239.755728] ceph: writepage_start 000000004280d596 1099511627776 forced umount [ 1239.756633] ceph: writepage_start 000000004280d596 1099511627776 forced umount [ 1239.757534] ceph: writepage_start 000000004280d596 1099511627776 forced umount [ 1239.758481] ceph: writepage_start 000000004280d596 1099511627776 forced umount [ 1239.759464] ceph: writepage_start 000000004280d596 1099511627776 forced umount [ 1239.760857] ceph: writepage_start 000000004280d596 1099511627776 forced umount [ 1239.816191] ceph: dropping dirty Fw state for 000000004280d596 1099511627776 [ 1239.817209] ceph: dropping dirty+flushing Fw state for 000000004280d596 1099511627776 [ 1239.819095] 
ceph: invalidate_pages 000000004280d596 1099511627776 forced umount [ 1239.823689] VFS: Busy inodes after unmount of ceph. Self-destruct in 5 seconds. Have a nice day... [ 1239.995759] BUG: unable to handle kernel NULL pointer dereference at 0000000000000150 [ 1239.996454] PGD 0 P4D 0 [ 1239.996672] Oops: 0000 [#1] PREEMPT SMP PTI [ 1239.997014] Modules linked in: [ 1239.997268] CPU: 1 PID: 19 Comm: kworker/1:0 Not tainted 4.17.0-rc3-vm+ #112 [ 1239.997839] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.8.1-20150318_183358- 04/01/2014 [ 1239.998612] Workqueue: ceph-pg-invalid ceph_invalidate_work [ 1239.999067] RIP: 0010:ceph_check_caps+0x41/0xa00 [ 1239.999443] RSP: 0018:ffffc900000a3b00 EFLAGS: 00010286 [ 1239.999869] RAX: 0000000000000001 RBX: 0000000000000002 RCX: 0000000000000001 [ 1240.000444] RDX: 0000000000000000 RSI: 0000000000000002 RDI: ffff880073ce0898 [ 1240.001020] RBP: ffff880073ce0d88 R08: 0000000000000000 R09: 0000000000000000 [ 1240.001593] R10: ffffc900000a3b50 R11: 47dbc355d3ae36f2 R12: ffff880073ce08a8 [ 1240.002168] R13: ffff880073ce0898 R14: 0000000000000000 R15: 0000000000000000 [ 1240.002744] FS: 0000000000000000(0000) GS:ffff88007fd00000(0000) knlGS:0000000000000000 [ 1240.003414] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1240.003882] CR2: 0000000000000150 CR3: 0000000076418000 CR4: 00000000000006a0 [ 1240.004456] Call Trace: [ 1240.004664] ? find_held_lock+0x2d/0x90 [ 1240.004979] ? ceph_put_wrbuffer_cap_refs+0x393/0x3c0 [ 1240.005386] ceph_put_wrbuffer_cap_refs+0x20f/0x3c0 [ 1240.005784] ceph_invalidatepage+0xcb/0xf0 [ 1240.006119] truncate_cleanup_page+0x64/0xa0 [ 1240.006468] truncate_inode_pages_range+0x18e/0x6c0 [ 1240.006866] truncate_pagecache+0x43/0x60 [ 1240.007193] ceph_invalidate_work+0x9d/0x200 [ 1240.007541] process_one_work+0x2ee/0x5b0 [ 1240.007870] worker_thread+0x20a/0x390 [ 1240.008176] ? process_one_work+0x5b0/0x5b0 [ 1240.008516] kthread+0x122/0x130 [ 1240.008784] ? 
kthread_create_worker_on_cpu+0x50/0x50 [ 1240.009193] ret_from_fork+0x3a/0x50 [ 1240.009485] Code: f3 48 81 ec 98 00 00 00 48 8b 87 18 05 00 00 48 8b 80 78 06 00 00 4c 8b 70 28 48 8d 87 f0 04 00 00 48 89 44 24 38 b8 01 00 00 00 <41> 8b ae 50 01 00 00 85 ed 0f 44 c6 83 e0 01 88 44 24 33 48 8d [ 1240.011020] RIP: ceph_check_caps+0x41/0xa00 RSP: ffffc900000a3b00 [ 1240.011509] CR2: 0000000000000150 [ 1240.011784] ---[ end trace 39e8d9f1dfbee59c ]---
> On May 17, 2018, at 01:37, Ilya Dryomov <idryomov@gmail.com> wrote: > > On Mon, May 14, 2018 at 11:51 AM, Yan, Zheng <zyan@redhat.com> wrote: >> >> [...] >> >> Sorry, I was wrong about where he hang occurs. It’s at >> >> [<0>] io_schedule+0xd/0x30 >> [<0>] wait_on_page_bit_common+0xc6/0x130 >> [<0>] __filemap_fdatawait_range+0xbd/0x100 >> [<0>] filemap_fdatawait_keep_errors+0x15/0x40 >> [<0>] sync_inodes_sb+0x1cf/0x240 >> [<0>] sync_filesystem+0x52/0x90 >> [<0>] generic_shutdown_super+0x1d/0x110 >> [<0>] ceph_kill_sb+0x28/0x80 [ceph] >> [<0>] deactivate_locked_super+0x35/0x60 >> [<0>] cleanup_mnt+0x36/0x70 >> [<0>] task_work_run+0x79/0xa0 >> [<0>] exit_to_usermode_loop+0x62/0x70 >> [<0>] do_syscall_64+0xdb/0xf0 >> [<0>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 >> [<0>] 0xffffffffffffffff > > Makes sense, please make sure to update the commit message. > > I pushed wip-umount-force and gave it a quick test. The filesystem > part appears to need improvement -- lots of "VFS: Busy inodes after > unmount of ceph", more often than not with various crashes. See the > attached. > I push a new commit to wip-umount-force, could you have a try. Regards Yan, Zheng > Thanks, > > Ilya > <busy-inodes.txt> -- To unsubscribe from this list: send the line "unsubscribe ceph-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, May 18, 2018 at 10:14 AM, Yan, Zheng <zyan@redhat.com> wrote: > >> On May 17, 2018, at 01:37, Ilya Dryomov <idryomov@gmail.com> wrote: >> >> On Mon, May 14, 2018 at 11:51 AM, Yan, Zheng <zyan@redhat.com> wrote: >>> >>> [...] >>> >>> Sorry, I was wrong about where he hang occurs. It’s at >>> >>> [<0>] io_schedule+0xd/0x30 >>> [<0>] wait_on_page_bit_common+0xc6/0x130 >>> [<0>] __filemap_fdatawait_range+0xbd/0x100 >>> [<0>] filemap_fdatawait_keep_errors+0x15/0x40 >>> [<0>] sync_inodes_sb+0x1cf/0x240 >>> [<0>] sync_filesystem+0x52/0x90 >>> [<0>] generic_shutdown_super+0x1d/0x110 >>> [<0>] ceph_kill_sb+0x28/0x80 [ceph] >>> [<0>] deactivate_locked_super+0x35/0x60 >>> [<0>] cleanup_mnt+0x36/0x70 >>> [<0>] task_work_run+0x79/0xa0 >>> [<0>] exit_to_usermode_loop+0x62/0x70 >>> [<0>] do_syscall_64+0xdb/0xf0 >>> [<0>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 >>> [<0>] 0xffffffffffffffff >> >> Makes sense, please make sure to update the commit message. >> >> I pushed wip-umount-force and gave it a quick test. The filesystem >> part appears to need improvement -- lots of "VFS: Busy inodes after >> unmount of ceph", more often than not with various crashes. See the >> attached. >> > > I push a new commit to wip-umount-force, could you have a try. Seems to help -- my (very simple) test case now passes. Could you repush wip-umount-force with an updated commit message for "ceph: abort osd requests on force umount" and I'll add it to testing? Thanks, Ilya -- To unsubscribe from this list: send the line "unsubscribe ceph-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 3c1155803444..40664e13cc0f 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -793,6 +793,7 @@ static void ceph_umount_begin(struct super_block *sb) if (!fsc) return; fsc->mount_state = CEPH_MOUNT_SHUTDOWN; + ceph_osdc_abort_requests(&fsc->client->osdc, -EIO); ceph_mdsc_force_umount(fsc->mdsc); return; } diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index b73dd7ebe585..f61736963236 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -347,6 +347,7 @@ struct ceph_osd_client { struct rb_root linger_map_checks; atomic_t num_requests; atomic_t num_homeless; + int abort_code; struct delayed_work timeout_work; struct delayed_work osds_timeout_work; #ifdef CONFIG_DEBUG_FS @@ -377,7 +378,9 @@ extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg); extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg); -void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb); +extern void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32); +extern void ceph_osdc_abort_requests(struct ceph_osd_client *osdc, + int err_code); extern void osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, u32 flags); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 08b5fc1f90cc..959af0e165ba 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -2165,9 +2165,9 @@ static void __submit_request(struct ceph_osd_request *req, bool wrlocked) struct ceph_osd_client *osdc = req->r_osdc; struct ceph_osd *osd; enum calc_target_result ct_res; + bool abort_code = 0; bool need_send = false; bool promoted = false; - bool need_abort = false; WARN_ON(req->r_tid); dout("%s req %p wrlocked %d\n", __func__, req, wrlocked); @@ -2183,7 +2183,9 @@ static void __submit_request(struct ceph_osd_request *req, bool wrlocked) goto promote; } - if (osdc->osdmap->epoch 
< osdc->epoch_barrier) { + if (osdc->abort_code) { + abort_code = osdc->abort_code; + } else if (osdc->osdmap->epoch < osdc->epoch_barrier) { dout("req %p epoch %u barrier %u\n", req, osdc->osdmap->epoch, osdc->epoch_barrier); req->r_t.paused = true; @@ -2208,7 +2210,7 @@ static void __submit_request(struct ceph_osd_request *req, bool wrlocked) req->r_t.paused = true; maybe_request_map(osdc); if (req->r_abort_on_full) - need_abort = true; + abort_code = -ENOSPC; } else if (!osd_homeless(osd)) { need_send = true; } else { @@ -2225,8 +2227,8 @@ static void __submit_request(struct ceph_osd_request *req, bool wrlocked) link_request(osd, req); if (need_send) send_request(req); - else if (need_abort) - complete_request(req, -ENOSPC); + else if (abort_code) + complete_request(req, abort_code); mutex_unlock(&osd->lock); if (ct_res == CALC_TARGET_POOL_DNE) @@ -2366,6 +2368,37 @@ void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb) } EXPORT_SYMBOL(ceph_osdc_update_epoch_barrier); +void ceph_osdc_abort_requests(struct ceph_osd_client *osdc, int err_code) +{ + struct rb_node *n, *m; + struct ceph_osd *osd; + struct ceph_osd_request *req; + + dout("%s err_code %d\n", __func__, err_code); + + down_write(&osdc->lock); + osdc->abort_code = err_code; + + for (n = rb_first(&osdc->osds); n; n = rb_next(n)) { + osd = rb_entry(n, struct ceph_osd, o_node); + m = rb_first(&osd->o_requests); + while (m) { + req = rb_entry(m, struct ceph_osd_request, r_node); + m = rb_next(m); + abort_request(req, err_code); + } + } + + m = rb_first(&osdc->homeless_osd.o_requests); + while (m) { + req = rb_entry(m, struct ceph_osd_request, r_node); + m = rb_next(m); + abort_request(req, err_code); + } + up_write(&osdc->lock); +} +EXPORT_SYMBOL(ceph_osdc_abort_requests); + /* * Drop all pending requests that are stalled waiting on a full condition to * clear, and complete them with ENOSPC as the return code. Set the
this avoids force umount getting stuck at ceph_osdc_sync() Signed-off-by: "Yan, Zheng" <zyan@redhat.com> --- fs/ceph/super.c | 1 + include/linux/ceph/osd_client.h | 5 ++++- net/ceph/osd_client.c | 43 ++++++++++++++++++++++++++++++++++++----- 3 files changed, 43 insertions(+), 6 deletions(-)