Message ID | 20221108135554.558278-1-xiubli@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v3] ceph: fix NULL pointer dereference for req->r_session | expand |
On Tue, Nov 8, 2022 at 2:56 PM <xiubli@redhat.com> wrote: > > From: Xiubo Li <xiubli@redhat.com> > > The request's r_session maybe changed when it was forwarded or > resent. > > Cc: stable@vger.kernel.org > URL: https://bugzilla.redhat.com/show_bug.cgi?id=2137955 > Signed-off-by: Xiubo Li <xiubli@redhat.com> > --- > fs/ceph/caps.c | 60 ++++++++++++++++---------------------------------- > 1 file changed, 19 insertions(+), 41 deletions(-) > > diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c > index 894adfb4a092..83f9e18e3169 100644 > --- a/fs/ceph/caps.c > +++ b/fs/ceph/caps.c > @@ -2297,8 +2297,10 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) > struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; > struct ceph_inode_info *ci = ceph_inode(inode); > struct ceph_mds_request *req1 = NULL, *req2 = NULL; > + struct ceph_mds_session **sessions = NULL; > + struct ceph_mds_session *s; > unsigned int max_sessions; > - int ret, err = 0; > + int i, ret, err = 0; > > spin_lock(&ci->i_unsafe_lock); > if (S_ISDIR(inode->i_mode) && !list_empty(&ci->i_unsafe_dirops)) { > @@ -2315,28 +2317,19 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) > } > spin_unlock(&ci->i_unsafe_lock); > > - /* > - * The mdsc->max_sessions is unlikely to be changed > - * mostly, here we will retry it by reallocating the > - * sessions array memory to get rid of the mdsc->mutex > - * lock. > - */ > -retry: > - max_sessions = mdsc->max_sessions; > - > /* > * Trigger to flush the journal logs in all the relevant MDSes > * manually, or in the worst case we must wait at most 5 seconds > * to wait the journal logs to be flushed by the MDSes periodically. > */ > - if ((req1 || req2) && likely(max_sessions)) { > - struct ceph_mds_session **sessions = NULL; > - struct ceph_mds_session *s; > + mutex_lock(&mdsc->mutex); > + max_sessions = mdsc->max_sessions; > + if (req1 || req2) { > struct ceph_mds_request *req; > - int i; > > sessions = kcalloc(max_sessions, sizeof(s), GFP_KERNEL); > if (!sessions) { > + mutex_unlock(&mdsc->mutex); > err = -ENOMEM; > goto out; > } > @@ -2346,18 +2339,8 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) > list_for_each_entry(req, &ci->i_unsafe_dirops, > r_unsafe_dir_item) { > s = req->r_session; > - if (!s) > + if (!s || unlikely(s->s_mds >= max_sessions)) > continue; > - if (unlikely(s->s_mds >= max_sessions)) { > - spin_unlock(&ci->i_unsafe_lock); > - for (i = 0; i < max_sessions; i++) { > - s = sessions[i]; > - if (s) > - ceph_put_mds_session(s); > - } > - kfree(sessions); > - goto retry; > - } > if (!sessions[s->s_mds]) { > s = ceph_get_mds_session(s); > sessions[s->s_mds] = s; > @@ -2368,18 +2351,8 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) > list_for_each_entry(req, &ci->i_unsafe_iops, > r_unsafe_target_item) { > s = req->r_session; > - if (!s) > + if (!s || unlikely(s->s_mds >= max_sessions)) > continue; > - if (unlikely(s->s_mds >= max_sessions)) { > - spin_unlock(&ci->i_unsafe_lock); > - for (i = 0; i < max_sessions; i++) { > - s = sessions[i]; > - if (s) > - ceph_put_mds_session(s); > - } > - kfree(sessions); > - goto retry; > - } > if (!sessions[s->s_mds]) { > s = ceph_get_mds_session(s); > sessions[s->s_mds] = s; > @@ -2391,13 +2364,18 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) > /* the auth MDS */ > spin_lock(&ci->i_ceph_lock); > if (ci->i_auth_cap) { > - s = ci->i_auth_cap->session; > - if (!sessions[s->s_mds]) > - sessions[s->s_mds] = ceph_get_mds_session(s); > + s = ci->i_auth_cap->session; > + if (likely(s->s_mds < max_sessions) > + && !sessions[s->s_mds]) { Hi Xiubo, Nit: keep && on the previous line for style consistency. > + sessions[s->s_mds] = ceph_get_mds_session(s); > + } > } > spin_unlock(&ci->i_ceph_lock); > + } > + mutex_unlock(&mdsc->mutex); > > - /* send flush mdlog request to MDSes */ > + /* send flush mdlog request to MDSes */ > + if (sessions) { Since mdlog is flushed only in "if (req1 || req2)" case, why not keep max_sessions loop there and avoid sessions != NULL check? Then, you could also move mdsc->mutex acquisition and max_sessions assignment into "if (req1 || req2)" branch and keep sessions, s and i declarations where there are today. Thanks, Ilya
On 09/11/2022 20:56, Ilya Dryomov wrote: > On Tue, Nov 8, 2022 at 2:56 PM <xiubli@redhat.com> wrote: >> From: Xiubo Li <xiubli@redhat.com> >> >> The request's r_session maybe changed when it was forwarded or >> resent. >> >> Cc: stable@vger.kernel.org >> URL: https://bugzilla.redhat.com/show_bug.cgi?id=2137955 >> Signed-off-by: Xiubo Li <xiubli@redhat.com> >> --- >> fs/ceph/caps.c | 60 ++++++++++++++++---------------------------------- >> 1 file changed, 19 insertions(+), 41 deletions(-) >> >> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c >> index 894adfb4a092..83f9e18e3169 100644 >> --- a/fs/ceph/caps.c >> +++ b/fs/ceph/caps.c >> @@ -2297,8 +2297,10 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) >> struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; >> struct ceph_inode_info *ci = ceph_inode(inode); >> struct ceph_mds_request *req1 = NULL, *req2 = NULL; >> + struct ceph_mds_session **sessions = NULL; >> + struct ceph_mds_session *s; >> unsigned int max_sessions; >> - int ret, err = 0; >> + int i, ret, err = 0; >> >> spin_lock(&ci->i_unsafe_lock); >> if (S_ISDIR(inode->i_mode) && !list_empty(&ci->i_unsafe_dirops)) { >> @@ -2315,28 +2317,19 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) >> } >> spin_unlock(&ci->i_unsafe_lock); >> >> - /* >> - * The mdsc->max_sessions is unlikely to be changed >> - * mostly, here we will retry it by reallocating the >> - * sessions array memory to get rid of the mdsc->mutex >> - * lock. >> - */ >> -retry: >> - max_sessions = mdsc->max_sessions; >> - >> /* >> * Trigger to flush the journal logs in all the relevant MDSes >> * manually, or in the worst case we must wait at most 5 seconds >> * to wait the journal logs to be flushed by the MDSes periodically. >> */ >> - if ((req1 || req2) && likely(max_sessions)) { >> - struct ceph_mds_session **sessions = NULL; >> - struct ceph_mds_session *s; >> + mutex_lock(&mdsc->mutex); >> + max_sessions = mdsc->max_sessions; >> + if (req1 || req2) { >> struct ceph_mds_request *req; >> - int i; >> >> sessions = kcalloc(max_sessions, sizeof(s), GFP_KERNEL); >> if (!sessions) { >> + mutex_unlock(&mdsc->mutex); >> err = -ENOMEM; >> goto out; >> } >> @@ -2346,18 +2339,8 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) >> list_for_each_entry(req, &ci->i_unsafe_dirops, >> r_unsafe_dir_item) { >> s = req->r_session; >> - if (!s) >> + if (!s || unlikely(s->s_mds >= max_sessions)) >> continue; >> - if (unlikely(s->s_mds >= max_sessions)) { >> - spin_unlock(&ci->i_unsafe_lock); >> - for (i = 0; i < max_sessions; i++) { >> - s = sessions[i]; >> - if (s) >> - ceph_put_mds_session(s); >> - } >> - kfree(sessions); >> - goto retry; >> - } >> if (!sessions[s->s_mds]) { >> s = ceph_get_mds_session(s); >> sessions[s->s_mds] = s; >> @@ -2368,18 +2351,8 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) >> list_for_each_entry(req, &ci->i_unsafe_iops, >> r_unsafe_target_item) { >> s = req->r_session; >> - if (!s) >> + if (!s || unlikely(s->s_mds >= max_sessions)) >> continue; >> - if (unlikely(s->s_mds >= max_sessions)) { >> - spin_unlock(&ci->i_unsafe_lock); >> - for (i = 0; i < max_sessions; i++) { >> - s = sessions[i]; >> - if (s) >> - ceph_put_mds_session(s); >> - } >> - kfree(sessions); >> - goto retry; >> - } >> if (!sessions[s->s_mds]) { >> s = ceph_get_mds_session(s); >> sessions[s->s_mds] = s; >> @@ -2391,13 +2364,18 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) >> /* the auth MDS */ >> spin_lock(&ci->i_ceph_lock); >> if (ci->i_auth_cap) { >> - s = ci->i_auth_cap->session; >> - if (!sessions[s->s_mds]) >> - sessions[s->s_mds] = ceph_get_mds_session(s); >> + s = ci->i_auth_cap->session; >> + if (likely(s->s_mds < max_sessions) >> + && !sessions[s->s_mds]) { > Hi Xiubo, > > Nit: keep && on the previous line for style consistency. Sure. Will fix it. > >> + sessions[s->s_mds] = ceph_get_mds_session(s); >> + } >> } >> spin_unlock(&ci->i_ceph_lock); >> + } >> + mutex_unlock(&mdsc->mutex); >> >> - /* send flush mdlog request to MDSes */ >> + /* send flush mdlog request to MDSes */ >> + if (sessions) { > Since mdlog is flushed only in "if (req1 || req2)" case, why not keep > max_sessions loop there and avoid sessions != NULL check? This is because I must drop the mdsc->mutex before calling "send_flush_mdlog()" in the max_sessions loop. Thanks! - Xiubo > > Then, you could also move mdsc->mutex acquisition and max_sessions > assignment into "if (req1 || req2)" branch and keep sessions, s and > i declarations where there are today. > > Thanks, > > Ilya >
On Wed, Nov 9, 2022 at 3:12 PM Xiubo Li <xiubli@redhat.com> wrote: > > > On 09/11/2022 20:56, Ilya Dryomov wrote: > > On Tue, Nov 8, 2022 at 2:56 PM <xiubli@redhat.com> wrote: > >> From: Xiubo Li <xiubli@redhat.com> > >> > >> The request's r_session maybe changed when it was forwarded or > >> resent. > >> > >> Cc: stable@vger.kernel.org > >> URL: https://bugzilla.redhat.com/show_bug.cgi?id=2137955 > >> Signed-off-by: Xiubo Li <xiubli@redhat.com> > >> --- > >> fs/ceph/caps.c | 60 ++++++++++++++++---------------------------------- > >> 1 file changed, 19 insertions(+), 41 deletions(-) > >> > >> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c > >> index 894adfb4a092..83f9e18e3169 100644 > >> --- a/fs/ceph/caps.c > >> +++ b/fs/ceph/caps.c > >> @@ -2297,8 +2297,10 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) > >> struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; > >> struct ceph_inode_info *ci = ceph_inode(inode); > >> struct ceph_mds_request *req1 = NULL, *req2 = NULL; > >> + struct ceph_mds_session **sessions = NULL; > >> + struct ceph_mds_session *s; > >> unsigned int max_sessions; > >> - int ret, err = 0; > >> + int i, ret, err = 0; > >> > >> spin_lock(&ci->i_unsafe_lock); > >> if (S_ISDIR(inode->i_mode) && !list_empty(&ci->i_unsafe_dirops)) { > >> @@ -2315,28 +2317,19 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) > >> } > >> spin_unlock(&ci->i_unsafe_lock); > >> > >> - /* > >> - * The mdsc->max_sessions is unlikely to be changed > >> - * mostly, here we will retry it by reallocating the > >> - * sessions array memory to get rid of the mdsc->mutex > >> - * lock. > >> - */ > >> -retry: > >> - max_sessions = mdsc->max_sessions; > >> - > >> /* > >> * Trigger to flush the journal logs in all the relevant MDSes > >> * manually, or in the worst case we must wait at most 5 seconds > >> * to wait the journal logs to be flushed by the MDSes periodically. > >> */ > >> - if ((req1 || req2) && likely(max_sessions)) { > >> - struct ceph_mds_session **sessions = NULL; > >> - struct ceph_mds_session *s; > >> + mutex_lock(&mdsc->mutex); > >> + max_sessions = mdsc->max_sessions; > >> + if (req1 || req2) { > >> struct ceph_mds_request *req; > >> - int i; > >> > >> sessions = kcalloc(max_sessions, sizeof(s), GFP_KERNEL); > >> if (!sessions) { > >> + mutex_unlock(&mdsc->mutex); > >> err = -ENOMEM; > >> goto out; > >> } > >> @@ -2346,18 +2339,8 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) > >> list_for_each_entry(req, &ci->i_unsafe_dirops, > >> r_unsafe_dir_item) { > >> s = req->r_session; > >> - if (!s) > >> + if (!s || unlikely(s->s_mds >= max_sessions)) > >> continue; > >> - if (unlikely(s->s_mds >= max_sessions)) { > >> - spin_unlock(&ci->i_unsafe_lock); > >> - for (i = 0; i < max_sessions; i++) { > >> - s = sessions[i]; > >> - if (s) > >> - ceph_put_mds_session(s); > >> - } > >> - kfree(sessions); > >> - goto retry; > >> - } > >> if (!sessions[s->s_mds]) { > >> s = ceph_get_mds_session(s); > >> sessions[s->s_mds] = s; > >> @@ -2368,18 +2351,8 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) > >> list_for_each_entry(req, &ci->i_unsafe_iops, > >> r_unsafe_target_item) { > >> s = req->r_session; > >> - if (!s) > >> + if (!s || unlikely(s->s_mds >= max_sessions)) > >> continue; > >> - if (unlikely(s->s_mds >= max_sessions)) { > >> - spin_unlock(&ci->i_unsafe_lock); > >> - for (i = 0; i < max_sessions; i++) { > >> - s = sessions[i]; > >> - if (s) > >> - ceph_put_mds_session(s); > >> - } > >> - kfree(sessions); > >> - goto retry; > >> - } > >> if (!sessions[s->s_mds]) { > >> s = ceph_get_mds_session(s); > >> sessions[s->s_mds] = s; > >> @@ -2391,13 +2364,18 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) > >> /* the auth MDS */ > >> spin_lock(&ci->i_ceph_lock); > >> if (ci->i_auth_cap) { > >> - s = ci->i_auth_cap->session; > >> - if (!sessions[s->s_mds]) > >> - sessions[s->s_mds] = ceph_get_mds_session(s); > >> + s = ci->i_auth_cap->session; > >> + if (likely(s->s_mds < max_sessions) > >> + && !sessions[s->s_mds]) { > > Hi Xiubo, > > > > Nit: keep && on the previous line for style consistency. > > Sure. Will fix it. > > > > > >> + sessions[s->s_mds] = ceph_get_mds_session(s); > >> + } > >> } > >> spin_unlock(&ci->i_ceph_lock); > >> + } > >> + mutex_unlock(&mdsc->mutex); > >> > >> - /* send flush mdlog request to MDSes */ > >> + /* send flush mdlog request to MDSes */ > >> + if (sessions) { > > Since mdlog is flushed only in "if (req1 || req2)" case, why not keep > > max_sessions loop there and avoid sessions != NULL check? > > This is because I must drop the mdsc->mutex before calling > "send_flush_mdlog()" in the max_sessions loop. If you move mdsc->mutex acquisition and max_sessions assignment into "if (req1 || req2)" branch, it can be trivially dropped before the loop. Thanks, Ilya
On 09/11/2022 22:16, Ilya Dryomov wrote: > On Wed, Nov 9, 2022 at 3:12 PM Xiubo Li <xiubli@redhat.com> wrote: >> >> On 09/11/2022 20:56, Ilya Dryomov wrote: >>> On Tue, Nov 8, 2022 at 2:56 PM <xiubli@redhat.com> wrote: >>>> From: Xiubo Li <xiubli@redhat.com> >>>> >>>> The request's r_session maybe changed when it was forwarded or >>>> resent. >>>> >>>> Cc: stable@vger.kernel.org >>>> URL: https://bugzilla.redhat.com/show_bug.cgi?id=2137955 >>>> Signed-off-by: Xiubo Li <xiubli@redhat.com> >>>> --- >>>> fs/ceph/caps.c | 60 ++++++++++++++++---------------------------------- >>>> 1 file changed, 19 insertions(+), 41 deletions(-) >>>> >>>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c >>>> index 894adfb4a092..83f9e18e3169 100644 >>>> --- a/fs/ceph/caps.c >>>> +++ b/fs/ceph/caps.c >>>> @@ -2297,8 +2297,10 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) >>>> struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; >>>> struct ceph_inode_info *ci = ceph_inode(inode); >>>> struct ceph_mds_request *req1 = NULL, *req2 = NULL; >>>> + struct ceph_mds_session **sessions = NULL; >>>> + struct ceph_mds_session *s; >>>> unsigned int max_sessions; >>>> - int ret, err = 0; >>>> + int i, ret, err = 0; >>>> >>>> spin_lock(&ci->i_unsafe_lock); >>>> if (S_ISDIR(inode->i_mode) && !list_empty(&ci->i_unsafe_dirops)) { >>>> @@ -2315,28 +2317,19 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) >>>> } >>>> spin_unlock(&ci->i_unsafe_lock); >>>> >>>> - /* >>>> - * The mdsc->max_sessions is unlikely to be changed >>>> - * mostly, here we will retry it by reallocating the >>>> - * sessions array memory to get rid of the mdsc->mutex >>>> - * lock. >>>> - */ >>>> -retry: >>>> - max_sessions = mdsc->max_sessions; >>>> - >>>> /* >>>> * Trigger to flush the journal logs in all the relevant MDSes >>>> * manually, or in the worst case we must wait at most 5 seconds >>>> * to wait the journal logs to be flushed by the MDSes periodically. >>>> */ >>>> - if ((req1 || req2) && likely(max_sessions)) { >>>> - struct ceph_mds_session **sessions = NULL; >>>> - struct ceph_mds_session *s; >>>> + mutex_lock(&mdsc->mutex); >>>> + max_sessions = mdsc->max_sessions; >>>> + if (req1 || req2) { >>>> struct ceph_mds_request *req; >>>> - int i; >>>> >>>> sessions = kcalloc(max_sessions, sizeof(s), GFP_KERNEL); >>>> if (!sessions) { >>>> + mutex_unlock(&mdsc->mutex); >>>> err = -ENOMEM; >>>> goto out; >>>> } >>>> @@ -2346,18 +2339,8 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) >>>> list_for_each_entry(req, &ci->i_unsafe_dirops, >>>> r_unsafe_dir_item) { >>>> s = req->r_session; >>>> - if (!s) >>>> + if (!s || unlikely(s->s_mds >= max_sessions)) >>>> continue; >>>> - if (unlikely(s->s_mds >= max_sessions)) { >>>> - spin_unlock(&ci->i_unsafe_lock); >>>> - for (i = 0; i < max_sessions; i++) { >>>> - s = sessions[i]; >>>> - if (s) >>>> - ceph_put_mds_session(s); >>>> - } >>>> - kfree(sessions); >>>> - goto retry; >>>> - } >>>> if (!sessions[s->s_mds]) { >>>> s = ceph_get_mds_session(s); >>>> sessions[s->s_mds] = s; >>>> @@ -2368,18 +2351,8 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) >>>> list_for_each_entry(req, &ci->i_unsafe_iops, >>>> r_unsafe_target_item) { >>>> s = req->r_session; >>>> - if (!s) >>>> + if (!s || unlikely(s->s_mds >= max_sessions)) >>>> continue; >>>> - if (unlikely(s->s_mds >= max_sessions)) { >>>> - spin_unlock(&ci->i_unsafe_lock); >>>> - for (i = 0; i < max_sessions; i++) { >>>> - s = sessions[i]; >>>> - if (s) >>>> - ceph_put_mds_session(s); >>>> - } >>>> - kfree(sessions); >>>> - goto retry; >>>> - } >>>> if (!sessions[s->s_mds]) { >>>> s = ceph_get_mds_session(s); >>>> sessions[s->s_mds] = s; >>>> @@ -2391,13 +2364,18 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) >>>> /* the auth MDS */ >>>> spin_lock(&ci->i_ceph_lock); >>>> if (ci->i_auth_cap) { >>>> - s = ci->i_auth_cap->session; >>>> - if (!sessions[s->s_mds]) >>>> - sessions[s->s_mds] = ceph_get_mds_session(s); >>>> + s = ci->i_auth_cap->session; >>>> + if (likely(s->s_mds < max_sessions) >>>> + && !sessions[s->s_mds]) { >>> Hi Xiubo, >>> >>> Nit: keep && on the previous line for style consistency. >> Sure. Will fix it. >> >> >>>> + sessions[s->s_mds] = ceph_get_mds_session(s); >>>> + } >>>> } >>>> spin_unlock(&ci->i_ceph_lock); >>>> + } >>>> + mutex_unlock(&mdsc->mutex); >>>> >>>> - /* send flush mdlog request to MDSes */ >>>> + /* send flush mdlog request to MDSes */ >>>> + if (sessions) { >>> Since mdlog is flushed only in "if (req1 || req2)" case, why not keep >>> max_sessions loop there and avoid sessions != NULL check? >> This is because I must drop the mdsc->mutex before calling >> "send_flush_mdlog()" in the max_sessions loop. > If you move mdsc->mutex acquisition and max_sessions assignment > into "if (req1 || req2)" branch, it can be trivially dropped before > the loop. Okay, sounds good. Thanks! - Xiubo > Thanks, > > Ilya >
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 894adfb4a092..83f9e18e3169 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2297,8 +2297,10 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_request *req1 = NULL, *req2 = NULL; + struct ceph_mds_session **sessions = NULL; + struct ceph_mds_session *s; unsigned int max_sessions; - int ret, err = 0; + int i, ret, err = 0; spin_lock(&ci->i_unsafe_lock); if (S_ISDIR(inode->i_mode) && !list_empty(&ci->i_unsafe_dirops)) { @@ -2315,28 +2317,19 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) } spin_unlock(&ci->i_unsafe_lock); - /* - * The mdsc->max_sessions is unlikely to be changed - * mostly, here we will retry it by reallocating the - * sessions array memory to get rid of the mdsc->mutex - * lock. - */ -retry: - max_sessions = mdsc->max_sessions; - /* * Trigger to flush the journal logs in all the relevant MDSes * manually, or in the worst case we must wait at most 5 seconds * to wait the journal logs to be flushed by the MDSes periodically. */ - if ((req1 || req2) && likely(max_sessions)) { - struct ceph_mds_session **sessions = NULL; - struct ceph_mds_session *s; + mutex_lock(&mdsc->mutex); + max_sessions = mdsc->max_sessions; + if (req1 || req2) { struct ceph_mds_request *req; - int i; sessions = kcalloc(max_sessions, sizeof(s), GFP_KERNEL); if (!sessions) { + mutex_unlock(&mdsc->mutex); err = -ENOMEM; goto out; } @@ -2346,18 +2339,8 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) list_for_each_entry(req, &ci->i_unsafe_dirops, r_unsafe_dir_item) { s = req->r_session; - if (!s) + if (!s || unlikely(s->s_mds >= max_sessions)) continue; - if (unlikely(s->s_mds >= max_sessions)) { - spin_unlock(&ci->i_unsafe_lock); - for (i = 0; i < max_sessions; i++) { - s = sessions[i]; - if (s) - ceph_put_mds_session(s); - } - kfree(sessions); - goto retry; - } if (!sessions[s->s_mds]) { s = ceph_get_mds_session(s); sessions[s->s_mds] = s; @@ -2368,18 +2351,8 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) list_for_each_entry(req, &ci->i_unsafe_iops, r_unsafe_target_item) { s = req->r_session; - if (!s) + if (!s || unlikely(s->s_mds >= max_sessions)) continue; - if (unlikely(s->s_mds >= max_sessions)) { - spin_unlock(&ci->i_unsafe_lock); - for (i = 0; i < max_sessions; i++) { - s = sessions[i]; - if (s) - ceph_put_mds_session(s); - } - kfree(sessions); - goto retry; - } if (!sessions[s->s_mds]) { s = ceph_get_mds_session(s); sessions[s->s_mds] = s; @@ -2391,13 +2364,18 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) /* the auth MDS */ spin_lock(&ci->i_ceph_lock); if (ci->i_auth_cap) { - s = ci->i_auth_cap->session; - if (!sessions[s->s_mds]) - sessions[s->s_mds] = ceph_get_mds_session(s); + s = ci->i_auth_cap->session; + if (likely(s->s_mds < max_sessions) + && !sessions[s->s_mds]) { + sessions[s->s_mds] = ceph_get_mds_session(s); + } } spin_unlock(&ci->i_ceph_lock); + } + mutex_unlock(&mdsc->mutex); - /* send flush mdlog request to MDSes */ + /* send flush mdlog request to MDSes */ + if (sessions) { for (i = 0; i < max_sessions; i++) { s = sessions[i]; if (s) { @@ -2405,7 +2383,6 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) ceph_put_mds_session(s); } } - kfree(sessions); } dout("%s %p wait on tid %llu %llu\n", __func__, @@ -2424,6 +2401,7 @@ static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode) } out: + kfree(sessions); if (req1) ceph_mdsc_put_request(req1); if (req2)