diff mbox series

[v3] ceph: fix use-after-free bug for inodes when flushing capsnaps

Message ID 20230606005253.1055933-1-xiubli@redhat.com (mailing list archive)
State New, archived
Headers show
Series [v3] ceph: fix use-after-free bug for inodes when flushing capsnaps | expand

Commit Message

Xiubo Li June 6, 2023, 12:52 a.m. UTC
From: Xiubo Li <xiubli@redhat.com>

There is a race between capsnaps flush and removing the inode from
'mdsc->snap_flush_list' list:

   == Thread A ==                     == Thread B ==
ceph_queue_cap_snap()
 -> allocate 'capsnapA'
 ->ihold('&ci->vfs_inode')
 ->add 'capsnapA' to 'ci->i_cap_snaps'
 ->add 'ci' to 'mdsc->snap_flush_list'
    ...
   == Thread C ==
ceph_flush_snaps()
 ->__ceph_flush_snaps()
  ->__send_flush_snap()
                                handle_cap_flushsnap_ack()
                                 ->iput('&ci->vfs_inode')
                                   this also will release 'ci'
                                    ...
				      == Thread D ==
                                ceph_handle_snap()
                                 ->flush_snaps()
                                  ->iterate 'mdsc->snap_flush_list'
                                   ->get the stale 'ci'
 ->remove 'ci' from                ->ihold(&ci->vfs_inode) this
   'mdsc->snap_flush_list'           will WARNING

To fix this we will increase the inode's i_count ref when adding 'ci'
to the 'mdsc->snap_flush_list' list.

Cc: stable@vger.kernel.org
URL: https://bugzilla.redhat.com/show_bug.cgi?id=2209299
Reviewed-by: Milind Changire <mchangir@redhat.com>
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---

V3:
- Fix two minor typos in the commit comments.



 fs/ceph/caps.c | 6 ++++++
 fs/ceph/snap.c | 4 +++-
 2 files changed, 9 insertions(+), 1 deletion(-)

Comments

Ilya Dryomov June 6, 2023, 10:21 a.m. UTC | #1
On Tue, Jun 6, 2023 at 2:55 AM <xiubli@redhat.com> wrote:
>
> From: Xiubo Li <xiubli@redhat.com>
>
> There is a race between capsnaps flush and removing the inode from
> 'mdsc->snap_flush_list' list:
>
>    == Thread A ==                     == Thread B ==
> ceph_queue_cap_snap()
>  -> allocate 'capsnapA'
>  ->ihold('&ci->vfs_inode')
>  ->add 'capsnapA' to 'ci->i_cap_snaps'
>  ->add 'ci' to 'mdsc->snap_flush_list'
>     ...
>    == Thread C ==
> ceph_flush_snaps()
>  ->__ceph_flush_snaps()
>   ->__send_flush_snap()
>                                 handle_cap_flushsnap_ack()
>                                  ->iput('&ci->vfs_inode')
>                                    this also will release 'ci'
>                                     ...
>                                       == Thread D ==
>                                 ceph_handle_snap()
>                                  ->flush_snaps()
>                                   ->iterate 'mdsc->snap_flush_list'
>                                    ->get the stale 'ci'
>  ->remove 'ci' from                ->ihold(&ci->vfs_inode) this
>    'mdsc->snap_flush_list'           will WARNING
>
> To fix this we will increase the inode's i_count ref when adding 'ci'
> to the 'mdsc->snap_flush_list' list.
>
> Cc: stable@vger.kernel.org
> URL: https://bugzilla.redhat.com/show_bug.cgi?id=2209299
> Reviewed-by: Milind Changire <mchangir@redhat.com>
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>
> V3:
> - Fix two minor typo in commit comments.
>
>
>
>  fs/ceph/caps.c | 6 ++++++
>  fs/ceph/snap.c | 4 +++-
>  2 files changed, 9 insertions(+), 1 deletion(-)
>
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index feabf4cc0c4f..7c2cb813aba4 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -1684,6 +1684,7 @@ void ceph_flush_snaps(struct ceph_inode_info *ci,
>         struct inode *inode = &ci->netfs.inode;
>         struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
>         struct ceph_mds_session *session = NULL;
> +       int put = 0;

Hi Xiubo,

Nit: renaming this variable to need_put and making it a bool would
communicate the intent better.

>         int mds;
>
>         dout("ceph_flush_snaps %p\n", inode);
> @@ -1728,8 +1729,13 @@ void ceph_flush_snaps(struct ceph_inode_info *ci,
>                 ceph_put_mds_session(session);
>         /* we flushed them all; remove this inode from the queue */
>         spin_lock(&mdsc->snap_flush_lock);
> +       if (!list_empty(&ci->i_snap_flush_item))
> +               put++;

What are the cases when ci is expected to not be on snap_flush_list
list (and therefore there is no corresponding reference to put)?

The reason I'm asking is that ceph_flush_snaps() is called from two
other places directly (i.e. without iterating snap_flush_list list) and
then __ceph_flush_snaps() is called from two yet other places.  The
problem that we are presented with here is that __ceph_flush_snaps()
effectively consumes a reference on ci.  Is ci protected from being
freed by handle_cap_flushsnap_ack() very soon after __send_flush_snap()
returns in all these other places?

Thanks,

                Ilya
Xiubo Li June 6, 2023, 1:29 p.m. UTC | #2
On 6/6/23 18:21, Ilya Dryomov wrote:
> On Tue, Jun 6, 2023 at 2:55 AM <xiubli@redhat.com> wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> There is a race between capsnaps flush and removing the inode from
>> 'mdsc->snap_flush_list' list:
>>
>>     == Thread A ==                     == Thread B ==
>> ceph_queue_cap_snap()
>>   -> allocate 'capsnapA'
>>   ->ihold('&ci->vfs_inode')
>>   ->add 'capsnapA' to 'ci->i_cap_snaps'
>>   ->add 'ci' to 'mdsc->snap_flush_list'
>>      ...
>>     == Thread C ==
>> ceph_flush_snaps()
>>   ->__ceph_flush_snaps()
>>    ->__send_flush_snap()
>>                                  handle_cap_flushsnap_ack()
>>                                   ->iput('&ci->vfs_inode')
>>                                     this also will release 'ci'
>>                                      ...
>>                                        == Thread D ==
>>                                  ceph_handle_snap()
>>                                   ->flush_snaps()
>>                                    ->iterate 'mdsc->snap_flush_list'
>>                                     ->get the stale 'ci'
>>   ->remove 'ci' from                ->ihold(&ci->vfs_inode) this
>>     'mdsc->snap_flush_list'           will WARNING
>>
>> To fix this we will increase the inode's i_count ref when adding 'ci'
>> to the 'mdsc->snap_flush_list' list.
>>
>> Cc: stable@vger.kernel.org
>> URL: https://bugzilla.redhat.com/show_bug.cgi?id=2209299
>> Reviewed-by: Milind Changire <mchangir@redhat.com>
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>>
>> V3:
>> - Fix two minor typo in commit comments.
>>
>>
>>
>>   fs/ceph/caps.c | 6 ++++++
>>   fs/ceph/snap.c | 4 +++-
>>   2 files changed, 9 insertions(+), 1 deletion(-)
>>
>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
>> index feabf4cc0c4f..7c2cb813aba4 100644
>> --- a/fs/ceph/caps.c
>> +++ b/fs/ceph/caps.c
>> @@ -1684,6 +1684,7 @@ void ceph_flush_snaps(struct ceph_inode_info *ci,
>>          struct inode *inode = &ci->netfs.inode;
>>          struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
>>          struct ceph_mds_session *session = NULL;
>> +       int put = 0;
> Hi Xiubo,
>
> Nit: renaming this variable to need_put and making it a bool would
> communicate the intent better.

Hi Ilya

Sure, will update it.

>>          int mds;
>>
>>          dout("ceph_flush_snaps %p\n", inode);
>> @@ -1728,8 +1729,13 @@ void ceph_flush_snaps(struct ceph_inode_info *ci,
>>                  ceph_put_mds_session(session);
>>          /* we flushed them all; remove this inode from the queue */
>>          spin_lock(&mdsc->snap_flush_lock);
>> +       if (!list_empty(&ci->i_snap_flush_item))
>> +               put++;
> What are the cases when ci is expected to not be on snap_flush_list
> list (and therefore there is no corresponding reference to put)?
>
> The reason I'm asking is that ceph_flush_snaps() is called from two
> other places directly (i.e. without iterating snap_flush_list list) and
> then __ceph_flush_snaps() is called from two yet other places.  The
> problem that we are presented with here is that __ceph_flush_snaps()
> effectively consumes a reference on ci.  Is ci protected from being
> freed by handle_cap_flushsnap_ack() very soon after __send_flush_snap()
> returns in all these other places?

There are 4 places that call 'ceph_flush_snaps()':

Cscope tag: ceph_flush_snaps
    #   line  filename / context / line
    1   3221  fs/ceph/caps.c <<__ceph_put_cap_refs>>
              ceph_flush_snaps(ci, NULL);
    2   3336  fs/ceph/caps.c <<ceph_put_wrbuffer_cap_refs>>
              ceph_flush_snaps(ci, NULL);
    3   2243  fs/ceph/inode.c <<ceph_inode_work>>
              ceph_flush_snaps(ci, NULL);
    4    941  fs/ceph/snap.c <<flush_snaps>>
              ceph_flush_snaps(ci, &session);
Type number and <Enter> (q or empty cancels):

For #1 it will add the 'ci' to the 'mdsc->snap_flush_list' list by 
calling '__ceph_finish_cap_snap()' and then call 'ceph_flush_snaps()' 
directly, or defer calling it via the queued work in #3.

The #3 is the reason why we need the 'mdsc->snap_flush_list' list.

For #2 it won't add the 'ci' to the list because it will always call the 
'ceph_flush_snaps()' directly.

For #4 it will call 'ceph_flush_snaps()' by iterating the 
'mdsc->snap_flush_list' list just before the #3 being triggered.

The problem only exists in the #1 --> #4 case, which will leave the stale 
'ci' held in the 'mdsc->snap_flush_list' list after the 'capsnap' and 
'ci' have been freed. All the other cases are okay because 'ci' will be 
protected by increasing the ref when allocating the 'capsnap', and the 
ref will be decreased in 'handle_cap_flushsnap_ack()' when freeing the 'capsnap'.

Note: '__ceph_flush_snaps()' won't increase the ref. 
'handle_cap_flushsnap_ack()' will just decrease the ref, and only 
when the ref reaches '0' will 'ci' be freed.


Thanks

- Xiubo

> Thanks,
>
>                  Ilya
>
Ilya Dryomov June 7, 2023, 8:03 a.m. UTC | #3
On Tue, Jun 6, 2023 at 3:30 PM Xiubo Li <xiubli@redhat.com> wrote:
>
>
> On 6/6/23 18:21, Ilya Dryomov wrote:
> > On Tue, Jun 6, 2023 at 2:55 AM <xiubli@redhat.com> wrote:
> >> From: Xiubo Li <xiubli@redhat.com>
> >>
> >> There is a race between capsnaps flush and removing the inode from
> >> 'mdsc->snap_flush_list' list:
> >>
> >>     == Thread A ==                     == Thread B ==
> >> ceph_queue_cap_snap()
> >>   -> allocate 'capsnapA'
> >>   ->ihold('&ci->vfs_inode')
> >>   ->add 'capsnapA' to 'ci->i_cap_snaps'
> >>   ->add 'ci' to 'mdsc->snap_flush_list'
> >>      ...
> >>     == Thread C ==
> >> ceph_flush_snaps()
> >>   ->__ceph_flush_snaps()
> >>    ->__send_flush_snap()
> >>                                  handle_cap_flushsnap_ack()
> >>                                   ->iput('&ci->vfs_inode')
> >>                                     this also will release 'ci'
> >>                                      ...
> >>                                        == Thread D ==
> >>                                  ceph_handle_snap()
> >>                                   ->flush_snaps()
> >>                                    ->iterate 'mdsc->snap_flush_list'
> >>                                     ->get the stale 'ci'
> >>   ->remove 'ci' from                ->ihold(&ci->vfs_inode) this
> >>     'mdsc->snap_flush_list'           will WARNING
> >>
> >> To fix this we will increase the inode's i_count ref when adding 'ci'
> >> to the 'mdsc->snap_flush_list' list.
> >>
> >> Cc: stable@vger.kernel.org
> >> URL: https://bugzilla.redhat.com/show_bug.cgi?id=2209299
> >> Reviewed-by: Milind Changire <mchangir@redhat.com>
> >> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> >> ---
> >>
> >> V3:
> >> - Fix two minor typo in commit comments.
> >>
> >>
> >>
> >>   fs/ceph/caps.c | 6 ++++++
> >>   fs/ceph/snap.c | 4 +++-
> >>   2 files changed, 9 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> >> index feabf4cc0c4f..7c2cb813aba4 100644
> >> --- a/fs/ceph/caps.c
> >> +++ b/fs/ceph/caps.c
> >> @@ -1684,6 +1684,7 @@ void ceph_flush_snaps(struct ceph_inode_info *ci,
> >>          struct inode *inode = &ci->netfs.inode;
> >>          struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
> >>          struct ceph_mds_session *session = NULL;
> >> +       int put = 0;
> > Hi Xiubo,
> >
> > Nit: renaming this variable to need_put and making it a bool would
> > communicate the intent better.
>
> Hi Ilya
>
> Sure, will update it.
>
> >>          int mds;
> >>
> >>          dout("ceph_flush_snaps %p\n", inode);
> >> @@ -1728,8 +1729,13 @@ void ceph_flush_snaps(struct ceph_inode_info *ci,
> >>                  ceph_put_mds_session(session);
> >>          /* we flushed them all; remove this inode from the queue */
> >>          spin_lock(&mdsc->snap_flush_lock);
> >> +       if (!list_empty(&ci->i_snap_flush_item))
> >> +               put++;
> > What are the cases when ci is expected to not be on snap_flush_list
> > list (and therefore there is no corresponding reference to put)?
> >
> > The reason I'm asking is that ceph_flush_snaps() is called from two
> > other places directly (i.e. without iterating snap_flush_list list) and
> > then __ceph_flush_snaps() is called from two yet other places.  The
> > problem that we are presented with here is that __ceph_flush_snaps()
> > effectively consumes a reference on ci.  Is ci protected from being
> > freed by handle_cap_flushsnap_ack() very soon after __send_flush_snap()
> > returns in all these other places?
>
> There are 4 places will call the 'ceph_flush_snaps()':
>
> Cscope tag: ceph_flush_snaps
>     #   line  filename / context / line
>     1   3221  fs/ceph/caps.c <<__ceph_put_cap_refs>>
>               ceph_flush_snaps(ci, NULL);
>     2   3336  fs/ceph/caps.c <<ceph_put_wrbuffer_cap_refs>>
>               ceph_flush_snaps(ci, NULL);
>     3   2243  fs/ceph/inode.c <<ceph_inode_work>>
>               ceph_flush_snaps(ci, NULL);
>     4    941  fs/ceph/snap.c <<flush_snaps>>
>               ceph_flush_snaps(ci, &session);
> Type number and <Enter> (q or empty cancels):
>
> For #1 it will add the 'ci' to the 'mdsc->snap_flush_list' list by
> calling '__ceph_finish_cap_snap()' and then call the
> 'ceph_flush_snaps()' directly or defer call it in the queue work in #3.
>
> The #3 is the reason why we need the 'mdsc->snap_flush_list' list.
>
> For #2 it won't add the 'ci' to the list because it will always call the
> 'ceph_flush_snaps()' directly.
>
> For #4 it will call 'ceph_flush_snaps()' by iterating the
> 'mdsc->snap_flush_list' list just before the #3 being triggered.
>
> The problem only exists in case of #1 --> #4, which will make the stale
> 'ci' to be held in the 'mdsc->snap_flush_list' list after 'capsnap' and
> 'ci' being freed. All the other cases are okay because the 'ci' will be
> protected by increasing the ref when allocating the 'capsnap' and will
> decrease the ref in 'handle_cap_flushsnap_ack()' when freeing the 'capsnap'.
>
> Note: the '__ceph_flush_snaps()' won't increase the ref. The
> 'handle_cap_flushsnap_ack()' will just try to decrease the ref and only
> in case the ref reaches to '0' will the 'ci' be freed.

So my question is: are all __ceph_flush_snaps() callers guaranteed to
hold an extra (i.e. one that is not tied to capsnap) reference on ci so
that when handle_cap_flushsnap_ack() drops one that is tied to capsnap
the reference count doesn't reach 0?  It sounds like you are confident
that there is no issue with ceph_flush_snaps() callers, but it would be
nice if you could confirm the same for bare __ceph_flush_snaps() call
sites in caps.c.

Thanks,

                Ilya
Xiubo Li June 8, 2023, 2:07 a.m. UTC | #4
On 6/7/23 16:03, Ilya Dryomov wrote:
> On Tue, Jun 6, 2023 at 3:30 PM Xiubo Li <xiubli@redhat.com> wrote:
>>
>> On 6/6/23 18:21, Ilya Dryomov wrote:
>>> On Tue, Jun 6, 2023 at 2:55 AM <xiubli@redhat.com> wrote:
>>>> From: Xiubo Li <xiubli@redhat.com>
>>>>
>>>> There is a race between capsnaps flush and removing the inode from
>>>> 'mdsc->snap_flush_list' list:
>>>>
>>>>      == Thread A ==                     == Thread B ==
>>>> ceph_queue_cap_snap()
>>>>    -> allocate 'capsnapA'
>>>>    ->ihold('&ci->vfs_inode')
>>>>    ->add 'capsnapA' to 'ci->i_cap_snaps'
>>>>    ->add 'ci' to 'mdsc->snap_flush_list'
>>>>       ...
>>>>      == Thread C ==
>>>> ceph_flush_snaps()
>>>>    ->__ceph_flush_snaps()
>>>>     ->__send_flush_snap()
>>>>                                   handle_cap_flushsnap_ack()
>>>>                                    ->iput('&ci->vfs_inode')
>>>>                                      this also will release 'ci'
>>>>                                       ...
>>>>                                         == Thread D ==
>>>>                                   ceph_handle_snap()
>>>>                                    ->flush_snaps()
>>>>                                     ->iterate 'mdsc->snap_flush_list'
>>>>                                      ->get the stale 'ci'
>>>>    ->remove 'ci' from                ->ihold(&ci->vfs_inode) this
>>>>      'mdsc->snap_flush_list'           will WARNING
>>>>
>>>> To fix this we will increase the inode's i_count ref when adding 'ci'
>>>> to the 'mdsc->snap_flush_list' list.
>>>>
>>>> Cc: stable@vger.kernel.org
>>>> URL: https://bugzilla.redhat.com/show_bug.cgi?id=2209299
>>>> Reviewed-by: Milind Changire <mchangir@redhat.com>
>>>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>>>> ---
>>>>
>>>> V3:
>>>> - Fix two minor typo in commit comments.
>>>>
>>>>
>>>>
>>>>    fs/ceph/caps.c | 6 ++++++
>>>>    fs/ceph/snap.c | 4 +++-
>>>>    2 files changed, 9 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
>>>> index feabf4cc0c4f..7c2cb813aba4 100644
>>>> --- a/fs/ceph/caps.c
>>>> +++ b/fs/ceph/caps.c
>>>> @@ -1684,6 +1684,7 @@ void ceph_flush_snaps(struct ceph_inode_info *ci,
>>>>           struct inode *inode = &ci->netfs.inode;
>>>>           struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
>>>>           struct ceph_mds_session *session = NULL;
>>>> +       int put = 0;
>>> Hi Xiubo,
>>>
>>> Nit: renaming this variable to need_put and making it a bool would
>>> communicate the intent better.
>> Hi Ilya
>>
>> Sure, will update it.
>>
>>>>           int mds;
>>>>
>>>>           dout("ceph_flush_snaps %p\n", inode);
>>>> @@ -1728,8 +1729,13 @@ void ceph_flush_snaps(struct ceph_inode_info *ci,
>>>>                   ceph_put_mds_session(session);
>>>>           /* we flushed them all; remove this inode from the queue */
>>>>           spin_lock(&mdsc->snap_flush_lock);
>>>> +       if (!list_empty(&ci->i_snap_flush_item))
>>>> +               put++;
>>> What are the cases when ci is expected to not be on snap_flush_list
>>> list (and therefore there is no corresponding reference to put)?
>>>
>>> The reason I'm asking is that ceph_flush_snaps() is called from two
>>> other places directly (i.e. without iterating snap_flush_list list) and
>>> then __ceph_flush_snaps() is called from two yet other places.  The
>>> problem that we are presented with here is that __ceph_flush_snaps()
>>> effectively consumes a reference on ci.  Is ci protected from being
>>> freed by handle_cap_flushsnap_ack() very soon after __send_flush_snap()
>>> returns in all these other places?
>> There are 4 places will call the 'ceph_flush_snaps()':
>>
>> Cscope tag: ceph_flush_snaps
>>      #   line  filename / context / line
>>      1   3221  fs/ceph/caps.c <<__ceph_put_cap_refs>>
>>                ceph_flush_snaps(ci, NULL);
>>      2   3336  fs/ceph/caps.c <<ceph_put_wrbuffer_cap_refs>>
>>                ceph_flush_snaps(ci, NULL);
>>      3   2243  fs/ceph/inode.c <<ceph_inode_work>>
>>                ceph_flush_snaps(ci, NULL);
>>      4    941  fs/ceph/snap.c <<flush_snaps>>
>>                ceph_flush_snaps(ci, &session);
>> Type number and <Enter> (q or empty cancels):
>>
>> For #1 it will add the 'ci' to the 'mdsc->snap_flush_list' list by
>> calling '__ceph_finish_cap_snap()' and then call the
>> 'ceph_flush_snaps()' directly or defer call it in the queue work in #3.
>>
>> The #3 is the reason why we need the 'mdsc->snap_flush_list' list.
>>
>> For #2 it won't add the 'ci' to the list because it will always call the
>> 'ceph_flush_snaps()' directly.
>>
>> For #4 it will call 'ceph_flush_snaps()' by iterating the
>> 'mdsc->snap_flush_list' list just before the #3 being triggered.
>>
>> The problem only exists in case of #1 --> #4, which will make the stale
>> 'ci' to be held in the 'mdsc->snap_flush_list' list after 'capsnap' and
>> 'ci' being freed. All the other cases are okay because the 'ci' will be
>> protected by increasing the ref when allocating the 'capsnap' and will
>> decrease the ref in 'handle_cap_flushsnap_ack()' when freeing the 'capsnap'.
>>
>> Note: the '__ceph_flush_snaps()' won't increase the ref. The
>> 'handle_cap_flushsnap_ack()' will just try to decrease the ref and only
>> in case the ref reaches to '0' will the 'ci' be freed.
> So my question is: are all __ceph_flush_snaps() callers guaranteed to
> hold an extra (i.e. one that is not tied to capsnap) reference on ci so
> that when handle_cap_flushsnap_ack() drops one that is tied to capsnap
> the reference count doesn't reach 0?  It sounds like you are confident
> that there is no issue with ceph_flush_snaps() callers, but it would be
> nice if you could confirm the same for bare __ceph_flush_snaps() call
> sites in caps.c.

Yeah, I checked the code again carefully.  I am sure that by the time 
'__ceph_flush_snaps()' is called, the reference has already been handled properly too.

Once the 'capsnap' is in 'ci->i_cap_snaps' list the inode's reference 
must have already been increased, please see 'ceph_queue_cap_snap()', 
which will allocate and insert 'capsnap' to this list.

Apart from the 'mdsc->snap_flush_list' list, the 'capsnap' will be added to 
three other lists, which are 'ci->i_cap_snaps', 'mdsc->cap_flush_list' 
and 'ci->i_cap_flush_list'.


  744 INIT_LIST_HEAD(&capsnap->cap_flush.i_list);  --> ci->i_cap_flush_list
  745 INIT_LIST_HEAD(&capsnap->cap_flush.g_list);  --> mdsc->cap_flush_list
  746 INIT_LIST_HEAD(&capsnap->ci_item);           --> ci->i_cap_snaps


But the 'capsnap' will be removed from all of them just before freeing the 
'capsnap' and calling iput(inode); for more detail please see:


   handle_cap_flushsnap_ack()
       --> ceph_remove_capsnap()
             --> __ceph_remove_capsnap()
                  --> list_del_init(&capsnap->ci_item);   // remove from 'ci->i_cap_snaps' list
                  --> __detach_cap_flush_from_ci()        // remove from 'ci->i_cap_flush_list' list
                  --> __detach_cap_flush_from_mdsc()      // remove from 'mdsc->cap_flush_list' list
       --> ceph_put_cap_snap(capsnap)
       --> iput(inode)


The 'mdsc->cap_flush_list' list will be protected by 
'mdsc->cap_dirty_lock', so there is not any issue here.

So I am sure that just before the 'capsnap' is freed, 'ci' won't be 
released in the above case.

Thanks

- Xiubo

> Thanks,
>
>                  Ilya
>
diff mbox series

Patch

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index feabf4cc0c4f..7c2cb813aba4 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1684,6 +1684,7 @@  void ceph_flush_snaps(struct ceph_inode_info *ci,
 	struct inode *inode = &ci->netfs.inode;
 	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
 	struct ceph_mds_session *session = NULL;
+	int put = 0;
 	int mds;
 
 	dout("ceph_flush_snaps %p\n", inode);
@@ -1728,8 +1729,13 @@  void ceph_flush_snaps(struct ceph_inode_info *ci,
 		ceph_put_mds_session(session);
 	/* we flushed them all; remove this inode from the queue */
 	spin_lock(&mdsc->snap_flush_lock);
+	if (!list_empty(&ci->i_snap_flush_item))
+		put++;
 	list_del_init(&ci->i_snap_flush_item);
 	spin_unlock(&mdsc->snap_flush_lock);
+
+	if (put)
+		iput(inode);
 }
 
 /*
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 5a4bf0201737..d5ad10d94424 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -697,8 +697,10 @@  int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
 	     capsnap->size);
 
 	spin_lock(&mdsc->snap_flush_lock);
-	if (list_empty(&ci->i_snap_flush_item))
+	if (list_empty(&ci->i_snap_flush_item)) {
+		ihold(inode);
 		list_add_tail(&ci->i_snap_flush_item, &mdsc->snap_flush_list);
+	}
 	spin_unlock(&mdsc->snap_flush_lock);
 	return 1;  /* caller may want to ceph_flush_snaps */
 }