diff mbox series

ceph: clean the dirty page when session is closed or rejected

Message ID 20191209092830.22157-1-xiubli@redhat.com (mailing list archive)
State New, archived
Headers show
Series ceph: clean the dirty page when session is closed or rejected | expand

Commit Message

Xiubo Li Dec. 9, 2019, 9:28 a.m. UTC
From: Xiubo Li <xiubli@redhat.com>

Try to queue writeback and invalidate the dirty pages when sessions
are closed, rejected or reconnect denied.

Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/mds_client.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

Comments

Jeff Layton Dec. 9, 2019, 11:38 a.m. UTC | #1
On Mon, 2019-12-09 at 04:28 -0500, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
> 
> Try to queue writeback and invalidate the dirty pages when sessions
> are closed, rejected or reconnect denied.
> 
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>  fs/ceph/mds_client.c | 13 +++++++++++++
>  1 file changed, 13 insertions(+)
> 

Can you explain a bit more about the problem you're fixing? In what
situation is this currently broken, and what are the effects of that
breakage?

> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index be1ac9f8e0e6..68f3b5ed6ac8 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -1385,9 +1385,11 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>  {
>  	struct ceph_fs_client *fsc = (struct ceph_fs_client *)arg;
>  	struct ceph_inode_info *ci = ceph_inode(inode);
> +	struct ceph_mds_session *session = cap->session;
>  	LIST_HEAD(to_remove);
>  	bool dirty_dropped = false;
>  	bool invalidate = false;
> +	bool writeback = false;
>  
>  	dout("removing cap %p, ci is %p, inode is %p\n",
>  	     cap, ci, &ci->vfs_inode);
> @@ -1398,12 +1400,21 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>  	if (!ci->i_auth_cap) {
>  		struct ceph_cap_flush *cf;
>  		struct ceph_mds_client *mdsc = fsc->mdsc;
> +		int s_state = session->s_state;
>  
>  		if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
>  			if (inode->i_data.nrpages > 0)
>  				invalidate = true;
>  			if (ci->i_wrbuffer_ref > 0)
>  				mapping_set_error(&inode->i_data, -EIO);
> +		} else if (s_state == CEPH_MDS_SESSION_CLOSED ||
> +			   s_state == CEPH_MDS_SESSION_REJECTED) {
> +			/* reconnect denied or rejected */
> +			if (!__ceph_is_any_real_caps(ci) &&
> +			    inode->i_data.nrpages > 0)
> +				invalidate = true;
> +			if (ci->i_wrbuffer_ref > 0)
> +				writeback = true;

I don't know here. If the session is CLOSED/REJECTED, is kicking off
writeback the right thing to do? In principle, this means that the
client may have been blacklisted and none of the writes will succeed.

Maybe this is the right thing to do, but I think I need more convincing.

>  		}
>  
>  		while (!list_empty(&ci->i_cap_flush_list)) {
> @@ -1472,6 +1483,8 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>  	}
>  
>  	wake_up_all(&ci->i_cap_wq);
> +	if (writeback)
> +		ceph_queue_writeback(inode);
>  	if (invalidate)
>  		ceph_queue_invalidate(inode);
>  	if (dirty_dropped)

Thanks,
Xiubo Li Dec. 9, 2019, 11:54 a.m. UTC | #2
On 2019/12/9 19:38, Jeff Layton wrote:
> On Mon, 2019-12-09 at 04:28 -0500, xiubli@redhat.com wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> Try to queue writeback and invalidate the dirty pages when sessions
>> are closed, rejected or reconnect denied.
>>
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>>   fs/ceph/mds_client.c | 13 +++++++++++++
>>   1 file changed, 13 insertions(+)
>>
> Can you explain a bit more about the problem you're fixing? In what
> situation is this currently broken, and what are the effects of that
> breakage?
>
>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>> index be1ac9f8e0e6..68f3b5ed6ac8 100644
>> --- a/fs/ceph/mds_client.c
>> +++ b/fs/ceph/mds_client.c
>> @@ -1385,9 +1385,11 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>>   {
>>   	struct ceph_fs_client *fsc = (struct ceph_fs_client *)arg;
>>   	struct ceph_inode_info *ci = ceph_inode(inode);
>> +	struct ceph_mds_session *session = cap->session;
>>   	LIST_HEAD(to_remove);
>>   	bool dirty_dropped = false;
>>   	bool invalidate = false;
>> +	bool writeback = false;
>>   
>>   	dout("removing cap %p, ci is %p, inode is %p\n",
>>   	     cap, ci, &ci->vfs_inode);
>> @@ -1398,12 +1400,21 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>>   	if (!ci->i_auth_cap) {
>>   		struct ceph_cap_flush *cf;
>>   		struct ceph_mds_client *mdsc = fsc->mdsc;
>> +		int s_state = session->s_state;
>>   
>>   		if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
>>   			if (inode->i_data.nrpages > 0)
>>   				invalidate = true;
>>   			if (ci->i_wrbuffer_ref > 0)
>>   				mapping_set_error(&inode->i_data, -EIO);
>> +		} else if (s_state == CEPH_MDS_SESSION_CLOSED ||
>> +			   s_state == CEPH_MDS_SESSION_REJECTED) {
>> +			/* reconnect denied or rejected */
>> +			if (!__ceph_is_any_real_caps(ci) &&
>> +			    inode->i_data.nrpages > 0)
>> +				invalidate = true;
>> +			if (ci->i_wrbuffer_ref > 0)
>> +				writeback = true;
> I don't know here. If the session is CLOSED/REJECTED, is kicking off
> writeback the right thing to do? In principle, this means that the
> client may have been blacklisted and none of the writes will succeed.

If the client was blacklisted,  it will be not safe to still buffer the 
data and flush it after the related sessions are reconnected without 
remounting.

Maybe we need to throw it directly.


> Maybe this is the right thing to do, but I think I need more convincing.
>
>>   		}
>>   
>>   		while (!list_empty(&ci->i_cap_flush_list)) {
>> @@ -1472,6 +1483,8 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>>   	}
>>   
>>   	wake_up_all(&ci->i_cap_wq);
>> +	if (writeback)
>> +		ceph_queue_writeback(inode);
>>   	if (invalidate)
>>   		ceph_queue_invalidate(inode);
>>   	if (dirty_dropped)
> Thanks,
Jeff Layton Dec. 9, 2019, 12:46 p.m. UTC | #3
On Mon, 2019-12-09 at 19:54 +0800, Xiubo Li wrote:
> On 2019/12/9 19:38, Jeff Layton wrote:
> > On Mon, 2019-12-09 at 04:28 -0500, xiubli@redhat.com wrote:
> > > From: Xiubo Li <xiubli@redhat.com>
> > > 
> > > Try to queue writeback and invalidate the dirty pages when sessions
> > > are closed, rejected or reconnect denied.
> > > 
> > > Signed-off-by: Xiubo Li <xiubli@redhat.com>
> > > ---
> > >   fs/ceph/mds_client.c | 13 +++++++++++++
> > >   1 file changed, 13 insertions(+)
> > > 
> > Can you explain a bit more about the problem you're fixing? In what
> > situation is this currently broken, and what are the effects of that
> > breakage?
> > 
> > > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> > > index be1ac9f8e0e6..68f3b5ed6ac8 100644
> > > --- a/fs/ceph/mds_client.c
> > > +++ b/fs/ceph/mds_client.c
> > > @@ -1385,9 +1385,11 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
> > >   {
> > >   	struct ceph_fs_client *fsc = (struct ceph_fs_client *)arg;
> > >   	struct ceph_inode_info *ci = ceph_inode(inode);
> > > +	struct ceph_mds_session *session = cap->session;
> > >   	LIST_HEAD(to_remove);
> > >   	bool dirty_dropped = false;
> > >   	bool invalidate = false;
> > > +	bool writeback = false;
> > >   
> > >   	dout("removing cap %p, ci is %p, inode is %p\n",
> > >   	     cap, ci, &ci->vfs_inode);
> > > @@ -1398,12 +1400,21 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
> > >   	if (!ci->i_auth_cap) {
> > >   		struct ceph_cap_flush *cf;
> > >   		struct ceph_mds_client *mdsc = fsc->mdsc;
> > > +		int s_state = session->s_state;
> > >   
> > >   		if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
> > >   			if (inode->i_data.nrpages > 0)
> > >   				invalidate = true;
> > >   			if (ci->i_wrbuffer_ref > 0)
> > >   				mapping_set_error(&inode->i_data, -EIO);
> > > +		} else if (s_state == CEPH_MDS_SESSION_CLOSED ||
> > > +			   s_state == CEPH_MDS_SESSION_REJECTED) {
> > > +			/* reconnect denied or rejected */
> > > +			if (!__ceph_is_any_real_caps(ci) &&
> > > +			    inode->i_data.nrpages > 0)
> > > +				invalidate = true;
> > > +			if (ci->i_wrbuffer_ref > 0)
> > > +				writeback = true;
> > I don't know here. If the session is CLOSED/REJECTED, is kicking off
> > writeback the right thing to do? In principle, this means that the
> > client may have been blacklisted and none of the writes will succeed.
> 
> If the client was blacklisted,  it will be not safe to still buffer the 
> data and flush it after the related sessions are reconnected without 
> remounting.
> 
> Maybe we need to throw it directly.
> 
> 

It depends. We have tunable behavior for this now.

I think you may want to look over the recover_session= patches  that
were merged v5.4 ((131d7eb4faa1fc, and previous patches). That mount
option governs what happens if the client is blacklisted, and then the
blacklisting is later removed.

Will this patch change either recover_session= behavior? If so, then you
should explain how and why.


> > >   		}
> > >   
> > >   		while (!list_empty(&ci->i_cap_flush_list)) {
> > > @@ -1472,6 +1483,8 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
> > >   	}
> > >   
> > >   	wake_up_all(&ci->i_cap_wq);
> > > +	if (writeback)
> > > +		ceph_queue_writeback(inode);
> > >   	if (invalidate)
> > >   		ceph_queue_invalidate(inode);
> > >   	if (dirty_dropped)
> > Thanks,
> 
>
Xiubo Li Dec. 10, 2019, 2:45 a.m. UTC | #4
On 2019/12/9 20:46, Jeff Layton wrote:
> On Mon, 2019-12-09 at 19:54 +0800, Xiubo Li wrote:
>> On 2019/12/9 19:38, Jeff Layton wrote:
>>> On Mon, 2019-12-09 at 04:28 -0500, xiubli@redhat.com wrote:
>>>> From: Xiubo Li <xiubli@redhat.com>
>>>>
>>>> Try to queue writeback and invalidate the dirty pages when sessions
>>>> are closed, rejected or reconnect denied.
>>>>
>>>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>>>> ---
>>>>    fs/ceph/mds_client.c | 13 +++++++++++++
>>>>    1 file changed, 13 insertions(+)
>>>>
>>> Can you explain a bit more about the problem you're fixing? In what
>>> situation is this currently broken, and what are the effects of that
>>> breakage?
>>>
>>>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>>>> index be1ac9f8e0e6..68f3b5ed6ac8 100644
>>>> --- a/fs/ceph/mds_client.c
>>>> +++ b/fs/ceph/mds_client.c
>>>> @@ -1385,9 +1385,11 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>>>>    {
>>>>    	struct ceph_fs_client *fsc = (struct ceph_fs_client *)arg;
>>>>    	struct ceph_inode_info *ci = ceph_inode(inode);
>>>> +	struct ceph_mds_session *session = cap->session;
>>>>    	LIST_HEAD(to_remove);
>>>>    	bool dirty_dropped = false;
>>>>    	bool invalidate = false;
>>>> +	bool writeback = false;
>>>>    
>>>>    	dout("removing cap %p, ci is %p, inode is %p\n",
>>>>    	     cap, ci, &ci->vfs_inode);
>>>> @@ -1398,12 +1400,21 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>>>>    	if (!ci->i_auth_cap) {
>>>>    		struct ceph_cap_flush *cf;
>>>>    		struct ceph_mds_client *mdsc = fsc->mdsc;
>>>> +		int s_state = session->s_state;
>>>>    
>>>>    		if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
>>>>    			if (inode->i_data.nrpages > 0)
>>>>    				invalidate = true;
>>>>    			if (ci->i_wrbuffer_ref > 0)
>>>>    				mapping_set_error(&inode->i_data, -EIO);
>>>> +		} else if (s_state == CEPH_MDS_SESSION_CLOSED ||
>>>> +			   s_state == CEPH_MDS_SESSION_REJECTED) {
>>>> +			/* reconnect denied or rejected */
>>>> +			if (!__ceph_is_any_real_caps(ci) &&
>>>> +			    inode->i_data.nrpages > 0)
>>>> +				invalidate = true;
>>>> +			if (ci->i_wrbuffer_ref > 0)
>>>> +				writeback = true;
>>> I don't know here. If the session is CLOSED/REJECTED, is kicking off
>>> writeback the right thing to do? In principle, this means that the
>>> client may have been blacklisted and none of the writes will succeed.
>> If the client was blacklisted,  it will be not safe to still buffer the
>> data and flush it after the related sessions are reconnected without
>> remounting.
>>
>> Maybe we need to throw it directly.
>>
>>
> It depends. We have tunable behavior for this now.
>
> I think you may want to look over the recover_session= patches  that
> were merged v5.4 ((131d7eb4faa1fc, and previous patches). That mount
> option governs what happens if the client is blacklisted, and then the
> blacklisting is later removed.

Yeah, I checked that patches before and from my test of the blacklist 
stuff, the kclient's sessions won't recovery automatically even after 
the kclient is removed from the blacklisting. And the kclient will force 
remount after 30 minutes if recover_session=clean.

After a session is rejected due to blacklisting, the handle_session(case 
CEPH_MDS_SESSION_REJECTED: ) will requeue some inflight requests of this 
session, but these requests or the new requests couldn't use the 
rejected session, because the session->blacklisted=true could only be 
cleaned in the recovery_session=clean code until 30 minutes timedout, as 
default with recovery_session=no the session maybe keep 
session->blacklisted for ever (?), then I must do the remount manually.

I may not totally understand the purpose of the recovery_session= 
patches and I may miss something important here.

 From the https://docs.ceph.com/docs/master/cephfs/eviction/, maybe we 
could just throw away the dirty pages of the sessions directly, so after 
reconnected we can make sure it is safe without any stale buffered data.


Thanks.

> Will this patch change either recover_session= behavior? If so, then you
> should explain how and why.
>
>
>>>>    		}
>>>>    
>>>>    		while (!list_empty(&ci->i_cap_flush_list)) {
>>>> @@ -1472,6 +1483,8 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>>>>    	}
>>>>    
>>>>    	wake_up_all(&ci->i_cap_wq);
>>>> +	if (writeback)
>>>> +		ceph_queue_writeback(inode);
>>>>    	if (invalidate)
>>>>    		ceph_queue_invalidate(inode);
>>>>    	if (dirty_dropped)
>>> Thanks,
>>
Yan, Zheng Dec. 10, 2019, 3:30 a.m. UTC | #5
On 12/9/19 7:54 PM, Xiubo Li wrote:
> On 2019/12/9 19:38, Jeff Layton wrote:
>> On Mon, 2019-12-09 at 04:28 -0500, xiubli@redhat.com wrote:
>>> From: Xiubo Li <xiubli@redhat.com>
>>>
>>> Try to queue writeback and invalidate the dirty pages when sessions
>>> are closed, rejected or reconnect denied.
>>>
>>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>>> ---
>>>   fs/ceph/mds_client.c | 13 +++++++++++++
>>>   1 file changed, 13 insertions(+)
>>>
>> Can you explain a bit more about the problem you're fixing? In what
>> situation is this currently broken, and what are the effects of that
>> breakage?
>>
>>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>>> index be1ac9f8e0e6..68f3b5ed6ac8 100644
>>> --- a/fs/ceph/mds_client.c
>>> +++ b/fs/ceph/mds_client.c
>>> @@ -1385,9 +1385,11 @@ static int remove_session_caps_cb(struct inode 
>>> *inode, struct ceph_cap *cap,
>>>   {
>>>       struct ceph_fs_client *fsc = (struct ceph_fs_client *)arg;
>>>       struct ceph_inode_info *ci = ceph_inode(inode);
>>> +    struct ceph_mds_session *session = cap->session;
>>>       LIST_HEAD(to_remove);
>>>       bool dirty_dropped = false;
>>>       bool invalidate = false;
>>> +    bool writeback = false;
>>>       dout("removing cap %p, ci is %p, inode is %p\n",
>>>            cap, ci, &ci->vfs_inode);
>>> @@ -1398,12 +1400,21 @@ static int remove_session_caps_cb(struct 
>>> inode *inode, struct ceph_cap *cap,
>>>       if (!ci->i_auth_cap) {
>>>           struct ceph_cap_flush *cf;
>>>           struct ceph_mds_client *mdsc = fsc->mdsc;
>>> +        int s_state = session->s_state;
>>>           if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
>>>               if (inode->i_data.nrpages > 0)
>>>                   invalidate = true;
>>>               if (ci->i_wrbuffer_ref > 0)
>>>                   mapping_set_error(&inode->i_data, -EIO);
>>> +        } else if (s_state == CEPH_MDS_SESSION_CLOSED ||
>>> +               s_state == CEPH_MDS_SESSION_REJECTED) {
>>> +            /* reconnect denied or rejected */
>>> +            if (!__ceph_is_any_real_caps(ci) &&
>>> +                inode->i_data.nrpages > 0)
>>> +                invalidate = true;
>>> +            if (ci->i_wrbuffer_ref > 0)
>>> +                writeback = true;
>> I don't know here. If the session is CLOSED/REJECTED, is kicking off
>> writeback the right thing to do? In principle, this means that the
>> client may have been blacklisted and none of the writes will succeed.
> 
> If the client was blacklisted,  it will be not safe to still buffer the 
> data and flush it after the related sessions are reconnected without 
> remounting.
> 
> Maybe we need to throw it directly.

The auto reconnect code will invalidate page cache. I don't see why we 
need to add this code.

> 
> 
>> Maybe this is the right thing to do, but I think I need more convincing.
>>
>>>           }
>>>           while (!list_empty(&ci->i_cap_flush_list)) {
>>> @@ -1472,6 +1483,8 @@ static int remove_session_caps_cb(struct inode 
>>> *inode, struct ceph_cap *cap,
>>>       }
>>>       wake_up_all(&ci->i_cap_wq);
>>> +    if (writeback)
>>> +        ceph_queue_writeback(inode);
>>>       if (invalidate)
>>>           ceph_queue_invalidate(inode);
>>>       if (dirty_dropped)
>> Thanks,
> 
>
Xiubo Li Dec. 10, 2019, 5:07 a.m. UTC | #6
On 2019/12/10 11:30, Yan, Zheng wrote:
> On 12/9/19 7:54 PM, Xiubo Li wrote:
>> On 2019/12/9 19:38, Jeff Layton wrote:
>>> On Mon, 2019-12-09 at 04:28 -0500, xiubli@redhat.com wrote:
>>>> From: Xiubo Li <xiubli@redhat.com>
>>>>
>>>> Try to queue writeback and invalidate the dirty pages when sessions
>>>> are closed, rejected or reconnect denied.
>>>>
>>>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>>>> ---
>>>>   fs/ceph/mds_client.c | 13 +++++++++++++
>>>>   1 file changed, 13 insertions(+)
>>>>
>>> Can you explain a bit more about the problem you're fixing? In what
>>> situation is this currently broken, and what are the effects of that
>>> breakage?
>>>
>>>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>>>> index be1ac9f8e0e6..68f3b5ed6ac8 100644
>>>> --- a/fs/ceph/mds_client.c
>>>> +++ b/fs/ceph/mds_client.c
>>>> @@ -1385,9 +1385,11 @@ static int remove_session_caps_cb(struct 
>>>> inode *inode, struct ceph_cap *cap,
>>>>   {
>>>>       struct ceph_fs_client *fsc = (struct ceph_fs_client *)arg;
>>>>       struct ceph_inode_info *ci = ceph_inode(inode);
>>>> +    struct ceph_mds_session *session = cap->session;
>>>>       LIST_HEAD(to_remove);
>>>>       bool dirty_dropped = false;
>>>>       bool invalidate = false;
>>>> +    bool writeback = false;
>>>>       dout("removing cap %p, ci is %p, inode is %p\n",
>>>>            cap, ci, &ci->vfs_inode);
>>>> @@ -1398,12 +1400,21 @@ static int remove_session_caps_cb(struct 
>>>> inode *inode, struct ceph_cap *cap,
>>>>       if (!ci->i_auth_cap) {
>>>>           struct ceph_cap_flush *cf;
>>>>           struct ceph_mds_client *mdsc = fsc->mdsc;
>>>> +        int s_state = session->s_state;
>>>>           if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
>>>>               if (inode->i_data.nrpages > 0)
>>>>                   invalidate = true;
>>>>               if (ci->i_wrbuffer_ref > 0)
>>>>                   mapping_set_error(&inode->i_data, -EIO);
>>>> +        } else if (s_state == CEPH_MDS_SESSION_CLOSED ||
>>>> +               s_state == CEPH_MDS_SESSION_REJECTED) {
>>>> +            /* reconnect denied or rejected */
>>>> +            if (!__ceph_is_any_real_caps(ci) &&
>>>> +                inode->i_data.nrpages > 0)
>>>> +                invalidate = true;
>>>> +            if (ci->i_wrbuffer_ref > 0)
>>>> +                writeback = true;
>>> I don't know here. If the session is CLOSED/REJECTED, is kicking off
>>> writeback the right thing to do? In principle, this means that the
>>> client may have been blacklisted and none of the writes will succeed.
>>
>> If the client was blacklisted,  it will be not safe to still buffer 
>> the data and flush it after the related sessions are reconnected 
>> without remounting.
>>
>> Maybe we need to throw it directly.
>
> The auto reconnect code will invalidate page cache. I don't see why we 
> need to add this code.
>
Yeah, it is.

While for none reconnect cases, such as when decreasing the mds_max in 
the cluster side, and the kclient will release some extra sessions, 
should we also do something for the page cache ?

Thanks.

>>
>>
>>> Maybe this is the right thing to do, but I think I need more 
>>> convincing.
>>>
>>>>           }
>>>>           while (!list_empty(&ci->i_cap_flush_list)) {
>>>> @@ -1472,6 +1483,8 @@ static int remove_session_caps_cb(struct 
>>>> inode *inode, struct ceph_cap *cap,
>>>>       }
>>>>       wake_up_all(&ci->i_cap_wq);
>>>> +    if (writeback)
>>>> +        ceph_queue_writeback(inode);
>>>>       if (invalidate)
>>>>           ceph_queue_invalidate(inode);
>>>>       if (dirty_dropped)
>>> Thanks,
>>
>>
>
Yan, Zheng Dec. 10, 2019, 3:08 p.m. UTC | #7
On 12/10/19 1:07 PM, Xiubo Li wrote:
> On 2019/12/10 11:30, Yan, Zheng wrote:
>> On 12/9/19 7:54 PM, Xiubo Li wrote:
>>> On 2019/12/9 19:38, Jeff Layton wrote:
>>>> On Mon, 2019-12-09 at 04:28 -0500, xiubli@redhat.com wrote:
>>>>> From: Xiubo Li <xiubli@redhat.com>
>>>>>
>>>>> Try to queue writeback and invalidate the dirty pages when sessions
>>>>> are closed, rejected or reconnect denied.
>>>>>
>>>>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>>>>> ---
>>>>>   fs/ceph/mds_client.c | 13 +++++++++++++
>>>>>   1 file changed, 13 insertions(+)
>>>>>
>>>> Can you explain a bit more about the problem you're fixing? In what
>>>> situation is this currently broken, and what are the effects of that
>>>> breakage?
>>>>
>>>>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>>>>> index be1ac9f8e0e6..68f3b5ed6ac8 100644
>>>>> --- a/fs/ceph/mds_client.c
>>>>> +++ b/fs/ceph/mds_client.c
>>>>> @@ -1385,9 +1385,11 @@ static int remove_session_caps_cb(struct 
>>>>> inode *inode, struct ceph_cap *cap,
>>>>>   {
>>>>>       struct ceph_fs_client *fsc = (struct ceph_fs_client *)arg;
>>>>>       struct ceph_inode_info *ci = ceph_inode(inode);
>>>>> +    struct ceph_mds_session *session = cap->session;
>>>>>       LIST_HEAD(to_remove);
>>>>>       bool dirty_dropped = false;
>>>>>       bool invalidate = false;
>>>>> +    bool writeback = false;
>>>>>       dout("removing cap %p, ci is %p, inode is %p\n",
>>>>>            cap, ci, &ci->vfs_inode);
>>>>> @@ -1398,12 +1400,21 @@ static int remove_session_caps_cb(struct 
>>>>> inode *inode, struct ceph_cap *cap,
>>>>>       if (!ci->i_auth_cap) {
>>>>>           struct ceph_cap_flush *cf;
>>>>>           struct ceph_mds_client *mdsc = fsc->mdsc;
>>>>> +        int s_state = session->s_state;
>>>>>           if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
>>>>>               if (inode->i_data.nrpages > 0)
>>>>>                   invalidate = true;
>>>>>               if (ci->i_wrbuffer_ref > 0)
>>>>>                   mapping_set_error(&inode->i_data, -EIO);
>>>>> +        } else if (s_state == CEPH_MDS_SESSION_CLOSED ||
>>>>> +               s_state == CEPH_MDS_SESSION_REJECTED) {
>>>>> +            /* reconnect denied or rejected */
>>>>> +            if (!__ceph_is_any_real_caps(ci) &&
>>>>> +                inode->i_data.nrpages > 0)
>>>>> +                invalidate = true;
>>>>> +            if (ci->i_wrbuffer_ref > 0)
>>>>> +                writeback = true;
>>>> I don't know here. If the session is CLOSED/REJECTED, is kicking off
>>>> writeback the right thing to do? In principle, this means that the
>>>> client may have been blacklisted and none of the writes will succeed.
>>>
>>> If the client was blacklisted,  it will be not safe to still buffer 
>>> the data and flush it after the related sessions are reconnected 
>>> without remounting.
>>>
>>> Maybe we need to throw it directly.
>>
>> The auto reconnect code will invalidate page cache. I don't see why we 
>> need to add this code.
>>
> Yeah, it is.
> 
> While for none reconnect cases, such as when decreasing the mds_max in 
> the cluster side, and the kclient will release some extra sessions, 
> should we also do something for the page cache ?
> 

No, we shouldn't

> Thanks.
> 
>>>
>>>
>>>> Maybe this is the right thing to do, but I think I need more 
>>>> convincing.
>>>>
>>>>>           }
>>>>>           while (!list_empty(&ci->i_cap_flush_list)) {
>>>>> @@ -1472,6 +1483,8 @@ static int remove_session_caps_cb(struct 
>>>>> inode *inode, struct ceph_cap *cap,
>>>>>       }
>>>>>       wake_up_all(&ci->i_cap_wq);
>>>>> +    if (writeback)
>>>>> +        ceph_queue_writeback(inode);
>>>>>       if (invalidate)
>>>>>           ceph_queue_invalidate(inode);
>>>>>       if (dirty_dropped)
>>>> Thanks,
>>>
>>>
>>
>
diff mbox series

Patch

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index be1ac9f8e0e6..68f3b5ed6ac8 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1385,9 +1385,11 @@  static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
 {
 	struct ceph_fs_client *fsc = (struct ceph_fs_client *)arg;
 	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_mds_session *session = cap->session;
 	LIST_HEAD(to_remove);
 	bool dirty_dropped = false;
 	bool invalidate = false;
+	bool writeback = false;
 
 	dout("removing cap %p, ci is %p, inode is %p\n",
 	     cap, ci, &ci->vfs_inode);
@@ -1398,12 +1400,21 @@  static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
 	if (!ci->i_auth_cap) {
 		struct ceph_cap_flush *cf;
 		struct ceph_mds_client *mdsc = fsc->mdsc;
+		int s_state = session->s_state;
 
 		if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
 			if (inode->i_data.nrpages > 0)
 				invalidate = true;
 			if (ci->i_wrbuffer_ref > 0)
 				mapping_set_error(&inode->i_data, -EIO);
+		} else if (s_state == CEPH_MDS_SESSION_CLOSED ||
+			   s_state == CEPH_MDS_SESSION_REJECTED) {
+			/* reconnect denied or rejected */
+			if (!__ceph_is_any_real_caps(ci) &&
+			    inode->i_data.nrpages > 0)
+				invalidate = true;
+			if (ci->i_wrbuffer_ref > 0)
+				writeback = true;
 		}
 
 		while (!list_empty(&ci->i_cap_flush_list)) {
@@ -1472,6 +1483,8 @@  static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
 	}
 
 	wake_up_all(&ci->i_cap_wq);
+	if (writeback)
+		ceph_queue_writeback(inode);
 	if (invalidate)
 		ceph_queue_invalidate(inode);
 	if (dirty_dropped)