[v2,1/4] ceph: always renew caps if mds_wanted is insufficient
diff mbox series

Message ID 20200221131659.87777-2-zyan@redhat.com
State New
Headers show
Series
  • ceph: don't request caps for idle open files
Related show

Commit Message

Yan, Zheng Feb. 21, 2020, 1:16 p.m. UTC
original code only renews caps for inodes with CEPH_I_CAP_DROPPED flags.
The flag indicates that mds closed session and caps were dropped. This
patch is preparation for not requesting caps for idle open files.

CEPH_I_CAP_DROPPED is no longer tested by anyone, so this patch also
remove it.

Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
---
 fs/ceph/caps.c       | 36 +++++++++++++++---------------------
 fs/ceph/mds_client.c |  5 -----
 fs/ceph/super.h      | 11 +++++------
 3 files changed, 20 insertions(+), 32 deletions(-)

Comments

Jeff Layton Feb. 21, 2020, 5:17 p.m. UTC | #1
On Fri, 2020-02-21 at 21:16 +0800, Yan, Zheng wrote:
> original code only renews caps for inodes with CEPH_I_CAP_DROPPED flags.
> The flag indicates that mds closed session and caps were dropped. This
> patch is preparation for not requesting caps for idle open files.
> 
> CEPH_I_CAP_DROPPED is no longer tested by anyone, so this patch also
> remove it.
> 
> Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
> ---
>  fs/ceph/caps.c       | 36 +++++++++++++++---------------------
>  fs/ceph/mds_client.c |  5 -----
>  fs/ceph/super.h      | 11 +++++------
>  3 files changed, 20 insertions(+), 32 deletions(-)
> 
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index d05717397c2a..293920d013ff 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -2659,6 +2659,7 @@ static int try_get_cap_refs(struct inode *inode, int need, int want,
>  		}
>  	} else {
>  		int session_readonly = false;
> +		int mds_wanted;
>  		if (ci->i_auth_cap &&
>  		    (need & (CEPH_CAP_FILE_WR | CEPH_CAP_FILE_EXCL))) {
>  			struct ceph_mds_session *s = ci->i_auth_cap->session;
> @@ -2667,32 +2668,27 @@ static int try_get_cap_refs(struct inode *inode, int need, int want,
>  			spin_unlock(&s->s_cap_lock);
>  		}
>  		if (session_readonly) {
> -			dout("get_cap_refs %p needed %s but mds%d readonly\n",
> +			dout("get_cap_refs %p need %s but mds%d readonly\n",
>  			     inode, ceph_cap_string(need), ci->i_auth_cap->mds);
>  			ret = -EROFS;
>  			goto out_unlock;
>  		}
>  
> -		if (ci->i_ceph_flags & CEPH_I_CAP_DROPPED) {
> -			int mds_wanted;
> -			if (READ_ONCE(mdsc->fsc->mount_state) ==
> -			    CEPH_MOUNT_SHUTDOWN) {
> -				dout("get_cap_refs %p forced umount\n", inode);
> -				ret = -EIO;
> -				goto out_unlock;
> -			}
> -			mds_wanted = __ceph_caps_mds_wanted(ci, false);
> -			if (need & ~(mds_wanted & need)) {
> -				dout("get_cap_refs %p caps were dropped"
> -				     " (session killed?)\n", inode);
> -				ret = -ESTALE;
> -				goto out_unlock;
> -			}
> -			if (!(file_wanted & ~mds_wanted))
> -				ci->i_ceph_flags &= ~CEPH_I_CAP_DROPPED;
> +		if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
> +			dout("get_cap_refs %p forced umount\n", inode);
> +			ret = -EIO;
> +			goto out_unlock;
> +		}
> +		mds_wanted = __ceph_caps_mds_wanted(ci, false);
> +		if (need & ~mds_wanted) {
> +			dout("get_cap_refs %p need %s > mds_wanted %s\n",
> +			     inode, ceph_cap_string(need),
> +			     ceph_cap_string(mds_wanted));
> +			ret = -ESTALE;
> +			goto out_unlock;
>  		}
>  

I was able to reproduce softlockups with xfstests reliably for a little
while, but it doesn't always happen. I bisected it down to this patch
though. I suspect the problem is in the hunk above. It looks like it's
getting into a situation where this is continually returning ESTALE.

I cranked up debug logging in this function and I see this:

[  259.284839] ceph:  get_cap_refs 000000003d7d65fa ret -116 got -
[  259.284848] ceph:  get_cap_refs 000000003d7d65fa ret -116 got -
[  259.284855] ceph:  get_cap_refs 000000003d7d65fa ret -116 got -
[  259.284863] ceph:  get_cap_refs 000000003d7d65fa need Fr want Fc
[  259.284868] ceph:  get_cap_refs 000000003d7d65fa need Fr > mds_wanted -
[  259.284877] ceph:  get_cap_refs 000000003d7d65fa need Fr want Fc
[  259.284885] ceph:  get_cap_refs 000000003d7d65fa need Fr want Fc
[  259.284890] ceph:  get_cap_refs 000000003d7d65fa need Fr want Fc
[  259.284899] ceph:  get_cap_refs 000000003d7d65fa ret -116 got -
[  259.284908] ceph:  get_cap_refs 000000003d7d65fa need Fr > mds_wanted -
[  259.284918] ceph:  get_cap_refs 000000003d7d65fa need Fr want Fc
[  259.284926] ceph:  get_cap_refs 000000003d7d65fa need Fr want Fc
[  259.284945] ceph:  get_cap_refs 000000003d7d65fa need Fr > mds_wanted -
[  259.284950] ceph:  get_cap_refs 000000003d7d65fa need Fr want Fc
[  259.284961] ceph:  get_cap_refs 000000003d7d65fa need Fr want Fc
[  259.284969] ceph:  get_cap_refs 000000003d7d65fa need Fr want Fc
[  259.284975] ceph:  get_cap_refs 000000003d7d65fa ret -116 got -
[  259.284984] ceph:  get_cap_refs 000000003d7d65fa need Fr > mds_wanted -
[  259.284994] ceph:  get_cap_refs 000000003d7d65fa ret -116 got -
[  259.285003] ceph:  get_cap_refs 000000003d7d65fa need Fr want Fc

...not sure I understand the logical flow in this function well enough
to suggest a fix yet though.

> -		dout("get_cap_refs %p have %s needed %s\n", inode,
> +		dout("get_cap_refs %p have %s need %s\n", inode,
>  		     ceph_cap_string(have), ceph_cap_string(need));
>  	}
>  out_unlock:
> @@ -3646,8 +3642,6 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
>  		goto out_unlock;
>  
>  	if (target < 0) {
> -		if (cap->mds_wanted | cap->issued)
> -			ci->i_ceph_flags |= CEPH_I_CAP_DROPPED;
>  		__ceph_remove_cap(cap, false);
>  		goto out_unlock;
>  	}
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index fab9d6461a65..98d746b3bb53 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -1411,8 +1411,6 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>  	dout("removing cap %p, ci is %p, inode is %p\n",
>  	     cap, ci, &ci->vfs_inode);
>  	spin_lock(&ci->i_ceph_lock);
> -	if (cap->mds_wanted | cap->issued)
> -		ci->i_ceph_flags |= CEPH_I_CAP_DROPPED;
>  	__ceph_remove_cap(cap, false);
>  	if (!ci->i_auth_cap) {
>  		struct ceph_cap_flush *cf;
> @@ -1578,9 +1576,6 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
>  			/* mds did not re-issue stale cap */
>  			spin_lock(&ci->i_ceph_lock);
>  			cap->issued = cap->implemented = CEPH_CAP_PIN;
> -			/* make sure mds knows what we want */
> -			if (__ceph_caps_file_wanted(ci) & ~cap->mds_wanted)
> -				ci->i_ceph_flags |= CEPH_I_CAP_DROPPED;
>  			spin_unlock(&ci->i_ceph_lock);
>  		}
>  	} else if (ev == FORCE_RO) {
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index 37dc1ac8f6c3..48e84d7f48a0 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -517,12 +517,11 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
>  #define CEPH_I_POOL_RD		(1 << 4)  /* can read from pool */
>  #define CEPH_I_POOL_WR		(1 << 5)  /* can write to pool */
>  #define CEPH_I_SEC_INITED	(1 << 6)  /* security initialized */
> -#define CEPH_I_CAP_DROPPED	(1 << 7)  /* caps were forcibly dropped */
> -#define CEPH_I_KICK_FLUSH	(1 << 8)  /* kick flushing caps */
> -#define CEPH_I_FLUSH_SNAPS	(1 << 9)  /* need flush snapss */
> -#define CEPH_I_ERROR_WRITE	(1 << 10) /* have seen write errors */
> -#define CEPH_I_ERROR_FILELOCK	(1 << 11) /* have seen file lock errors */
> -#define CEPH_I_ODIRECT		(1 << 12) /* inode in direct I/O mode */
> +#define CEPH_I_KICK_FLUSH	(1 << 7)  /* kick flushing caps */
> +#define CEPH_I_FLUSH_SNAPS	(1 << 8)  /* need flush snapss */
> +#define CEPH_I_ERROR_WRITE	(1 << 9)  /* have seen write errors */
> +#define CEPH_I_ERROR_FILELOCK	(1 << 10) /* have seen file lock errors */
> +#define CEPH_I_ODIRECT		(1 << 11) /* inode in direct I/O mode */
>  
>  /*
>   * Masks of ceph inode work.
Yan, Zheng Feb. 24, 2020, 3:17 a.m. UTC | #2
On 2/22/20 1:17 AM, Jeff Layton wrote:
> On Fri, 2020-02-21 at 21:16 +0800, Yan, Zheng wrote:
>> original code only renews caps for inodes with CEPH_I_CAP_DROPPED flags.
>> The flag indicates that mds closed session and caps were dropped. This
>> patch is preparation for not requesting caps for idle open files.
>>
>> CEPH_I_CAP_DROPPED is no longer tested by anyone, so this patch also
>> remove it.
>>
>> Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
>> ---
>>   fs/ceph/caps.c       | 36 +++++++++++++++---------------------
>>   fs/ceph/mds_client.c |  5 -----
>>   fs/ceph/super.h      | 11 +++++------
>>   3 files changed, 20 insertions(+), 32 deletions(-)
>>
>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
>> index d05717397c2a..293920d013ff 100644
>> --- a/fs/ceph/caps.c
>> +++ b/fs/ceph/caps.c
>> @@ -2659,6 +2659,7 @@ static int try_get_cap_refs(struct inode *inode, int need, int want,
>>   		}
>>   	} else {
>>   		int session_readonly = false;
>> +		int mds_wanted;
>>   		if (ci->i_auth_cap &&
>>   		    (need & (CEPH_CAP_FILE_WR | CEPH_CAP_FILE_EXCL))) {
>>   			struct ceph_mds_session *s = ci->i_auth_cap->session;
>> @@ -2667,32 +2668,27 @@ static int try_get_cap_refs(struct inode *inode, int need, int want,
>>   			spin_unlock(&s->s_cap_lock);
>>   		}
>>   		if (session_readonly) {
>> -			dout("get_cap_refs %p needed %s but mds%d readonly\n",
>> +			dout("get_cap_refs %p need %s but mds%d readonly\n",
>>   			     inode, ceph_cap_string(need), ci->i_auth_cap->mds);
>>   			ret = -EROFS;
>>   			goto out_unlock;
>>   		}
>>   
>> -		if (ci->i_ceph_flags & CEPH_I_CAP_DROPPED) {
>> -			int mds_wanted;
>> -			if (READ_ONCE(mdsc->fsc->mount_state) ==
>> -			    CEPH_MOUNT_SHUTDOWN) {
>> -				dout("get_cap_refs %p forced umount\n", inode);
>> -				ret = -EIO;
>> -				goto out_unlock;
>> -			}
>> -			mds_wanted = __ceph_caps_mds_wanted(ci, false);
>> -			if (need & ~(mds_wanted & need)) {
>> -				dout("get_cap_refs %p caps were dropped"
>> -				     " (session killed?)\n", inode);
>> -				ret = -ESTALE;
>> -				goto out_unlock;
>> -			}
>> -			if (!(file_wanted & ~mds_wanted))
>> -				ci->i_ceph_flags &= ~CEPH_I_CAP_DROPPED;
>> +		if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
>> +			dout("get_cap_refs %p forced umount\n", inode);
>> +			ret = -EIO;
>> +			goto out_unlock;
>> +		}
>> +		mds_wanted = __ceph_caps_mds_wanted(ci, false);
>> +		if (need & ~mds_wanted) {
>> +			dout("get_cap_refs %p need %s > mds_wanted %s\n",
>> +			     inode, ceph_cap_string(need),
>> +			     ceph_cap_string(mds_wanted));
>> +			ret = -ESTALE;
>> +			goto out_unlock;
>>   		}
>>   
> 
> I was able to reproduce softlockups with xfstests reliably for a little
> while, but it doesn't always happen. I bisected it down to this patch
> though. I suspect the problem is in the hunk above. It looks like it's
> getting into a situation where this is continually returning ESTALE.
> 
> I cranked up debug logging in this function and I see this:
> 
> [  259.284839] ceph:  get_cap_refs 000000003d7d65fa ret -116 got -
> [  259.284848] ceph:  get_cap_refs 000000003d7d65fa ret -116 got -
> [  259.284855] ceph:  get_cap_refs 000000003d7d65fa ret -116 got -
> [  259.284863] ceph:  get_cap_refs 000000003d7d65fa need Fr want Fc
> [  259.284868] ceph:  get_cap_refs 000000003d7d65fa need Fr > mds_wanted -
> [  259.284877] ceph:  get_cap_refs 000000003d7d65fa need Fr want Fc
> [  259.284885] ceph:  get_cap_refs 000000003d7d65fa need Fr want Fc
> [  259.284890] ceph:  get_cap_refs 000000003d7d65fa need Fr want Fc
> [  259.284899] ceph:  get_cap_refs 000000003d7d65fa ret -116 got -
> [  259.284908] ceph:  get_cap_refs 000000003d7d65fa need Fr > mds_wanted -
> [  259.284918] ceph:  get_cap_refs 000000003d7d65fa need Fr want Fc
> [  259.284926] ceph:  get_cap_refs 000000003d7d65fa need Fr want Fc
> [  259.284945] ceph:  get_cap_refs 000000003d7d65fa need Fr > mds_wanted -
> [  259.284950] ceph:  get_cap_refs 000000003d7d65fa need Fr want Fc
> [  259.284961] ceph:  get_cap_refs 000000003d7d65fa need Fr want Fc
> [  259.284969] ceph:  get_cap_refs 000000003d7d65fa need Fr want Fc
> [  259.284975] ceph:  get_cap_refs 000000003d7d65fa ret -116 got -
> [  259.284984] ceph:  get_cap_refs 000000003d7d65fa need Fr > mds_wanted -
> [  259.284994] ceph:  get_cap_refs 000000003d7d65fa ret -116 got -
> [  259.285003] ceph:  get_cap_refs 000000003d7d65fa need Fr want Fc
> 

Looks like ceph_check_caps did update caps' mds_wanted. Did you test 
this patch with async dirops patches? please try reproducing this issue 
again with debug log of try_get_cap_refs(), ceph_check_caps() and 
ceph_renew_caps().

Thanks
Yan, Zheng


> ...not sure I understand the logical flow in this function well enough
> to suggest a fix yet though.
> 
>> -		dout("get_cap_refs %p have %s needed %s\n", inode,
>> +		dout("get_cap_refs %p have %s need %s\n", inode,
>>   		     ceph_cap_string(have), ceph_cap_string(need));
>>   	}
>>   out_unlock:
>> @@ -3646,8 +3642,6 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
>>   		goto out_unlock;
>>   
>>   	if (target < 0) {
>> -		if (cap->mds_wanted | cap->issued)
>> -			ci->i_ceph_flags |= CEPH_I_CAP_DROPPED;
>>   		__ceph_remove_cap(cap, false);
>>   		goto out_unlock;
>>   	}
>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>> index fab9d6461a65..98d746b3bb53 100644
>> --- a/fs/ceph/mds_client.c
>> +++ b/fs/ceph/mds_client.c
>> @@ -1411,8 +1411,6 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
>>   	dout("removing cap %p, ci is %p, inode is %p\n",
>>   	     cap, ci, &ci->vfs_inode);
>>   	spin_lock(&ci->i_ceph_lock);
>> -	if (cap->mds_wanted | cap->issued)
>> -		ci->i_ceph_flags |= CEPH_I_CAP_DROPPED;
>>   	__ceph_remove_cap(cap, false);
>>   	if (!ci->i_auth_cap) {
>>   		struct ceph_cap_flush *cf;
>> @@ -1578,9 +1576,6 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
>>   			/* mds did not re-issue stale cap */
>>   			spin_lock(&ci->i_ceph_lock);
>>   			cap->issued = cap->implemented = CEPH_CAP_PIN;
>> -			/* make sure mds knows what we want */
>> -			if (__ceph_caps_file_wanted(ci) & ~cap->mds_wanted)
>> -				ci->i_ceph_flags |= CEPH_I_CAP_DROPPED;
>>   			spin_unlock(&ci->i_ceph_lock);
>>   		}
>>   	} else if (ev == FORCE_RO) {
>> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
>> index 37dc1ac8f6c3..48e84d7f48a0 100644
>> --- a/fs/ceph/super.h
>> +++ b/fs/ceph/super.h
>> @@ -517,12 +517,11 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
>>   #define CEPH_I_POOL_RD		(1 << 4)  /* can read from pool */
>>   #define CEPH_I_POOL_WR		(1 << 5)  /* can write to pool */
>>   #define CEPH_I_SEC_INITED	(1 << 6)  /* security initialized */
>> -#define CEPH_I_CAP_DROPPED	(1 << 7)  /* caps were forcibly dropped */
>> -#define CEPH_I_KICK_FLUSH	(1 << 8)  /* kick flushing caps */
>> -#define CEPH_I_FLUSH_SNAPS	(1 << 9)  /* need flush snapss */
>> -#define CEPH_I_ERROR_WRITE	(1 << 10) /* have seen write errors */
>> -#define CEPH_I_ERROR_FILELOCK	(1 << 11) /* have seen file lock errors */
>> -#define CEPH_I_ODIRECT		(1 << 12) /* inode in direct I/O mode */
>> +#define CEPH_I_KICK_FLUSH	(1 << 7)  /* kick flushing caps */
>> +#define CEPH_I_FLUSH_SNAPS	(1 << 8)  /* need flush snapss */
>> +#define CEPH_I_ERROR_WRITE	(1 << 9)  /* have seen write errors */
>> +#define CEPH_I_ERROR_FILELOCK	(1 << 10) /* have seen file lock errors */
>> +#define CEPH_I_ODIRECT		(1 << 11) /* inode in direct I/O mode */
>>   
>>   /*
>>    * Masks of ceph inode work.
>
Jeff Layton Feb. 24, 2020, 3:17 p.m. UTC | #3
On Mon, 2020-02-24 at 11:17 +0800, Yan, Zheng wrote:
> On 2/22/20 1:17 AM, Jeff Layton wrote:
> > On Fri, 2020-02-21 at 21:16 +0800, Yan, Zheng wrote:
> > > original code only renews caps for inodes with CEPH_I_CAP_DROPPED flags.
> > > The flag indicates that mds closed session and caps were dropped. This
> > > patch is preparation for not requesting caps for idle open files.
> > > 
> > > CEPH_I_CAP_DROPPED is no longer tested by anyone, so this patch also
> > > remove it.
> > > 
> > > Signed-off-by: "Yan, Zheng" <zyan@redhat.com>
> > > ---
> > >   fs/ceph/caps.c       | 36 +++++++++++++++---------------------
> > >   fs/ceph/mds_client.c |  5 -----
> > >   fs/ceph/super.h      | 11 +++++------
> > >   3 files changed, 20 insertions(+), 32 deletions(-)
> > > 
> > > diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> > > index d05717397c2a..293920d013ff 100644
> > > --- a/fs/ceph/caps.c
> > > +++ b/fs/ceph/caps.c
> > > @@ -2659,6 +2659,7 @@ static int try_get_cap_refs(struct inode *inode, int need, int want,
> > >   		}
> > >   	} else {
> > >   		int session_readonly = false;
> > > +		int mds_wanted;
> > >   		if (ci->i_auth_cap &&
> > >   		    (need & (CEPH_CAP_FILE_WR | CEPH_CAP_FILE_EXCL))) {
> > >   			struct ceph_mds_session *s = ci->i_auth_cap->session;
> > > @@ -2667,32 +2668,27 @@ static int try_get_cap_refs(struct inode *inode, int need, int want,
> > >   			spin_unlock(&s->s_cap_lock);
> > >   		}
> > >   		if (session_readonly) {
> > > -			dout("get_cap_refs %p needed %s but mds%d readonly\n",
> > > +			dout("get_cap_refs %p need %s but mds%d readonly\n",
> > >   			     inode, ceph_cap_string(need), ci->i_auth_cap->mds);
> > >   			ret = -EROFS;
> > >   			goto out_unlock;
> > >   		}
> > >   
> > > -		if (ci->i_ceph_flags & CEPH_I_CAP_DROPPED) {
> > > -			int mds_wanted;
> > > -			if (READ_ONCE(mdsc->fsc->mount_state) ==
> > > -			    CEPH_MOUNT_SHUTDOWN) {
> > > -				dout("get_cap_refs %p forced umount\n", inode);
> > > -				ret = -EIO;
> > > -				goto out_unlock;
> > > -			}
> > > -			mds_wanted = __ceph_caps_mds_wanted(ci, false);
> > > -			if (need & ~(mds_wanted & need)) {
> > > -				dout("get_cap_refs %p caps were dropped"
> > > -				     " (session killed?)\n", inode);
> > > -				ret = -ESTALE;
> > > -				goto out_unlock;
> > > -			}
> > > -			if (!(file_wanted & ~mds_wanted))
> > > -				ci->i_ceph_flags &= ~CEPH_I_CAP_DROPPED;
> > > +		if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
> > > +			dout("get_cap_refs %p forced umount\n", inode);
> > > +			ret = -EIO;
> > > +			goto out_unlock;
> > > +		}
> > > +		mds_wanted = __ceph_caps_mds_wanted(ci, false);
> > > +		if (need & ~mds_wanted) {
> > > +			dout("get_cap_refs %p need %s > mds_wanted %s\n",
> > > +			     inode, ceph_cap_string(need),
> > > +			     ceph_cap_string(mds_wanted));
> > > +			ret = -ESTALE;
> > > +			goto out_unlock;
> > >   		}
> > >   
> > 
> > I was able to reproduce softlockups with xfstests reliably for a little
> > while, but it doesn't always happen. I bisected it down to this patch
> > though. I suspect the problem is in the hunk above. It looks like it's
> > getting into a situation where this is continually returning ESTALE.
> > 
> > I cranked up debug logging in this function and I see this:
> > 
> > [  259.284839] ceph:  get_cap_refs 000000003d7d65fa ret -116 got -
> > [  259.284848] ceph:  get_cap_refs 000000003d7d65fa ret -116 got -
> > [  259.284855] ceph:  get_cap_refs 000000003d7d65fa ret -116 got -
> > [  259.284863] ceph:  get_cap_refs 000000003d7d65fa need Fr want Fc
> > [  259.284868] ceph:  get_cap_refs 000000003d7d65fa need Fr > mds_wanted -
> > [  259.284877] ceph:  get_cap_refs 000000003d7d65fa need Fr want Fc
> > [  259.284885] ceph:  get_cap_refs 000000003d7d65fa need Fr want Fc
> > [  259.284890] ceph:  get_cap_refs 000000003d7d65fa need Fr want Fc
> > [  259.284899] ceph:  get_cap_refs 000000003d7d65fa ret -116 got -
> > [  259.284908] ceph:  get_cap_refs 000000003d7d65fa need Fr > mds_wanted -
> > [  259.284918] ceph:  get_cap_refs 000000003d7d65fa need Fr want Fc
> > [  259.284926] ceph:  get_cap_refs 000000003d7d65fa need Fr want Fc
> > [  259.284945] ceph:  get_cap_refs 000000003d7d65fa need Fr > mds_wanted -
> > [  259.284950] ceph:  get_cap_refs 000000003d7d65fa need Fr want Fc
> > [  259.284961] ceph:  get_cap_refs 000000003d7d65fa need Fr want Fc
> > [  259.284969] ceph:  get_cap_refs 000000003d7d65fa need Fr want Fc
> > [  259.284975] ceph:  get_cap_refs 000000003d7d65fa ret -116 got -
> > [  259.284984] ceph:  get_cap_refs 000000003d7d65fa need Fr > mds_wanted -
> > [  259.284994] ceph:  get_cap_refs 000000003d7d65fa ret -116 got -
> > [  259.285003] ceph:  get_cap_refs 000000003d7d65fa need Fr want Fc
> > 
> 
> Looks like ceph_check_caps did update caps' mds_wanted. Did you test 
> this patch with async dirops patches? please try reproducing this issue 
> again with debug log of try_get_cap_refs(), ceph_check_caps() and 
> ceph_renew_caps().
> 
> 

I tried with and without async dirops and was able to reproduce it
either way. It seems like the issue is that the mds_wanted set is not
being updated properly when we end up in in get_cap_refs.

I'm traveling at the moment though and won't be able to reproduce this
until I'm back at work (Thursday).


> 
> > ...not sure I understand the logical flow in this function well enough
> > to suggest a fix yet though.
> > 
> > > -		dout("get_cap_refs %p have %s needed %s\n", inode,
> > > +		dout("get_cap_refs %p have %s need %s\n", inode,
> > >   		     ceph_cap_string(have), ceph_cap_string(need));
> > >   	}
> > >   out_unlock:
> > > @@ -3646,8 +3642,6 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
> > >   		goto out_unlock;
> > >   
> > >   	if (target < 0) {
> > > -		if (cap->mds_wanted | cap->issued)
> > > -			ci->i_ceph_flags |= CEPH_I_CAP_DROPPED;
> > >   		__ceph_remove_cap(cap, false);
> > >   		goto out_unlock;
> > >   	}
> > > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> > > index fab9d6461a65..98d746b3bb53 100644
> > > --- a/fs/ceph/mds_client.c
> > > +++ b/fs/ceph/mds_client.c
> > > @@ -1411,8 +1411,6 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
> > >   	dout("removing cap %p, ci is %p, inode is %p\n",
> > >   	     cap, ci, &ci->vfs_inode);
> > >   	spin_lock(&ci->i_ceph_lock);
> > > -	if (cap->mds_wanted | cap->issued)
> > > -		ci->i_ceph_flags |= CEPH_I_CAP_DROPPED;
> > >   	__ceph_remove_cap(cap, false);
> > >   	if (!ci->i_auth_cap) {
> > >   		struct ceph_cap_flush *cf;
> > > @@ -1578,9 +1576,6 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
> > >   			/* mds did not re-issue stale cap */
> > >   			spin_lock(&ci->i_ceph_lock);
> > >   			cap->issued = cap->implemented = CEPH_CAP_PIN;
> > > -			/* make sure mds knows what we want */
> > > -			if (__ceph_caps_file_wanted(ci) & ~cap->mds_wanted)
> > > -				ci->i_ceph_flags |= CEPH_I_CAP_DROPPED;
> > >   			spin_unlock(&ci->i_ceph_lock);
> > >   		}
> > >   	} else if (ev == FORCE_RO) {
> > > diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> > > index 37dc1ac8f6c3..48e84d7f48a0 100644
> > > --- a/fs/ceph/super.h
> > > +++ b/fs/ceph/super.h
> > > @@ -517,12 +517,11 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
> > >   #define CEPH_I_POOL_RD		(1 << 4)  /* can read from pool */
> > >   #define CEPH_I_POOL_WR		(1 << 5)  /* can write to pool */
> > >   #define CEPH_I_SEC_INITED	(1 << 6)  /* security initialized */
> > > -#define CEPH_I_CAP_DROPPED	(1 << 7)  /* caps were forcibly dropped */
> > > -#define CEPH_I_KICK_FLUSH	(1 << 8)  /* kick flushing caps */
> > > -#define CEPH_I_FLUSH_SNAPS	(1 << 9)  /* need flush snapss */
> > > -#define CEPH_I_ERROR_WRITE	(1 << 10) /* have seen write errors */
> > > -#define CEPH_I_ERROR_FILELOCK	(1 << 11) /* have seen file lock errors */
> > > -#define CEPH_I_ODIRECT		(1 << 12) /* inode in direct I/O mode */
> > > +#define CEPH_I_KICK_FLUSH	(1 << 7)  /* kick flushing caps */
> > > +#define CEPH_I_FLUSH_SNAPS	(1 << 8)  /* need flush snapss */
> > > +#define CEPH_I_ERROR_WRITE	(1 << 9)  /* have seen write errors */
> > > +#define CEPH_I_ERROR_FILELOCK	(1 << 10) /* have seen file lock errors */
> > > +#define CEPH_I_ODIRECT		(1 << 11) /* inode in direct I/O mode */
> > >   
> > >   /*
> > >    * Masks of ceph inode work.

Patch
diff mbox series

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index d05717397c2a..293920d013ff 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2659,6 +2659,7 @@  static int try_get_cap_refs(struct inode *inode, int need, int want,
 		}
 	} else {
 		int session_readonly = false;
+		int mds_wanted;
 		if (ci->i_auth_cap &&
 		    (need & (CEPH_CAP_FILE_WR | CEPH_CAP_FILE_EXCL))) {
 			struct ceph_mds_session *s = ci->i_auth_cap->session;
@@ -2667,32 +2668,27 @@  static int try_get_cap_refs(struct inode *inode, int need, int want,
 			spin_unlock(&s->s_cap_lock);
 		}
 		if (session_readonly) {
-			dout("get_cap_refs %p needed %s but mds%d readonly\n",
+			dout("get_cap_refs %p need %s but mds%d readonly\n",
 			     inode, ceph_cap_string(need), ci->i_auth_cap->mds);
 			ret = -EROFS;
 			goto out_unlock;
 		}
 
-		if (ci->i_ceph_flags & CEPH_I_CAP_DROPPED) {
-			int mds_wanted;
-			if (READ_ONCE(mdsc->fsc->mount_state) ==
-			    CEPH_MOUNT_SHUTDOWN) {
-				dout("get_cap_refs %p forced umount\n", inode);
-				ret = -EIO;
-				goto out_unlock;
-			}
-			mds_wanted = __ceph_caps_mds_wanted(ci, false);
-			if (need & ~(mds_wanted & need)) {
-				dout("get_cap_refs %p caps were dropped"
-				     " (session killed?)\n", inode);
-				ret = -ESTALE;
-				goto out_unlock;
-			}
-			if (!(file_wanted & ~mds_wanted))
-				ci->i_ceph_flags &= ~CEPH_I_CAP_DROPPED;
+		if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
+			dout("get_cap_refs %p forced umount\n", inode);
+			ret = -EIO;
+			goto out_unlock;
+		}
+		mds_wanted = __ceph_caps_mds_wanted(ci, false);
+		if (need & ~mds_wanted) {
+			dout("get_cap_refs %p need %s > mds_wanted %s\n",
+			     inode, ceph_cap_string(need),
+			     ceph_cap_string(mds_wanted));
+			ret = -ESTALE;
+			goto out_unlock;
 		}
 
-		dout("get_cap_refs %p have %s needed %s\n", inode,
+		dout("get_cap_refs %p have %s need %s\n", inode,
 		     ceph_cap_string(have), ceph_cap_string(need));
 	}
 out_unlock:
@@ -3646,8 +3642,6 @@  static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
 		goto out_unlock;
 
 	if (target < 0) {
-		if (cap->mds_wanted | cap->issued)
-			ci->i_ceph_flags |= CEPH_I_CAP_DROPPED;
 		__ceph_remove_cap(cap, false);
 		goto out_unlock;
 	}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index fab9d6461a65..98d746b3bb53 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1411,8 +1411,6 @@  static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
 	dout("removing cap %p, ci is %p, inode is %p\n",
 	     cap, ci, &ci->vfs_inode);
 	spin_lock(&ci->i_ceph_lock);
-	if (cap->mds_wanted | cap->issued)
-		ci->i_ceph_flags |= CEPH_I_CAP_DROPPED;
 	__ceph_remove_cap(cap, false);
 	if (!ci->i_auth_cap) {
 		struct ceph_cap_flush *cf;
@@ -1578,9 +1576,6 @@  static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
 			/* mds did not re-issue stale cap */
 			spin_lock(&ci->i_ceph_lock);
 			cap->issued = cap->implemented = CEPH_CAP_PIN;
-			/* make sure mds knows what we want */
-			if (__ceph_caps_file_wanted(ci) & ~cap->mds_wanted)
-				ci->i_ceph_flags |= CEPH_I_CAP_DROPPED;
 			spin_unlock(&ci->i_ceph_lock);
 		}
 	} else if (ev == FORCE_RO) {
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 37dc1ac8f6c3..48e84d7f48a0 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -517,12 +517,11 @@  static inline struct inode *ceph_find_inode(struct super_block *sb,
 #define CEPH_I_POOL_RD		(1 << 4)  /* can read from pool */
 #define CEPH_I_POOL_WR		(1 << 5)  /* can write to pool */
 #define CEPH_I_SEC_INITED	(1 << 6)  /* security initialized */
-#define CEPH_I_CAP_DROPPED	(1 << 7)  /* caps were forcibly dropped */
-#define CEPH_I_KICK_FLUSH	(1 << 8)  /* kick flushing caps */
-#define CEPH_I_FLUSH_SNAPS	(1 << 9)  /* need flush snapss */
-#define CEPH_I_ERROR_WRITE	(1 << 10) /* have seen write errors */
-#define CEPH_I_ERROR_FILELOCK	(1 << 11) /* have seen file lock errors */
-#define CEPH_I_ODIRECT		(1 << 12) /* inode in direct I/O mode */
+#define CEPH_I_KICK_FLUSH	(1 << 7)  /* kick flushing caps */
+#define CEPH_I_FLUSH_SNAPS	(1 << 8)  /* need flush snapss */
+#define CEPH_I_ERROR_WRITE	(1 << 9)  /* have seen write errors */
+#define CEPH_I_ERROR_FILELOCK	(1 << 10) /* have seen file lock errors */
+#define CEPH_I_ODIRECT		(1 << 11) /* inode in direct I/O mode */
 
 /*
  * Masks of ceph inode work.