diff mbox

[3/6,v5] fs_pin: Kill fs_pin under a reference of vfsmnt

Message ID 558126FA.8050608@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Kinglong Mee June 17, 2015, 7:51 a.m. UTC
v5, new patch

Signed-off-by: Kinglong Mee <kinglongmee@gmail.com>
---
 fs/namespace.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

Comments

J. Bruce Fields June 19, 2015, 8:29 p.m. UTC | #1
On Wed, Jun 17, 2015 at 03:51:22PM +0800, Kinglong Mee wrote:
> v5, new patch

I don't know this code at all.  I'll try to give it a proper review.
But could you help me by explaining in some detail what this is doing
and why you're sure it's correct?

--b.

> 
> Signed-off-by: Kinglong Mee <kinglongmee@gmail.com>
> ---
>  fs/namespace.c | 10 ++++++++--
>  1 file changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/namespace.c b/fs/namespace.c
> index 1b9e111..3f08a48 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -1049,8 +1049,6 @@ static void cleanup_mnt(struct mount *mnt)
>  	 * so mnt_get_writers() below is safe.
>  	 */
>  	WARN_ON(mnt_get_writers(mnt));
> -	if (unlikely(mnt->mnt_pins.first))
> -		mnt_pin_kill(mnt);
>  	fsnotify_vfsmount_delete(&mnt->mnt);
>  	dput(mnt->mnt.mnt_root);
>  	deactivate_super(mnt->mnt.mnt_sb);
> @@ -1078,6 +1076,7 @@ static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);
>  
>  static void mntput_no_expire(struct mount *mnt)
>  {
> +put_again:
>  	rcu_read_lock();
>  	mnt_add_count(mnt, -1);
>  	if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */
> @@ -1090,6 +1089,13 @@ static void mntput_no_expire(struct mount *mnt)
>  		unlock_mount_hash();
>  		return;
>  	}
> +	if (unlikely(mnt->mnt_pins.first)) {
> +		mnt_add_count(mnt, 1);
> +		rcu_read_unlock();
> +		unlock_mount_hash();
> +		mnt_pin_kill(mnt);
> +		goto put_again;
> +	}
>  	if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) {
>  		rcu_read_unlock();
>  		unlock_mount_hash();
> -- 
> 2.4.3
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
Al Viro June 19, 2015, 8:44 p.m. UTC | #2
On Wed, Jun 17, 2015 at 03:51:22PM +0800, Kinglong Mee wrote:
> +++ b/fs/namespace.c
> @@ -1049,8 +1049,6 @@ static void cleanup_mnt(struct mount *mnt)
>  	 * so mnt_get_writers() below is safe.
>  	 */
>  	WARN_ON(mnt_get_writers(mnt));
> -	if (unlikely(mnt->mnt_pins.first))
> -		mnt_pin_kill(mnt);
>  	fsnotify_vfsmount_delete(&mnt->mnt);
>  	dput(mnt->mnt.mnt_root);
>  	deactivate_super(mnt->mnt.mnt_sb);
> @@ -1078,6 +1076,7 @@ static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);
>  
>  static void mntput_no_expire(struct mount *mnt)
>  {
> +put_again:
>  	rcu_read_lock();
>  	mnt_add_count(mnt, -1);
>  	if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */
> @@ -1090,6 +1089,13 @@ static void mntput_no_expire(struct mount *mnt)
>  		unlock_mount_hash();
>  		return;
>  	}
> +	if (unlikely(mnt->mnt_pins.first)) {
> +		mnt_add_count(mnt, 1);
> +		rcu_read_unlock();
> +		unlock_mount_hash();
> +		mnt_pin_kill(mnt);
> +		goto put_again;
> +	}
>  	if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) {
>  		rcu_read_unlock();
>  		unlock_mount_hash();

This is absolutely wrong.  For one thing, you are running those suckers on
fairly deep stack now, which is a bloody bad idea for a lot reasons -
final mntput() can come with a lot of stack space consumed.  For another,
I'm really not convinced that what you are doing won't bugger the ordering
to hell and back - not without a detailed analysis I don't see in these
patches.

If you want to be able to grab references by those suckers, this is a very
wrong way to go.  Look at legitimize_mnt() for better approach, and yes,
you need to be able to cope with "too late, it's already doomed".  Or you'll
trade those -EBUSY for race with umount(2) returning before the fs shutdown
is complete.
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
Kinglong Mee June 25, 2015, 2:14 p.m. UTC | #3
On 6/20/2015 4:29 AM, J. Bruce Fields wrote:
> On Wed, Jun 17, 2015 at 03:51:22PM +0800, Kinglong Mee wrote:
>> v5, new patch
> 
> I don't know this code at all.  I'll try to give it a proper review.
> But could you help me by explaining in some detail what this is doing
> and why you're sure it's correct?

Sorry for my misunderstand of your means in version 4.
I have make a new version as, 

When reference of cahce_head increase(>1), grab a reference of mnt once.
and  reference decrease to 1 (==1), drop the reference of mnt.

So after that,
When ref > 1, user cannot umount the filesystem with -EBUSY.
when ref ==1, means cache only reference by nfsd cache,
no other reference. So user can try umount, 
1. before set MNT_UMOUNT (protected by mount_lock), nfsd cache is
   referenced (ref > 1, legitimize_mntget), umount will fail with -EBUSY.
2. after set MNT_UMOUNT, nfsd cache is referenced (ref == 2),
   legitimize_mntget will fail, and set cache to CACHE_NEGATIVE,
   and the reference will be dropped, re-back to 1.
   So, pin_kill can delete the cache and umount success.
3. when umountting, no reference to nfsd cache, 
   pin_kill can delete the cache and umount success.

thanks,
Kinglong Mee

> 
> --b.
> 
>>
>> Signed-off-by: Kinglong Mee <kinglongmee@gmail.com>
>> ---
>>  fs/namespace.c | 10 ++++++++--
>>  1 file changed, 8 insertions(+), 2 deletions(-)
>>
>> diff --git a/fs/namespace.c b/fs/namespace.c
>> index 1b9e111..3f08a48 100644
>> --- a/fs/namespace.c
>> +++ b/fs/namespace.c
>> @@ -1049,8 +1049,6 @@ static void cleanup_mnt(struct mount *mnt)
>>  	 * so mnt_get_writers() below is safe.
>>  	 */
>>  	WARN_ON(mnt_get_writers(mnt));
>> -	if (unlikely(mnt->mnt_pins.first))
>> -		mnt_pin_kill(mnt);
>>  	fsnotify_vfsmount_delete(&mnt->mnt);
>>  	dput(mnt->mnt.mnt_root);
>>  	deactivate_super(mnt->mnt.mnt_sb);
>> @@ -1078,6 +1076,7 @@ static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);
>>  
>>  static void mntput_no_expire(struct mount *mnt)
>>  {
>> +put_again:
>>  	rcu_read_lock();
>>  	mnt_add_count(mnt, -1);
>>  	if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */
>> @@ -1090,6 +1089,13 @@ static void mntput_no_expire(struct mount *mnt)
>>  		unlock_mount_hash();
>>  		return;
>>  	}
>> +	if (unlikely(mnt->mnt_pins.first)) {
>> +		mnt_add_count(mnt, 1);
>> +		rcu_read_unlock();
>> +		unlock_mount_hash();
>> +		mnt_pin_kill(mnt);
>> +		goto put_again;
>> +	}
>>  	if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) {
>>  		rcu_read_unlock();
>>  		unlock_mount_hash();
>> -- 
>> 2.4.3
> .
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/namespace.c b/fs/namespace.c
index 1b9e111..3f08a48 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1049,8 +1049,6 @@  static void cleanup_mnt(struct mount *mnt)
 	 * so mnt_get_writers() below is safe.
 	 */
 	WARN_ON(mnt_get_writers(mnt));
-	if (unlikely(mnt->mnt_pins.first))
-		mnt_pin_kill(mnt);
 	fsnotify_vfsmount_delete(&mnt->mnt);
 	dput(mnt->mnt.mnt_root);
 	deactivate_super(mnt->mnt.mnt_sb);
@@ -1078,6 +1076,7 @@  static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);
 
 static void mntput_no_expire(struct mount *mnt)
 {
+put_again:
 	rcu_read_lock();
 	mnt_add_count(mnt, -1);
 	if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */
@@ -1090,6 +1089,13 @@  static void mntput_no_expire(struct mount *mnt)
 		unlock_mount_hash();
 		return;
 	}
+	if (unlikely(mnt->mnt_pins.first)) {
+		mnt_add_count(mnt, 1);
+		rcu_read_unlock();
+		unlock_mount_hash();
+		mnt_pin_kill(mnt);
+		goto put_again;
+	}
 	if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) {
 		rcu_read_unlock();
 		unlock_mount_hash();