diff mbox

locks: Set fl_nspid at file_lock allocation

Message ID 896e8ca302614f71f3030015ebf3befe2b40d3c4.1495122972.git.bcodding@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Benjamin Coddington May 18, 2017, 4:02 p.m. UTC
Since commit c69899a17ca4 "NFSv4: Update of VFS byte range lock must be
atomic with the stateid update", NFSv4 has been inserting locks in rpciod
worker context.  The result is that the file_lock's fl_nspid is the
kworker's pid instead of the original userspace pid.  We can fix that up by
setting fl_nspid in locks_allocate_lock, and tranfer it to the file_lock
that's eventually recorded.

Also, let's set fl_nspid directly in a few places it is stored on the
stack.  The use of fl_pid is retained for lock managers that do not set
fl_nspid and instead wish to use and record private fl_pid numbers.

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
---
 fs/locks.c | 44 ++++++++++++++++++++++++++------------------
 1 file changed, 26 insertions(+), 18 deletions(-)

Comments

Jeff Layton May 18, 2017, 4:55 p.m. UTC | #1
On Thu, 2017-05-18 at 12:02 -0400, Benjamin Coddington wrote:
> Since commit c69899a17ca4 "NFSv4: Update of VFS byte range lock must be
> atomic with the stateid update", NFSv4 has been inserting locks in rpciod
> worker context.  The result is that the file_lock's fl_nspid is the
> kworker's pid instead of the original userspace pid.  We can fix that up by
> setting fl_nspid in locks_allocate_lock, and tranfer it to the file_lock
> that's eventually recorded.
> 
> Also, let's set fl_nspid directly in a few places it is stored on the
> stack.  The use of fl_pid is retained for lock managers that do not set
> fl_nspid and instead wish to use and record private fl_pid numbers.
> 
> Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
> ---
>  fs/locks.c | 44 ++++++++++++++++++++++++++------------------
>  1 file changed, 26 insertions(+), 18 deletions(-)
> 
> diff --git a/fs/locks.c b/fs/locks.c
> index af2031a1fcff..959b3f93f4bd 100644
> --- a/fs/locks.c
> +++ b/fs/locks.c
> @@ -249,7 +249,7 @@ locks_dump_ctx_list(struct list_head *list, char *list_type)
>  	struct file_lock *fl;
>  
>  	list_for_each_entry(fl, list, fl_list) {
> -		pr_warn("%s: fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n", list_type, fl->fl_owner, fl->fl_flags, fl->fl_type, fl->fl_pid);
> +		pr_warn("%s: fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n", list_type, fl->fl_owner, fl->fl_flags, fl->fl_type, pid_vnr(fl->fl_nspid));
>  	}
>  }
>  

Probably should change the format to say fl_nspid=%u here, just to be
clear. Might also want to keep fl_pid in there since the lock manager
could set it.


> @@ -294,8 +294,10 @@ struct file_lock *locks_alloc_lock(void)
>  {
>  	struct file_lock *fl = kmem_cache_zalloc(filelock_cache, GFP_KERNEL);
>  
> -	if (fl)
> +	if (fl) {
>  		locks_init_lock_heads(fl);
> +		fl->fl_nspid = get_pid(task_tgid(current));
> +	}
>  
>  	return fl;
>  }
> @@ -328,6 +330,8 @@ void locks_free_lock(struct file_lock *fl)
>  	BUG_ON(!hlist_unhashed(&fl->fl_link));
>  
>  	locks_release_private(fl);
> +	if (fl->fl_nspid)
> +		put_pid(fl->fl_nspid);
>  	kmem_cache_free(filelock_cache, fl);
>  }
>  EXPORT_SYMBOL(locks_free_lock);
> @@ -357,8 +361,15 @@ EXPORT_SYMBOL(locks_init_lock);
>   */
>  void locks_copy_conflock(struct file_lock *new, struct file_lock *fl)
>  {
> +	struct pid *replace_pid = new->fl_nspid;
> +
>  	new->fl_owner = fl->fl_owner;
>  	new->fl_pid = fl->fl_pid;
> +	if (fl->fl_nspid) {
> +		new->fl_nspid = get_pid(fl->fl_nspid);
> +		if (replace_pid)
> +			put_pid(replace_pid);
> +	}
>  	new->fl_file = NULL;
>  	new->fl_flags = fl->fl_flags;
>  	new->fl_type = fl->fl_type;
> @@ -422,7 +433,6 @@ flock_make_lock(struct file *filp, unsigned int cmd)
>  
>  	fl->fl_file = filp;
>  	fl->fl_owner = filp;
> -	fl->fl_pid = current->tgid;
>  	fl->fl_flags = FL_FLOCK;
>  	fl->fl_type = type;
>  	fl->fl_end = OFFSET_MAX;
> @@ -482,7 +492,6 @@ static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl,
>  		fl->fl_end = OFFSET_MAX;
>  
>  	fl->fl_owner = current->files;
> -	fl->fl_pid = current->tgid;
>  	fl->fl_file = filp;
>  	fl->fl_flags = FL_POSIX;
>  	fl->fl_ops = NULL;
> @@ -547,8 +556,6 @@ static int lease_init(struct file *filp, long type, struct file_lock *fl)
>  		return -EINVAL;
>  
>  	fl->fl_owner = filp;
> -	fl->fl_pid = current->tgid;
> -
>  	fl->fl_file = filp;
>  	fl->fl_flags = FL_LEASE;
>  	fl->fl_start = 0;
> @@ -733,7 +740,6 @@ static void locks_wake_up_blocks(struct file_lock *blocker)
>  static void
>  locks_insert_lock_ctx(struct file_lock *fl, struct list_head *before)
>  {
> -	fl->fl_nspid = get_pid(task_tgid(current));
>  	list_add_tail(&fl->fl_list, before);
>  	locks_insert_global_locks(fl);
>  }
> @@ -743,10 +749,6 @@ locks_unlink_lock_ctx(struct file_lock *fl)
>  {
>  	locks_delete_global_locks(fl);
>  	list_del_init(&fl->fl_list);
> -	if (fl->fl_nspid) {
> -		put_pid(fl->fl_nspid);
> -		fl->fl_nspid = NULL;
> -	}
>  	locks_wake_up_blocks(fl);
>  }
>  
> @@ -823,8 +825,6 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
>  	list_for_each_entry(cfl, &ctx->flc_posix, fl_list) {
>  		if (posix_locks_conflict(fl, cfl)) {
>  			locks_copy_conflock(fl, cfl);
> -			if (cfl->fl_nspid)
> -				fl->fl_pid = pid_vnr(cfl->fl_nspid);
>  			goto out;
>  		}
>  	}
> @@ -1298,7 +1298,7 @@ int locks_mandatory_area(struct inode *inode, struct file *filp, loff_t start,
>  	bool sleep = false;
>  
>  	locks_init_lock(&fl);
> -	fl.fl_pid = current->tgid;
> +	fl.fl_nspid = get_pid(task_tgid(current));
>  	fl.fl_file = filp;
>  	fl.fl_flags = FL_POSIX | FL_ACCESS;
>  	if (filp && !(filp->f_flags & O_NONBLOCK))
> @@ -1336,6 +1336,7 @@ int locks_mandatory_area(struct inode *inode, struct file *filp, loff_t start,
>  		break;
>  	}
>  
> +	put_pid(fl.fl_nspid);
>  	return error;
>  }
>  
> @@ -2052,7 +2053,7 @@ EXPORT_SYMBOL_GPL(vfs_test_lock);
>  
>  static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl)
>  {
> -	flock->l_pid = IS_OFDLCK(fl) ? -1 : fl->fl_pid;
> +	flock->l_pid = IS_OFDLCK(fl) ? -1 : pid_vnr(fl->fl_nspid);
>  #if BITS_PER_LONG == 32
>  	/*
>  	 * Make sure we can represent the posix lock via
> @@ -2074,7 +2075,7 @@ static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl)
>  #if BITS_PER_LONG == 32
>  static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl)
>  {
> -	flock->l_pid = IS_OFDLCK(fl) ? -1 : fl->fl_pid;
> +	flock->l_pid = IS_OFDLCK(fl) ? -1 : pid_vnr(fl->fl_nspid);

What about the lock managers that _do_ set fl->fl_pid. With nfsv2/3,
this is always going to give you back the pid of lockd, AFAICT.

Do we want to present the pid value that the client sent here instead in
that case? Maybe the lm could set a fl_flag to indicate that the pid
should be taken directly from fl_pid here? Then you could move the above
logic to a static inline or something.

Alternately, you could add a new lm_present_pid operation to lock
managers to format the pid as they see fit.

>  	flock->l_start = fl->fl_start;
>  	flock->l_len = fl->fl_end == OFFSET_MAX ? 0 :
>  		fl->fl_end - fl->fl_start + 1;
> @@ -2093,6 +2094,7 @@ int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l)
>  	int error;
>  
>  	error = -EFAULT;
> +	file_lock.fl_nspid = get_pid(task_tgid(current));
>  	if (copy_from_user(&flock, l, sizeof(flock)))
>  		goto out;
>  	error = -EINVAL;
> @@ -2129,6 +2131,7 @@ int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l)
>  rel_priv:
>  	locks_release_private(&file_lock);
>  out:
> +	put_pid(file_lock.fl_nspid);
>  	return error;
>  }
>  
> @@ -2322,6 +2325,7 @@ int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l)
>  	int error;
>  
>  	error = -EFAULT;
> +	file_lock.fl_nspid = get_pid(task_tgid(current));

Might it be cleaner to create a FILE_LOCK(name) macro that does this on
the stack (like LIST_HEAD()) ?

>  	if (copy_from_user(&flock, l, sizeof(flock)))
>  		goto out;
>  	error = -EINVAL;
> @@ -2356,6 +2360,7 @@ int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l)
>  
>  	locks_release_private(&file_lock);
>  out:
> +	put_pid(file_lock.fl_nspid);
>  	return error;
>  }
>  
> @@ -2482,7 +2487,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
>  	lock.fl_start = 0;
>  	lock.fl_end = OFFSET_MAX;
>  	lock.fl_owner = owner;
> -	lock.fl_pid = current->tgid;
> +	lock.fl_nspid = get_pid(task_tgid(current));
>  	lock.fl_file = filp;
>  	lock.fl_ops = NULL;
>  	lock.fl_lmops = NULL;
> @@ -2491,6 +2496,7 @@ void locks_remove_posix(struct file *filp, fl_owner_t owner)
>  
>  	if (lock.fl_ops && lock.fl_ops->fl_release_private)
>  		lock.fl_ops->fl_release_private(&lock);
> +	put_pid(lock.fl_nspid);
>  	trace_locks_remove_posix(inode, &lock, error);
>  }
>  
> @@ -2502,7 +2508,6 @@ locks_remove_flock(struct file *filp, struct file_lock_context *flctx)
>  {
>  	struct file_lock fl = {
>  		.fl_owner = filp,
> -		.fl_pid = current->tgid,
>  		.fl_file = filp,
>  		.fl_flags = FL_FLOCK | FL_CLOSE,
>  		.fl_type = F_UNLCK,
> @@ -2513,6 +2518,8 @@ locks_remove_flock(struct file *filp, struct file_lock_context *flctx)
>  	if (list_empty(&flctx->flc_flock))
>  		return;
>  
> +	fl.fl_nspid = get_pid(task_tgid(current));
> +
>  	if (filp->f_op->flock && is_remote_lock(filp))
>  		filp->f_op->flock(filp, F_SETLKW, &fl);
>  	else
> @@ -2520,6 +2527,7 @@ locks_remove_flock(struct file *filp, struct file_lock_context *flctx)
>  
>  	if (fl.fl_ops && fl.fl_ops->fl_release_private)
>  		fl.fl_ops->fl_release_private(&fl);
> +	put_pid(fl.fl_nspid);

I think we only need to take a reference for when the file_lock can
outlive the current task, so the get/put may not be necessary in these
functions.

>  }
>  
>  /* The i_flctx must be valid when calling into here */

This does look like a step in the right direction, I think.
Benjamin Coddington May 18, 2017, 5:36 p.m. UTC | #2
> On Thu, 2017-05-18 at 12:02 -0400, Benjamin Coddington wrote:
>> diff --git a/fs/locks.c b/fs/locks.c
>> index af2031a1fcff..959b3f93f4bd 100644
>> --- a/fs/locks.c
>> +++ b/fs/locks.c
>> @@ -249,7 +249,7 @@ locks_dump_ctx_list(struct list_head *list, char 
>> *list_type)
>>  	struct file_lock *fl;
>>
>>  	list_for_each_entry(fl, list, fl_list) {
>> -		pr_warn("%s: fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n", 
>> list_type, fl->fl_owner, fl->fl_flags, fl->fl_type, fl->fl_pid);
>> +		pr_warn("%s: fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n", 
>> list_type, fl->fl_owner, fl->fl_flags, fl->fl_type, 
>> pid_vnr(fl->fl_nspid));
>>  	}
>>  }
>>
>
> Probably should change the format to say fl_nspid=%u here, just to be
> clear. Might also want to keep fl_pid in there since the lock manager
> could set it.

Yes, I thought about just spitting out both.  Let's do that.

>> @@ -2074,7 +2075,7 @@ static int posix_lock_to_flock(struct flock 
>> *flock, struct file_lock *fl)
>>  #if BITS_PER_LONG == 32
>>  static void posix_lock_to_flock64(struct flock64 *flock, struct 
>> file_lock *fl)
>>  {
>> -	flock->l_pid = IS_OFDLCK(fl) ? -1 : fl->fl_pid;
>> +	flock->l_pid = IS_OFDLCK(fl) ? -1 : pid_vnr(fl->fl_nspid);
>
> What about the lock managers that _do_ set fl->fl_pid. With nfsv2/3,
> this is always going to give you back the pid of lockd, AFAICT.

But isn't this really what you want?  If a local process wants to know 
who
holds a conflicting lock, the fl_pid of a remote system is really pretty
useless.  Not only that, but there's no way for the local process to 
know
when the pid is local or remote.  Better to be consistent and always 
return
something that's useful.

> Do we want to present the pid value that the client sent here instead 
> in
> that case? Maybe the lm could set a fl_flag to indicate that the pid
> should be taken directly from fl_pid here? Then you could move the 
> above
> logic to a static inline or something.
>
> Alternately, you could add a new lm_present_pid operation to lock
> managers to format the pid as they see fit.

Either works to solve the problem, but I still think that F_GETLK and
/proc/locks should only return local pids.

>> @@ -2322,6 +2325,7 @@ int fcntl_getlk64(struct file *filp, unsigned 
>> int cmd, struct flock64 __user *l)
>>  	int error;
>>
>>  	error = -EFAULT;
>> +	file_lock.fl_nspid = get_pid(task_tgid(current));
>
> Might it be cleaner to create a FILE_LOCK(name) macro that does this 
> on
> the stack (like LIST_HEAD()) ?

Yes, it would.  I'll do it.

>> @@ -2520,6 +2527,7 @@ locks_remove_flock(struct file *filp, struct 
>> file_lock_context *flctx)
>>
>>  	if (fl.fl_ops && fl.fl_ops->fl_release_private)
>>  		fl.fl_ops->fl_release_private(&fl);
>> +	put_pid(fl.fl_nspid);
>
> I think we only need to take a reference for when the file_lock can
> outlive the current task, so the get/put may not be necessary in these
> functions.

Right.. of course.  I can clean those up.

Ben
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jeff Layton May 18, 2017, 8:41 p.m. UTC | #3
On Thu, 2017-05-18 at 13:36 -0400, Benjamin Coddington wrote:
> > On Thu, 2017-05-18 at 12:02 -0400, Benjamin Coddington wrote:
> > > diff --git a/fs/locks.c b/fs/locks.c
> > > index af2031a1fcff..959b3f93f4bd 100644
> > > --- a/fs/locks.c
> > > +++ b/fs/locks.c
> > > @@ -249,7 +249,7 @@ locks_dump_ctx_list(struct list_head *list, char 
> > > *list_type)
> > >  	struct file_lock *fl;
> > > 
> > >  	list_for_each_entry(fl, list, fl_list) {
> > > -		pr_warn("%s: fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n", 
> > > list_type, fl->fl_owner, fl->fl_flags, fl->fl_type, fl->fl_pid);
> > > +		pr_warn("%s: fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n", 
> > > list_type, fl->fl_owner, fl->fl_flags, fl->fl_type, 
> > > pid_vnr(fl->fl_nspid));
> > >  	}
> > >  }
> > > 
> > 
> > Probably should change the format to say fl_nspid=%u here, just to be
> > clear. Might also want to keep fl_pid in there since the lock manager
> > could set it.
> 
> Yes, I thought about just spitting out both.  Let's do that.
> 
> > > @@ -2074,7 +2075,7 @@ static int posix_lock_to_flock(struct flock 
> > > *flock, struct file_lock *fl)
> > >  #if BITS_PER_LONG == 32
> > >  static void posix_lock_to_flock64(struct flock64 *flock, struct 
> > > file_lock *fl)
> > >  {
> > > -	flock->l_pid = IS_OFDLCK(fl) ? -1 : fl->fl_pid;
> > > +	flock->l_pid = IS_OFDLCK(fl) ? -1 : pid_vnr(fl->fl_nspid);
> > 
> > What about the lock managers that _do_ set fl->fl_pid. With nfsv2/3,
> > this is always going to give you back the pid of lockd, AFAICT.
> 
> But isn't this really what you want?  If a local process wants to know 
> who
> holds a conflicting lock, the fl_pid of a remote system is really pretty
> useless.  Not only that, but there's no way for the local process to 
> know
> when the pid is local or remote.  Better to be consistent and always 
> return
> something that's useful.
> 

The l_pid field in struct flock (and by extension fl_pid) is pretty
poorly defined in the spec(s), especially when there is a remote host
involved. Programs that rely on it are insane, of course...but Linux has
always behaved this way.

In the absence of a compelling reason to change it, I think we should
keep the behavior in this respect as close as possible to what we have
now.

> > Do we want to present the pid value that the client sent here instead 
> > in
> > that case? Maybe the lm could set a fl_flag to indicate that the pid
> > should be taken directly from fl_pid here? Then you could move the 
> > above
> > logic to a static inline or something.
> > 
> > Alternately, you could add a new lm_present_pid operation to lock
> > managers to format the pid as they see fit.
> 
> Either works to solve the problem, but I still think that F_GETLK and
> /proc/locks should only return local pids.
> 
> > > @@ -2322,6 +2325,7 @@ int fcntl_getlk64(struct file *filp, unsigned 
> > > int cmd, struct flock64 __user *l)
> > >  	int error;
> > > 
> > >  	error = -EFAULT;
> > > +	file_lock.fl_nspid = get_pid(task_tgid(current));
> > 
> > Might it be cleaner to create a FILE_LOCK(name) macro that does this 
> > on
> > the stack (like LIST_HEAD()) ?
> 
> Yes, it would.  I'll do it.
> 

In some of those places it might not hurt to consider allocating and
freeing a file_lock instead. file_lock is not exactly small (208 bytes
on my latest build)...

> > > @@ -2520,6 +2527,7 @@ locks_remove_flock(struct file *filp, struct 
> > > file_lock_context *flctx)
> > > 
> > >  	if (fl.fl_ops && fl.fl_ops->fl_release_private)
> > >  		fl.fl_ops->fl_release_private(&fl);
> > > +	put_pid(fl.fl_nspid);
> > 
> > I think we only need to take a reference for when the file_lock can
> > outlive the current task, so the get/put may not be necessary in these
> > functions.
> 
> Right.. of course.  I can clean those up.
>
Benjamin Coddington May 19, 2017, 12:35 p.m. UTC | #4
On 18 May 2017, at 16:41, Jeff Layton wrote:

> On Thu, 2017-05-18 at 13:36 -0400, Benjamin Coddington wrote:
>>> On Thu, 2017-05-18 at 12:02 -0400, Benjamin Coddington wrote:
>>>> diff --git a/fs/locks.c b/fs/locks.c
>>>> index af2031a1fcff..959b3f93f4bd 100644
>>>> --- a/fs/locks.c
>>>> +++ b/fs/locks.c
>>>> @@ -249,7 +249,7 @@ locks_dump_ctx_list(struct list_head *list, char
>>>> *list_type)
>>>>  	struct file_lock *fl;
>>>>
>>>>  	list_for_each_entry(fl, list, fl_list) {
>>>> -		pr_warn("%s: fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n",
>>>> list_type, fl->fl_owner, fl->fl_flags, fl->fl_type, fl->fl_pid);
>>>> +		pr_warn("%s: fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n",
>>>> list_type, fl->fl_owner, fl->fl_flags, fl->fl_type,
>>>> pid_vnr(fl->fl_nspid));
>>>>  	}
>>>>  }
>>>>
>>>
>>> Probably should change the format to say fl_nspid=%u here, just to be
>>> clear. Might also want to keep fl_pid in there since the lock manager
>>> could set it.
>>
>> Yes, I thought about just spitting out both.  Let's do that.
>>
>>>> @@ -2074,7 +2075,7 @@ static int posix_lock_to_flock(struct flock
>>>> *flock, struct file_lock *fl)
>>>>  #if BITS_PER_LONG == 32
>>>>  static void posix_lock_to_flock64(struct flock64 *flock, struct
>>>> file_lock *fl)
>>>>  {
>>>> -	flock->l_pid = IS_OFDLCK(fl) ? -1 : fl->fl_pid;
>>>> +	flock->l_pid = IS_OFDLCK(fl) ? -1 : pid_vnr(fl->fl_nspid);
>>>
>>> What about the lock managers that _do_ set fl->fl_pid. With nfsv2/3,
>>> this is always going to give you back the pid of lockd, AFAICT.
>>
>> But isn't this really what you want?  If a local process wants to know
>> who
>> holds a conflicting lock, the fl_pid of a remote system is really pretty
>> useless.  Not only that, but there's no way for the local process to
>> know
>> when the pid is local or remote.  Better to be consistent and always
>> return
>> something that's useful.
>>
>
> The l_pid field in struct flock (and by extension fl_pid) is pretty
> poorly defined in the spec(s), especially when there is a remote host
> involved. Programs that rely on it are insane, of course...but Linux has
> always behaved this way.

But if it is completely useless today, then we can change it without
worrying about breaking it because it is already broken.  There's no
documentation anywhere that informs users of F_GETLK or /proc/locks that
l_pid is completely unreliable.

Do you know why linux hasn't picked up l_sysid?  I can't seem to find any
previous discussion about it.

> In the absence of a compelling reason to change it, I think we should
> keep the behavior in this respect as close as possible to what we have
> now.

I think the reason would be l_pid should at least have some consistent
meaning.  I think now that this patch probably shouldn't change it, but it
should be changed.

>>> Do we want to present the pid value that the client sent here instead
>>> in
>>> that case? Maybe the lm could set a fl_flag to indicate that the pid
>>> should be taken directly from fl_pid here? Then you could move the
>>> above
>>> logic to a static inline or something.
>>>
>>> Alternately, you could add a new lm_present_pid operation to lock
>>> managers to format the pid as they see fit.
>>
>> Either works to solve the problem, but I still think that F_GETLK and
>> /proc/locks should only return local pids.
>>
>>>> @@ -2322,6 +2325,7 @@ int fcntl_getlk64(struct file *filp, unsigned
>>>> int cmd, struct flock64 __user *l)
>>>>  	int error;
>>>>
>>>>  	error = -EFAULT;
>>>> +	file_lock.fl_nspid = get_pid(task_tgid(current));
>>>
>>> Might it be cleaner to create a FILE_LOCK(name) macro that does this
>>> on
>>> the stack (like LIST_HEAD()) ?
>>
>> Yes, it would.  I'll do it.
>>
>
> In some of those places it might not hurt to consider allocating and
> freeing a file_lock instead. file_lock is not exactly small (208 bytes
> on my latest build)...

Do you want that wrapped up with this patch?

Ben
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jeff Layton May 19, 2017, 1:28 p.m. UTC | #5
On Fri, 2017-05-19 at 08:35 -0400, Benjamin Coddington wrote:
> On 18 May 2017, at 16:41, Jeff Layton wrote:
> 
> > On Thu, 2017-05-18 at 13:36 -0400, Benjamin Coddington wrote:
> > > > On Thu, 2017-05-18 at 12:02 -0400, Benjamin Coddington wrote:
> > > > > diff --git a/fs/locks.c b/fs/locks.c
> > > > > index af2031a1fcff..959b3f93f4bd 100644
> > > > > --- a/fs/locks.c
> > > > > +++ b/fs/locks.c
> > > > > @@ -249,7 +249,7 @@ locks_dump_ctx_list(struct list_head *list, char
> > > > > *list_type)
> > > > >  	struct file_lock *fl;
> > > > > 
> > > > >  	list_for_each_entry(fl, list, fl_list) {
> > > > > -		pr_warn("%s: fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n",
> > > > > list_type, fl->fl_owner, fl->fl_flags, fl->fl_type, fl->fl_pid);
> > > > > +		pr_warn("%s: fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n",
> > > > > list_type, fl->fl_owner, fl->fl_flags, fl->fl_type,
> > > > > pid_vnr(fl->fl_nspid));
> > > > >  	}
> > > > >  }
> > > > > 
> > > > 
> > > > Probably should change the format to say fl_nspid=%u here, just to be
> > > > clear. Might also want to keep fl_pid in there since the lock manager
> > > > could set it.
> > > 
> > > Yes, I thought about just spitting out both.  Let's do that.
> > > 
> > > > > @@ -2074,7 +2075,7 @@ static int posix_lock_to_flock(struct flock
> > > > > *flock, struct file_lock *fl)
> > > > >  #if BITS_PER_LONG == 32
> > > > >  static void posix_lock_to_flock64(struct flock64 *flock, struct
> > > > > file_lock *fl)
> > > > >  {
> > > > > -	flock->l_pid = IS_OFDLCK(fl) ? -1 : fl->fl_pid;
> > > > > +	flock->l_pid = IS_OFDLCK(fl) ? -1 : pid_vnr(fl->fl_nspid);
> > > > 
> > > > What about the lock managers that _do_ set fl->fl_pid. With nfsv2/3,
> > > > this is always going to give you back the pid of lockd, AFAICT.
> > > 
> > > But isn't this really what you want?  If a local process wants to know
> > > who
> > > holds a conflicting lock, the fl_pid of a remote system is really pretty
> > > useless.  Not only that, but there's no way for the local process to
> > > know
> > > when the pid is local or remote.  Better to be consistent and always
> > > return
> > > something that's useful.
> > > 
> > 
> > The l_pid field in struct flock (and by extension fl_pid) is pretty
> > poorly defined in the spec(s), especially when there is a remote host
> > involved. Programs that rely on it are insane, of course...but Linux has
> > always behaved this way.
> 
> But if it is completely useless today, then we can change it without
> worrying about breaking it because it is already broken.  There's no
> documentation anywhere that informs users of F_GETLK or /proc/locks that
> l_pid is completely unreliable.
> 

True...but again, that's how it currently works today and I haven't yet
heard a good reason for changing it. The argument so far is "because we
can and no one should care", but I don't think that's enough. AFAICT,
the current behavior is not really causing any problems, per-se.

If we want to change it, we need to have an explanation for the poor sap
who chimes in in two years that his application subtly stopped working
the same way when his kernel was upgraded. If we can say we did it to
solve a specific problem, then I'd be more inclined to change it.

> Do you know why linux hasn't picked up l_sysid?  I can't seem to find any
> previous discussion about it.
> 

Because it's not part of POSIX? The fcntl spec page says:

"The identification of a machine in a network environment is outside the
scope of this volume of POSIX.1-2008. Thus, an l_sysid member, such as
found in System V, is not included in the locking structure."

We could change that, but it's a userland ABI change and you'd need new
F_* command constants (at the very least)...and if we're going to go
there then I'd want to start thinking about an async locking API.

(cue the snowball effect)

> > In the absence of a compelling reason to change it, I think we should
> > keep the behavior in this respect as close as possible to what we have
> > now.
> 
> I think the reason would be l_pid should at least have some consistent
> meaning.  I think now that this patch probably shouldn't change it, but it
> should be changed.
> 
> > > > Do we want to present the pid value that the client sent here instead
> > > > in
> > > > that case? Maybe the lm could set a fl_flag to indicate that the pid
> > > > should be taken directly from fl_pid here? Then you could move the
> > > > above
> > > > logic to a static inline or something.
> > > > 
> > > > Alternately, you could add a new lm_present_pid operation to lock
> > > > managers to format the pid as they see fit.
> > > 
> > > Either works to solve the problem, but I still think that F_GETLK and
> > > /proc/locks should only return local pids.
> > > 
> > > > > @@ -2322,6 +2325,7 @@ int fcntl_getlk64(struct file *filp, unsigned
> > > > > int cmd, struct flock64 __user *l)
> > > > >  	int error;
> > > > > 
> > > > >  	error = -EFAULT;
> > > > > +	file_lock.fl_nspid = get_pid(task_tgid(current));
> > > > 
> > > > Might it be cleaner to create a FILE_LOCK(name) macro that does this
> > > > on
> > > > the stack (like LIST_HEAD()) ?
> > > 
> > > Yes, it would.  I'll do it.
> > > 
> > 
> > In some of those places it might not hurt to consider allocating and
> > freeing a file_lock instead. file_lock is not exactly small (208 bytes
> > on my latest build)...
> 
> Do you want that wrapped up with this patch?
> 
> Ben
Benjamin Coddington May 26, 2017, 3:22 p.m. UTC | #6
On 19 May 2017, at 8:35, Benjamin Coddington wrote:

> On 18 May 2017, at 16:41, Jeff Layton wrote:
>
>> On Thu, 2017-05-18 at 13:36 -0400, Benjamin Coddington wrote:
>>>> On Thu, 2017-05-18 at 12:02 -0400, Benjamin Coddington wrote:
>>>>> diff --git a/fs/locks.c b/fs/locks.c
>>>>> index af2031a1fcff..959b3f93f4bd 100644
>>>>> --- a/fs/locks.c
>>>>> +++ b/fs/locks.c
>>>>> @@ -249,7 +249,7 @@ locks_dump_ctx_list(struct list_head *list, char
>>>>> *list_type)
>>>>>  	struct file_lock *fl;
>>>>>
>>>>>  	list_for_each_entry(fl, list, fl_list) {
>>>>> -		pr_warn("%s: fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n",
>>>>> list_type, fl->fl_owner, fl->fl_flags, fl->fl_type, fl->fl_pid);
>>>>> +		pr_warn("%s: fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n",
>>>>> list_type, fl->fl_owner, fl->fl_flags, fl->fl_type,
>>>>> pid_vnr(fl->fl_nspid));
>>>>>  	}
>>>>>  }
>>>>>
>>>>
>>>> Probably should change the format to say fl_nspid=%u here, just to be
>>>> clear. Might also want to keep fl_pid in there since the lock manager
>>>> could set it.
>>>
>>> Yes, I thought about just spitting out both.  Let's do that.
>>>
>>>>> @@ -2074,7 +2075,7 @@ static int posix_lock_to_flock(struct flock
>>>>> *flock, struct file_lock *fl)
>>>>>  #if BITS_PER_LONG == 32
>>>>>  static void posix_lock_to_flock64(struct flock64 *flock, struct
>>>>> file_lock *fl)
>>>>>  {
>>>>> -	flock->l_pid = IS_OFDLCK(fl) ? -1 : fl->fl_pid;
>>>>> +	flock->l_pid = IS_OFDLCK(fl) ? -1 : pid_vnr(fl->fl_nspid);
>>>>
>>>> What about the lock managers that _do_ set fl->fl_pid. With nfsv2/3,
>>>> this is always going to give you back the pid of lockd, AFAICT.
>>>
>>> But isn't this really what you want?  If a local process wants to know
>>> who
>>> holds a conflicting lock, the fl_pid of a remote system is really pretty
>>> useless.  Not only that, but there's no way for the local process to
>>> know
>>> when the pid is local or remote.  Better to be consistent and always
>>> return
>>> something that's useful.
>>>
>>
>> The l_pid field in struct flock (and by extension fl_pid) is pretty
>> poorly defined in the spec(s), especially when there is a remote host
>> involved. Programs that rely on it are insane, of course...but Linux has
>> always behaved this way.
>
> But if it is completely useless today, then we can change it without
> worrying about breaking it because it is already broken.  There's no
> documentation anywhere that informs users of F_GETLK or /proc/locks that
> l_pid is completely unreliable.
>
> Do you know why linux hasn't picked up l_sysid?  I can't seem to find any
> previous discussion about it.
>
>> In the absence of a compelling reason to change it, I think we should
>> keep the behavior in this respect as close as possible to what we have
>> now.
>
> I think the reason would be l_pid should at least have some consistent
> meaning.  I think now that this patch probably shouldn't change it, but it
> should be changed.
>
>>>> Do we want to present the pid value that the client sent here instead
>>>> in
>>>> that case? Maybe the lm could set a fl_flag to indicate that the pid
>>>> should be taken directly from fl_pid here? Then you could move the
>>>> above
>>>> logic to a static inline or something.
>>>>
>>>> Alternately, you could add a new lm_present_pid operation to lock
>>>> managers to format the pid as they see fit.
>>>
>>> Either works to solve the problem, but I still think that F_GETLK and
>>> /proc/locks should only return local pids.

It turns out that the lm_present_pid approach is not sufficient and we
should instead use a flag, since the non-lock-manager fs/lockd/clntproc.c
wants to use fl->fl_pid = 0 in the case where the client is testing and
finds a conflicting lock.

So, the client considers remote pids to be bogus, which makes a lot of sense
to me.

Additionally, after testing, today's kernel returns lockd's pid on a local
F_GETLCK for a remotely-held NFS lock.  So, I think our understanding of the
situation needs to be reversed.  Lock manager's locks are locally reporting
the local lock pid, but sometimes a remote lock needs to override the local
pid to set fl_pid.

Ben
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jeff Layton May 26, 2017, 4:49 p.m. UTC | #7
On Fri, 2017-05-26 at 11:22 -0400, Benjamin Coddington wrote:
> On 19 May 2017, at 8:35, Benjamin Coddington wrote:
> 
> > On 18 May 2017, at 16:41, Jeff Layton wrote:
> > 
> > > On Thu, 2017-05-18 at 13:36 -0400, Benjamin Coddington wrote:
> > > > > On Thu, 2017-05-18 at 12:02 -0400, Benjamin Coddington wrote:
> > > > > > diff --git a/fs/locks.c b/fs/locks.c
> > > > > > index af2031a1fcff..959b3f93f4bd 100644
> > > > > > --- a/fs/locks.c
> > > > > > +++ b/fs/locks.c
> > > > > > @@ -249,7 +249,7 @@ locks_dump_ctx_list(struct list_head *list, char
> > > > > > *list_type)
> > > > > >  	struct file_lock *fl;
> > > > > > 
> > > > > >  	list_for_each_entry(fl, list, fl_list) {
> > > > > > -		pr_warn("%s: fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n",
> > > > > > list_type, fl->fl_owner, fl->fl_flags, fl->fl_type, fl->fl_pid);
> > > > > > +		pr_warn("%s: fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n",
> > > > > > list_type, fl->fl_owner, fl->fl_flags, fl->fl_type,
> > > > > > pid_vnr(fl->fl_nspid));
> > > > > >  	}
> > > > > >  }
> > > > > > 
> > > > > 
> > > > > Probably should change the format to say fl_nspid=%u here, just to be
> > > > > clear. Might also want to keep fl_pid in there since the lock manager
> > > > > could set it.
> > > > 
> > > > Yes, I thought about just spitting out both.  Let's do that.
> > > > 
> > > > > > @@ -2074,7 +2075,7 @@ static int posix_lock_to_flock(struct flock
> > > > > > *flock, struct file_lock *fl)
> > > > > >  #if BITS_PER_LONG == 32
> > > > > >  static void posix_lock_to_flock64(struct flock64 *flock, struct
> > > > > > file_lock *fl)
> > > > > >  {
> > > > > > -	flock->l_pid = IS_OFDLCK(fl) ? -1 : fl->fl_pid;
> > > > > > +	flock->l_pid = IS_OFDLCK(fl) ? -1 : pid_vnr(fl->fl_nspid);
> > > > > 
> > > > > What about the lock managers that _do_ set fl->fl_pid. With nfsv2/3,
> > > > > this is always going to give you back the pid of lockd, AFAICT.
> > > > 
> > > > But isn't this really what you want?  If a local process wants to know
> > > > who
> > > > holds a conflicting lock, the fl_pid of a remote system is really pretty
> > > > useless.  Not only that, but there's no way for the local process to
> > > > know
> > > > when the pid is local or remote.  Better to be consistent and always
> > > > return
> > > > something that's useful.
> > > > 
> > > 
> > > The l_pid field in struct flock (and by extension fl_pid) is pretty
> > > poorly defined in the spec(s), especially when there is a remote host
> > > involved. Programs that rely on it are insane, of course...but Linux has
> > > always behaved this way.
> > 
> > But if it is completely useless today, then we can change it without
> > worrying about breaking it because it is already broken.  There's no
> > documentation anywhere that informs users of F_GETLK or /proc/locks that
> > l_pid is completely unreliable.
> > 
> > Do you know why linux hasn't picked up l_sysid?  I can't seem to find any
> > previous discussion about it.
> > 
> > > In the absence of a compelling reason to change it, I think we should
> > > keep the behavior in this respect as close as possible to what we have
> > > now.
> > 
> > I think the reason would be l_pid should at least have some consistent
> > meaning.  I think now that this patch probably shouldn't change it, but it
> > should be changed.
> > 
> > > > > Do we want to present the pid value that the client sent here instead
> > > > > in
> > > > > that case? Maybe the lm could set a fl_flag to indicate that the pid
> > > > > should be taken directly from fl_pid here? Then you could move the
> > > > > above
> > > > > logic to a static inline or something.
> > > > > 
> > > > > Alternately, you could add a new lm_present_pid operation to lock
> > > > > managers to format the pid as they see fit.
> > > > 
> > > > Either works to solve the problem, but I still think that F_GETLK and
> > > > /proc/locks should only return local pids.
> 
> It turns out that the lm_present_pid approach is not sufficient and we
> should instead use a flag, since the non-lock-manager fs/lockd/clntproc.c
> wants to use fl->fl_pid = 0 in the case where the client is testing and
> finds a conflicting lock.
> 
> So, the client considers remote pids to be bogus, which makes a lot of sense
> to me.
> 

Yeah, not much it can do with a pid, really...

> Additionally, after testing, today's kernel returns lockd's pid on a local
> F_GETLCK for a remotely-held NFS lock.  So, I think our understanding of the
> situation needs to be reversed.  Lock manager's locks are locally reporting
> the local lock pid, but sometimes a remote lock needs to override the local
> pid to set fl_pid.
> 

Fair enough. Now that I look...v4 locks set by knfsd just pick up the
pid of whatever the nfsd thread it happens to be running in. From
nfsd4_lock:

    file_lock->fl_pid = current->tgid;

So, it sounds like it really is totally meaningless then. In that case
I'll reverse my earlier opinion, and say that if it's easier to just
set it to whatever lockd's pid is, then that'd be fine with me.

OTOH, pid_t is an int, and I don't think negative pids are valid (are
they?)

Maybe we should set it to -1 for a remote lock (like we do for OFD
locks). Or, could consider declaring a new value (-2?) to represent a
remote lock?
Benjamin Coddington May 26, 2017, 5:53 p.m. UTC | #8
On 26 May 2017, at 12:49, Jeff Layton wrote:

> On Fri, 2017-05-26 at 11:22 -0400, Benjamin Coddington wrote:
>> On 19 May 2017, at 8:35, Benjamin Coddington wrote:
>> So, the client considers remote pids to be bogus, which makes a lot 
>> of sense
>> to me.
>>
>
> Yeah, not much it can do with a pid, really...
>
>> Additionally, after testing, today's kernel returns lockd's pid on a 
>> local
>> F_GETLCK for a remotely-held NFS lock.  So, I think our understanding 
>> of the
>> situation needs to be reversed.  Lock manager's locks are locally 
>> reporting
>> the local lock pid, but sometimes a remote lock needs to override the 
>> local
>> pid to set fl_pid.
>>
>
> Fair enough. Now that I look...v4 locks set by knfsd just pick up the
> pid of whatever the nfsd thread it happens to be running in. From
> nfsd4_lock:
>
>     file_lock->fl_pid = current->tgid;
>
> So, it sounds like it really is totally meaningless then. In that case
> I'll reverse my earlier opinion, and say that if it's easier to just
> set it to whatever lockd's pid is, then that'd be fine with me.
>
> OTOH, pid_t is an int, and I don't think negative pids are valid (are
> they?)
>
> Maybe we should set it to -1 for a remote lock (like we do for OFD
> locks). Or, could consider declaring a new value (-2?) to represent a
> remote lock?

OK, for my own clarity I'd like to nail down the desired behavior for 
all
four cases:

1) F_GETLK on a remote file with a remote lock.

     I think the filesystem should determine the l_pid to return here.  
NFS
is returning 0 for v3.  Other filesystems are doing different things.  
This
is easy to do from locks.c by setting a flag on the lock in the 
ops->lock
path for F_GETLK.

2) F_GETLK on a local file with a remote lock.

     I think this should be the l_pid of the lock manager.  That seems 
to be
the case now for NFS.

3) F_GETLK on a remote file with a local lock, and..
4) F_GETLK on a local file with a local lock.

     Should set l_pid of the local locking process

This is still an unreliable mess, but I don't see any way around it 
until we
have something like l_sysid.

Ben
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jeff Layton May 26, 2017, 7:39 p.m. UTC | #9
On Fri, 2017-05-26 at 13:53 -0400, Benjamin Coddington wrote:
> On 26 May 2017, at 12:49, Jeff Layton wrote:
> 
> > On Fri, 2017-05-26 at 11:22 -0400, Benjamin Coddington wrote:
> > > On 19 May 2017, at 8:35, Benjamin Coddington wrote:
> > > So, the client considers remote pids to be bogus, which makes a lot 
> > > of sense
> > > to me.
> > > 
> > 
> > Yeah, not much it can do with a pid, really...
> > 
> > > Additionally, after testing, today's kernel returns lockd's pid on a 
> > > local
> > > F_GETLCK for a remotely-held NFS lock.  So, I think our understanding 
> > > of the
> > > situation needs to be reversed.  Lock manager's locks are locally 
> > > reporting
> > > the local lock pid, but sometimes a remote lock needs to override the 
> > > local
> > > pid to set fl_pid.
> > > 
> > 
> > Fair enough. Now that I look...v4 locks set by knfsd just pick up the
> > pid of whatever the nfsd thread it happens to be running in. From
> > nfsd4_lock:
> > 
> >     file_lock->fl_pid = current->tgid;
> > 
> > So, it sounds like it really is totally meaningless then. In that case
> > I'll reverse my earlier opinion, and say that if it's easier to just
> > set it to whatever lockd's pid is, then that'd be fine with me.
> > 
> > OTOH, pid_t is an int, and I don't think negative pids are valid (are
> > they?)
> > 
> > Maybe we should set it to -1 for a remote lock (like we do for OFD
> > locks). Or, could consider declaring a new value (-2?) to represent a
> > remote lock?
> 
> OK, for my own clarity I'd like to nail down the desired behavior for 
> all
> four cases:
> 
> 1) F_GETLK on a remote file with a remote lock.
> 
>      I think the filesystem should determine the l_pid to return here.  
> NFS
> is returning 0 for v3.  Other filesystems are doing different things.  
> This
> is easy to do from locks.c by setting a flag on the lock in the 
> ops->lock
> path for F_GETLK.
> 
> 2) F_GETLK on a local file with a remote lock.
> 
>      I think this should be the l_pid of the lock manager.  That seems 
> to be
> the case now for NFS.
> 
> 3) F_GETLK on a remote file with a local lock, and..
> 4) F_GETLK on a local file with a local lock.
> 
>      Should set l_pid of the local locking process
> 
> This is still an unreliable mess, but I don't see any way around it 
> until we
> have something like l_sysid.
> 
> 

ACK...I'm onboard now. That all sounds pretty reasonable.

But...most important would be to add some comments that lay out this
rationale at the right point in the code sothat  we don't forget this
conversation in a year or two. I'd suggest a comment over the file_lock
for sure, to explain why we have fl_pid and fl_nspid and how we intend
for them to be used.

Would also be interesting to add some tests for this to xfstests too, as
I know that gets run frequently...

I'm also not opposed to the idea of l_sysid. It'd mean adding in a new
API of some sort, but it could be done. However, l_sysid on solaris is a
single int. I think that's probably insufficient in this day and age.

Maybe you would want to allow the pass in a pointer to a buffer, and
have the kernel populate it with a string? We could have a per-fs string
formatter (with a standard one for local filesystems).
diff mbox

Patch

diff --git a/fs/locks.c b/fs/locks.c
index af2031a1fcff..959b3f93f4bd 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -249,7 +249,7 @@  locks_dump_ctx_list(struct list_head *list, char *list_type)
 	struct file_lock *fl;
 
 	list_for_each_entry(fl, list, fl_list) {
-		pr_warn("%s: fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n", list_type, fl->fl_owner, fl->fl_flags, fl->fl_type, fl->fl_pid);
+		pr_warn("%s: fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n", list_type, fl->fl_owner, fl->fl_flags, fl->fl_type, pid_vnr(fl->fl_nspid));
 	}
 }
 
@@ -294,8 +294,10 @@  struct file_lock *locks_alloc_lock(void)
 {
 	struct file_lock *fl = kmem_cache_zalloc(filelock_cache, GFP_KERNEL);
 
-	if (fl)
+	if (fl) {
 		locks_init_lock_heads(fl);
+		fl->fl_nspid = get_pid(task_tgid(current));
+	}
 
 	return fl;
 }
@@ -328,6 +330,8 @@  void locks_free_lock(struct file_lock *fl)
 	BUG_ON(!hlist_unhashed(&fl->fl_link));
 
 	locks_release_private(fl);
+	if (fl->fl_nspid)
+		put_pid(fl->fl_nspid);
 	kmem_cache_free(filelock_cache, fl);
 }
 EXPORT_SYMBOL(locks_free_lock);
@@ -357,8 +361,15 @@  EXPORT_SYMBOL(locks_init_lock);
  */
 void locks_copy_conflock(struct file_lock *new, struct file_lock *fl)
 {
+	struct pid *replace_pid = new->fl_nspid;
+
 	new->fl_owner = fl->fl_owner;
 	new->fl_pid = fl->fl_pid;
+	if (fl->fl_nspid) {
+		new->fl_nspid = get_pid(fl->fl_nspid);
+		if (replace_pid)
+			put_pid(replace_pid);
+	}
 	new->fl_file = NULL;
 	new->fl_flags = fl->fl_flags;
 	new->fl_type = fl->fl_type;
@@ -422,7 +433,6 @@  flock_make_lock(struct file *filp, unsigned int cmd)
 
 	fl->fl_file = filp;
 	fl->fl_owner = filp;
-	fl->fl_pid = current->tgid;
 	fl->fl_flags = FL_FLOCK;
 	fl->fl_type = type;
 	fl->fl_end = OFFSET_MAX;
@@ -482,7 +492,6 @@  static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl,
 		fl->fl_end = OFFSET_MAX;
 
 	fl->fl_owner = current->files;
-	fl->fl_pid = current->tgid;
 	fl->fl_file = filp;
 	fl->fl_flags = FL_POSIX;
 	fl->fl_ops = NULL;
@@ -547,8 +556,6 @@  static int lease_init(struct file *filp, long type, struct file_lock *fl)
 		return -EINVAL;
 
 	fl->fl_owner = filp;
-	fl->fl_pid = current->tgid;
-
 	fl->fl_file = filp;
 	fl->fl_flags = FL_LEASE;
 	fl->fl_start = 0;
@@ -733,7 +740,6 @@  static void locks_wake_up_blocks(struct file_lock *blocker)
 static void
 locks_insert_lock_ctx(struct file_lock *fl, struct list_head *before)
 {
-	fl->fl_nspid = get_pid(task_tgid(current));
 	list_add_tail(&fl->fl_list, before);
 	locks_insert_global_locks(fl);
 }
@@ -743,10 +749,6 @@  locks_unlink_lock_ctx(struct file_lock *fl)
 {
 	locks_delete_global_locks(fl);
 	list_del_init(&fl->fl_list);
-	if (fl->fl_nspid) {
-		put_pid(fl->fl_nspid);
-		fl->fl_nspid = NULL;
-	}
 	locks_wake_up_blocks(fl);
 }
 
@@ -823,8 +825,6 @@  posix_test_lock(struct file *filp, struct file_lock *fl)
 	list_for_each_entry(cfl, &ctx->flc_posix, fl_list) {
 		if (posix_locks_conflict(fl, cfl)) {
 			locks_copy_conflock(fl, cfl);
-			if (cfl->fl_nspid)
-				fl->fl_pid = pid_vnr(cfl->fl_nspid);
 			goto out;
 		}
 	}
@@ -1298,7 +1298,7 @@  int locks_mandatory_area(struct inode *inode, struct file *filp, loff_t start,
 	bool sleep = false;
 
 	locks_init_lock(&fl);
-	fl.fl_pid = current->tgid;
+	fl.fl_nspid = get_pid(task_tgid(current));
 	fl.fl_file = filp;
 	fl.fl_flags = FL_POSIX | FL_ACCESS;
 	if (filp && !(filp->f_flags & O_NONBLOCK))
@@ -1336,6 +1336,7 @@  int locks_mandatory_area(struct inode *inode, struct file *filp, loff_t start,
 		break;
 	}
 
+	put_pid(fl.fl_nspid);
 	return error;
 }
 
@@ -2052,7 +2053,7 @@  EXPORT_SYMBOL_GPL(vfs_test_lock);
 
 static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl)
 {
-	flock->l_pid = IS_OFDLCK(fl) ? -1 : fl->fl_pid;
+	flock->l_pid = IS_OFDLCK(fl) ? -1 : pid_vnr(fl->fl_nspid);
 #if BITS_PER_LONG == 32
 	/*
 	 * Make sure we can represent the posix lock via
@@ -2074,7 +2075,7 @@  static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl)
 #if BITS_PER_LONG == 32
 static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl)
 {
-	flock->l_pid = IS_OFDLCK(fl) ? -1 : fl->fl_pid;
+	flock->l_pid = IS_OFDLCK(fl) ? -1 : pid_vnr(fl->fl_nspid);
 	flock->l_start = fl->fl_start;
 	flock->l_len = fl->fl_end == OFFSET_MAX ? 0 :
 		fl->fl_end - fl->fl_start + 1;
@@ -2093,6 +2094,7 @@  int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l)
 	int error;
 
 	error = -EFAULT;
+	file_lock.fl_nspid = get_pid(task_tgid(current));
 	if (copy_from_user(&flock, l, sizeof(flock)))
 		goto out;
 	error = -EINVAL;
@@ -2129,6 +2131,7 @@  int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l)
 rel_priv:
 	locks_release_private(&file_lock);
 out:
+	put_pid(file_lock.fl_nspid);
 	return error;
 }
 
@@ -2322,6 +2325,7 @@  int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l)
 	int error;
 
 	error = -EFAULT;
+	file_lock.fl_nspid = get_pid(task_tgid(current));
 	if (copy_from_user(&flock, l, sizeof(flock)))
 		goto out;
 	error = -EINVAL;
@@ -2356,6 +2360,7 @@  int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l)
 
 	locks_release_private(&file_lock);
 out:
+	put_pid(file_lock.fl_nspid);
 	return error;
 }
 
@@ -2482,7 +2487,7 @@  void locks_remove_posix(struct file *filp, fl_owner_t owner)
 	lock.fl_start = 0;
 	lock.fl_end = OFFSET_MAX;
 	lock.fl_owner = owner;
-	lock.fl_pid = current->tgid;
+	lock.fl_nspid = get_pid(task_tgid(current));
 	lock.fl_file = filp;
 	lock.fl_ops = NULL;
 	lock.fl_lmops = NULL;
@@ -2491,6 +2496,7 @@  void locks_remove_posix(struct file *filp, fl_owner_t owner)
 
 	if (lock.fl_ops && lock.fl_ops->fl_release_private)
 		lock.fl_ops->fl_release_private(&lock);
+	put_pid(lock.fl_nspid);
 	trace_locks_remove_posix(inode, &lock, error);
 }
 
@@ -2502,7 +2508,6 @@  locks_remove_flock(struct file *filp, struct file_lock_context *flctx)
 {
 	struct file_lock fl = {
 		.fl_owner = filp,
-		.fl_pid = current->tgid,
 		.fl_file = filp,
 		.fl_flags = FL_FLOCK | FL_CLOSE,
 		.fl_type = F_UNLCK,
@@ -2513,6 +2518,8 @@  locks_remove_flock(struct file *filp, struct file_lock_context *flctx)
 	if (list_empty(&flctx->flc_flock))
 		return;
 
+	fl.fl_nspid = get_pid(task_tgid(current));
+
 	if (filp->f_op->flock && is_remote_lock(filp))
 		filp->f_op->flock(filp, F_SETLKW, &fl);
 	else
@@ -2520,6 +2527,7 @@  locks_remove_flock(struct file *filp, struct file_lock_context *flctx)
 
 	if (fl.fl_ops && fl.fl_ops->fl_release_private)
 		fl.fl_ops->fl_release_private(&fl);
+	put_pid(fl.fl_nspid);
 }
 
 /* The i_flctx must be valid when calling into here */