diff mbox

[v2,3/8] vfs: Add O_DENYREAD/WRITE flags support for open syscall

Message ID 1358441584-8783-3-git-send-email-piastry@etersoft.ru (mailing list archive)
State New, archived
Headers show

Commit Message

Pavel Shilovsky Jan. 17, 2013, 4:52 p.m. UTC
If O_DENYMAND flag is specified, O_DENYREAD/WRITE/MAND flags are
translated to flock's flags:

!O_DENYREAD  -> LOCK_READ
!O_DENYWRITE -> LOCK_WRITE
O_DENYMAND   -> LOCK_MAND

and set through flock_lock_file on a file.

This change only affects opens that use O_DENYMAND flag - all other
native Linux opens don't care about these flags. It allow us to
enable this feature for applications that need it (e.g. NFS and
Samba servers that export the same directory for Windows clients,
or Wine applications that access the same files simultaneously).

Signed-off-by: Pavel Shilovsky <piastry@etersoft.ru>
---
 fs/locks.c         | 106 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 fs/namei.c         |  10 ++++-
 include/linux/fs.h |   6 +++
 3 files changed, 118 insertions(+), 4 deletions(-)

Comments

J. Bruce Fields Jan. 30, 2013, 10:16 p.m. UTC | #1
On Thu, Jan 17, 2013 at 08:52:59PM +0400, Pavel Shilovsky wrote:
> If O_DENYMAND flag is specified, O_DENYREAD/WRITE/MAND flags are
> translated to flock's flags:
> 
> !O_DENYREAD  -> LOCK_READ
> !O_DENYWRITE -> LOCK_WRITE
> O_DENYMAND   -> LOCK_MAND
> 
> and set through flock_lock_file on a file.
> 
> This change only affects opens that use O_DENYMAND flag - all other
> native Linux opens don't care about these flags. It allow us to
> enable this feature for applications that need it (e.g. NFS and
> Samba servers that export the same directory for Windows clients,
> or Wine applications that access the same files simultaneously).

The use of an is_conflict callback seems unnecessarily convoluted.

If we need two different behaviors, let's just use another flag (or an
extra boolean argument if we need to, or something).

The only caller for this new deny_lock_file is in the nfs code--I'm a
little unclear why that is.

--b.

> 
> Signed-off-by: Pavel Shilovsky <piastry@etersoft.ru>
> ---
>  fs/locks.c         | 106 +++++++++++++++++++++++++++++++++++++++++++++++++++--
>  fs/namei.c         |  10 ++++-
>  include/linux/fs.h |   6 +++
>  3 files changed, 118 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/locks.c b/fs/locks.c
> index 9edfec4..ebe9a30 100644
> --- a/fs/locks.c
> +++ b/fs/locks.c
> @@ -605,12 +605,86 @@ static int posix_locks_conflict(struct file_lock *caller_fl, struct file_lock *s
>  	return (locks_conflict(caller_fl, sys_fl));
>  }
>  
> -/* Determine if lock sys_fl blocks lock caller_fl. FLOCK specific
> +static unsigned int
> +deny_flags_to_cmd(unsigned int flags)
> +{
> +	unsigned int cmd = LOCK_MAND;
> +
> +	if (!(flags & O_DENYREAD))
> +		cmd |= LOCK_READ;
> +	if (!(flags & O_DENYWRITE))
> +		cmd |= LOCK_WRITE;
> +
> +	return cmd;
> +}
> +
> +/*
> + * locks_mand_conflict - Determine if there's a share reservation conflict
> + * @caller_fl: lock we're attempting to acquire
> + * @sys_fl: lock already present on system that we're checking against
> + *
> + * Check to see if there's a share_reservation conflict. LOCK_READ/LOCK_WRITE
> + * tell us whether the reservation allows other readers and writers.
> + *
> + * We only check against other LOCK_MAND locks, so applications that want to
> + * use share mode locking will only conflict against one another. "normal"
> + * applications that open files won't be affected by and won't themselves
> + * affect the share reservations.
> + */
> +static int
> +locks_mand_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
> +{
> +	unsigned char caller_type = caller_fl->fl_type;
> +	unsigned char sys_type = sys_fl->fl_type;
> +	fmode_t caller_fmode = caller_fl->fl_file->f_mode;
> +	fmode_t sys_fmode = sys_fl->fl_file->f_mode;
> +
> +	/* they can only conflict if they're both LOCK_MAND */
> +	if (!(caller_type & LOCK_MAND) || !(sys_type & LOCK_MAND))
> +		return 0;
> +
> +	if (!(caller_type & LOCK_READ) && (sys_fmode & FMODE_READ))
> +		return 1;
> +	if (!(caller_type & LOCK_WRITE) && (sys_fmode & FMODE_WRITE))
> +		return 1;
> +	if (!(sys_type & LOCK_READ) && (caller_fmode & FMODE_READ))
> +		return 1;
> +	if (!(sys_type & LOCK_WRITE) && (caller_fmode & FMODE_WRITE))
> +		return 1;
> +
> +	return 0;
> +}
> +
> +/*
> + * Determine if lock sys_fl blocks lock caller_fl. O_DENY* flags specific
> + * checking before calling the locks_conflict().
> + */
> +static int
> +deny_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
> +{
> +	/*
> +	 * FLOCK locks referring to the same filp do not conflict with
> +	 * each other.
> +	 */
> +	if (!IS_FLOCK(sys_fl))
> +		return 0;
> +	if ((caller_fl->fl_type & LOCK_MAND) || (sys_fl->fl_type & LOCK_MAND))
> +		return locks_mand_conflict(caller_fl, sys_fl);
> +	if (caller_fl->fl_file == sys_fl->fl_file)
> +		return 0;
> +
> +	return locks_conflict(caller_fl, sys_fl);
> +}
> +
> +/*
> + * Determine if lock sys_fl blocks lock caller_fl. FLOCK specific
>   * checking before calling the locks_conflict().
>   */
> -static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
> +static int
> +flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
>  {
> -	/* FLOCK locks referring to the same filp do not conflict with
> +	/*
> +	 * FLOCK locks referring to the same filp do not conflict with
>  	 * each other.
>  	 */
>  	if (!IS_FLOCK(sys_fl) || (caller_fl->fl_file == sys_fl->fl_file))
> @@ -789,6 +863,32 @@ out:
>  	return error;
>  }
>  
> +/*
> + * Determine if a file is allowed to be opened with specified access and deny
> + * modes. Lock the file and return 0 if checks passed, otherwise return a error
> + * code.
> + */
> +int
> +deny_lock_file(struct file *filp)
> +{
> +	struct file_lock *lock;
> +	int error = 0;
> +
> +	if (!(filp->f_flags & O_DENYMAND))
> +		return error;
> +
> +	error = flock_make_lock(filp, &lock, deny_flags_to_cmd(filp->f_flags));
> +	if (error)
> +		return error;
> +
> +	error = flock_lock_file(filp, lock, deny_locks_conflict);
> +	if (error == -EAGAIN)
> +		error = -ETXTBSY;
> +
> +	locks_free_lock(lock);
> +	return error;
> +}
> +
>  static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock)
>  {
>  	struct file_lock *fl;
> diff --git a/fs/namei.c b/fs/namei.c
> index 5f4cdf3..bf3bb34 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -2569,9 +2569,14 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
>  	 * here.
>  	 */
>  	error = may_open(&file->f_path, acc_mode, open_flag);
> -	if (error)
> +	if (error) {
>  		fput(file);
> +		goto out;
> +	}
>  
> +	error = deny_lock_file(file);
> +	if (error)
> +		fput(file);
>  out:
>  	dput(dentry);
>  	return error;
> @@ -2908,6 +2913,9 @@ opened:
>  		if (error)
>  			goto exit_fput;
>  	}
> +	error = deny_lock_file(file);
> +	if (error)
> +		goto exit_fput;
>  out:
>  	if (got_write)
>  		mnt_drop_write(nd->path.mnt);
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 70a766ae..b8ed06e 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1004,6 +1004,7 @@ extern int lease_modify(struct file_lock **, int);
>  extern int lock_may_read(struct inode *, loff_t start, unsigned long count);
>  extern int lock_may_write(struct inode *, loff_t start, unsigned long count);
>  extern void locks_delete_block(struct file_lock *waiter);
> +extern int deny_lock_file(struct file *);
>  extern void lock_flocks(void);
>  extern void unlock_flocks(void);
>  #else /* !CONFIG_FILE_LOCKING */
> @@ -1152,6 +1153,11 @@ static inline void locks_delete_block(struct file_lock *waiter)
>  {
>  }
>  
> +static inline int deny_lock_file(struct file *filp)
> +{
> +	return 0;
> +}
> +
>  static inline void lock_flocks(void)
>  {
>  }
> -- 
> 1.8.1.1
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pavel Shilovsky Feb. 5, 2013, 11:45 a.m. UTC | #2
2013/1/31 J. Bruce Fields <bfields@fieldses.org>:
> On Thu, Jan 17, 2013 at 08:52:59PM +0400, Pavel Shilovsky wrote:
>> If O_DENYMAND flag is specified, O_DENYREAD/WRITE/MAND flags are
>> translated to flock's flags:
>>
>> !O_DENYREAD  -> LOCK_READ
>> !O_DENYWRITE -> LOCK_WRITE
>> O_DENYMAND   -> LOCK_MAND
>>
>> and set through flock_lock_file on a file.
>>
>> This change only affects opens that use O_DENYMAND flag - all other
>> native Linux opens don't care about these flags. It allow us to
>> enable this feature for applications that need it (e.g. NFS and
>> Samba servers that export the same directory for Windows clients,
>> or Wine applications that access the same files simultaneously).
>
> The use of an is_conflict callback seems unnecessarily convoluted.
>
> If we need two different behaviors, let's just use another flag (or an
> extra boolean argument if we need to, or something).

Ok, we can pass "bool is_mand" to flock_lock_file that will pass it
further to flock_locks_conflict.

>
> The only caller for this new deny_lock_file is in the nfs code--I'm a
> little unclear why that is.

deny_lock_file is called not only in the nfs code but also in 2 places
of fs/namei.c -- that enable this logic for VFS.
J. Bruce Fields Feb. 5, 2013, 2:35 p.m. UTC | #3
On Tue, Feb 05, 2013 at 03:45:31PM +0400, Pavel Shilovsky wrote:
> 2013/1/31 J. Bruce Fields <bfields@fieldses.org>:
> > On Thu, Jan 17, 2013 at 08:52:59PM +0400, Pavel Shilovsky wrote:
> >> If O_DENYMAND flag is specified, O_DENYREAD/WRITE/MAND flags are
> >> translated to flock's flags:
> >>
> >> !O_DENYREAD  -> LOCK_READ
> >> !O_DENYWRITE -> LOCK_WRITE
> >> O_DENYMAND   -> LOCK_MAND
> >>
> >> and set through flock_lock_file on a file.
> >>
> >> This change only affects opens that use O_DENYMAND flag - all other
> >> native Linux opens don't care about these flags. It allow us to
> >> enable this feature for applications that need it (e.g. NFS and
> >> Samba servers that export the same directory for Windows clients,
> >> or Wine applications that access the same files simultaneously).
> >
> > The use of an is_conflict callback seems unnecessarily convoluted.
> >
> > If we need two different behaviors, let's just use another flag (or an
> > extra boolean argument if we need to, or something).
> 
> Ok, we can pass "bool is_mand" to flock_lock_file that will pass it
> further to flock_locks_conflict.
> 
> >
> > The only caller for this new deny_lock_file is in the nfs code--I'm a
> > little unclear why that is.
> 
> deny_lock_file is called not only in the nfs code but also in 2 places
> of fs/namei.c -- that enable this logic for VFS.

Oops, apologies, I overlooked those somehow.

What prevents somebody else from grabbing a lock on a newly-created file
before we grab our own lock?

I couldn't tell on a quick look whether we hold some lock that prevents
that.

--b.
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pavel Shilovsky Feb. 7, 2013, 9:53 a.m. UTC | #4
2013/2/5 J. Bruce Fields <bfields@fieldses.org>:
> On Tue, Feb 05, 2013 at 03:45:31PM +0400, Pavel Shilovsky wrote:
>> 2013/1/31 J. Bruce Fields <bfields@fieldses.org>:
>> > On Thu, Jan 17, 2013 at 08:52:59PM +0400, Pavel Shilovsky wrote:
>> >> If O_DENYMAND flag is specified, O_DENYREAD/WRITE/MAND flags are
>> >> translated to flock's flags:
>> >>
>> >> !O_DENYREAD  -> LOCK_READ
>> >> !O_DENYWRITE -> LOCK_WRITE
>> >> O_DENYMAND   -> LOCK_MAND
>> >>
>> >> and set through flock_lock_file on a file.
>> >>
>> >> This change only affects opens that use O_DENYMAND flag - all other
>> >> native Linux opens don't care about these flags. It allow us to
>> >> enable this feature for applications that need it (e.g. NFS and
>> >> Samba servers that export the same directory for Windows clients,
>> >> or Wine applications that access the same files simultaneously).
>> >
>> > The use of an is_conflict callback seems unnecessarily convoluted.
>> >
>> > If we need two different behaviors, let's just use another flag (or an
>> > extra boolean argument if we need to, or something).
>>
>> Ok, we can pass "bool is_mand" to flock_lock_file that will pass it
>> further to flock_locks_conflict.
>>
>> >
>> > The only caller for this new deny_lock_file is in the nfs code--I'm a
>> > little unclear why that is.
>>
>> deny_lock_file is called not only in the nfs code but also in 2 places
>> of fs/namei.c -- that enable this logic for VFS.
>
> Oops, apologies, I overlooked those somehow.
>
> What prevents somebody else from grabbing a lock on a newly-created file
> before we grab our own lock?
>
> I couldn't tell on a quick look whether we hold some lock that prevents
> that.

Nothing prevents it. If somebody grabbed a share mode lock on a file
before we call deny_lock_file, we simply close this file and return
-ETXTBSY. We can't grab it before atomic_open because we don't have an
inode there. Anyway, we can't make it atomic for VFS without big code
changes, but for CIFS and NFS it is already atomic with the discussed
patch.
J. Bruce Fields Feb. 7, 2013, 2:18 p.m. UTC | #5
On Thu, Feb 07, 2013 at 01:53:46PM +0400, Pavel Shilovsky wrote:
> 2013/2/5 J. Bruce Fields <bfields@fieldses.org>:
> > On Tue, Feb 05, 2013 at 03:45:31PM +0400, Pavel Shilovsky wrote:
> >> 2013/1/31 J. Bruce Fields <bfields@fieldses.org>:
> >> > On Thu, Jan 17, 2013 at 08:52:59PM +0400, Pavel Shilovsky wrote:
> >> >> If O_DENYMAND flag is specified, O_DENYREAD/WRITE/MAND flags are
> >> >> translated to flock's flags:
> >> >>
> >> >> !O_DENYREAD  -> LOCK_READ
> >> >> !O_DENYWRITE -> LOCK_WRITE
> >> >> O_DENYMAND   -> LOCK_MAND
> >> >>
> >> >> and set through flock_lock_file on a file.
> >> >>
> >> >> This change only affects opens that use O_DENYMAND flag - all other
> >> >> native Linux opens don't care about these flags. It allow us to
> >> >> enable this feature for applications that need it (e.g. NFS and
> >> >> Samba servers that export the same directory for Windows clients,
> >> >> or Wine applications that access the same files simultaneously).
> >> >
> >> > The use of an is_conflict callback seems unnecessarily convoluted.
> >> >
> >> > If we need two different behaviors, let's just use another flag (or an
> >> > extra boolean argument if we need to, or something).
> >>
> >> Ok, we can pass "bool is_mand" to flock_lock_file that will pass it
> >> further to flock_locks_conflict.
> >>
> >> >
> >> > The only caller for this new deny_lock_file is in the nfs code--I'm a
> >> > little unclear why that is.
> >>
> >> deny_lock_file is called not only in the nfs code but also in 2 places
> >> of fs/namei.c -- that enable this logic for VFS.
> >
> > Oops, apologies, I overlooked those somehow.
> >
> > What prevents somebody else from grabbing a lock on a newly-created file
> > before we grab our own lock?
> >
> > I couldn't tell on a quick look whether we hold some lock that prevents
> > that.
> 
> Nothing prevents it. If somebody grabbed a share mode lock on a file
> before we call deny_lock_file, we simply close this file and return
> -ETXTBSY.

But leave the newly-created file there--ugh.

> We can't grab it before atomic_open because we don't have an
> inode there.

If you can get the lock while still holding the directory i_mutex can't
you prevent anyone else from looking up the new file until you've gotten
the lock?

--b.

> Anyway, we can't make it atomic for VFS without big code
> changes, but for CIFS and NFS it is already atomic with the discussed
> patch.
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pavel Shilovsky Feb. 7, 2013, 2:32 p.m. UTC | #6
2013/2/7 J. Bruce Fields <bfields@fieldses.org>:
> On Thu, Feb 07, 2013 at 01:53:46PM +0400, Pavel Shilovsky wrote:
>> Nothing prevents it. If somebody grabbed a share mode lock on a file
>> before we call deny_lock_file, we simply close this file and return
>> -ETXTBSY.
>
> But leave the newly-created file there--ugh.
>
>> We can't grab it before atomic_open because we don't have an
>> inode there.
>
> If you can get the lock while still holding the directory i_mutex can't
> you prevent anyone else from looking up the new file until you've gotten
> the lock?
>

Hm..., seems you are right, I missed this part:
mutex_lock
lookup_open -> atomic_open -> deny_lock_file
mutex_unlock

that means that nobody can open and of course set flock on the newly
created file (because flock is done through file descriptor). So, it
should be fine to call flock after f_ops->atomic_open in atomic_open
function. Thanks.
J. Bruce Fields Feb. 7, 2013, 2:41 p.m. UTC | #7
On Thu, Feb 07, 2013 at 06:32:38PM +0400, Pavel Shilovsky wrote:
> 2013/2/7 J. Bruce Fields <bfields@fieldses.org>:
> > On Thu, Feb 07, 2013 at 01:53:46PM +0400, Pavel Shilovsky wrote:
> >> Nothing prevents it. If somebody grabbed a share mode lock on a file
> >> before we call deny_lock_file, we simply close this file and return
> >> -ETXTBSY.
> >
> > But leave the newly-created file there--ugh.
> >
> >> We can't grab it before atomic_open because we don't have an
> >> inode there.
> >
> > If you can get the lock while still holding the directory i_mutex can't
> > you prevent anyone else from looking up the new file until you've gotten
> > the lock?
> >
> 
> Hm..., seems you are right, I missed this part:
> mutex_lock
> lookup_open -> atomic_open -> deny_lock_file
> mutex_unlock
> 
> that means that nobody can open and of course set flock on the newly
> created file (because flock is done through file descriptor). So, it
> should be fine to call flock after f_ops->atomic_open in atomic_open
> function. Thanks.

Whether that works may also depend on how the new dentry is set up?  If
it's hashed before you call flock then I suppose it's already visible to
others.

Not knowing that code as well as I should, I might test by introducing
an artificial delay there and trying to reproduce the race.

--b.
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pavel Shilovsky Feb. 7, 2013, 4 p.m. UTC | #8
2013/2/7 J. Bruce Fields <bfields@fieldses.org>:
> On Thu, Feb 07, 2013 at 06:32:38PM +0400, Pavel Shilovsky wrote:
>> 2013/2/7 J. Bruce Fields <bfields@fieldses.org>:
>> > On Thu, Feb 07, 2013 at 01:53:46PM +0400, Pavel Shilovsky wrote:
>> >> Nothing prevents it. If somebody grabbed a share mode lock on a file
>> >> before we call deny_lock_file, we simply close this file and return
>> >> -ETXTBSY.
>> >
>> > But leave the newly-created file there--ugh.
>> >
>> >> We can't grab it before atomic_open because we don't have an
>> >> inode there.
>> >
>> > If you can get the lock while still holding the directory i_mutex can't
>> > you prevent anyone else from looking up the new file until you've gotten
>> > the lock?
>> >
>>
>> Hm..., seems you are right, I missed this part:
>> mutex_lock
>> lookup_open -> atomic_open -> deny_lock_file
>> mutex_unlock
>>
>> that means that nobody can open and of course set flock on the newly
>> created file (because flock is done through file descriptor). So, it
>> should be fine to call flock after f_ops->atomic_open in atomic_open
>> function. Thanks.
>
> Whether that works may also depend on how the new dentry is set up?  If
> it's hashed before you call flock then I suppose it's already visible to
> others.

It seems it should be hashed in f_ops->atomic_open() (at least cifs
and nfs do it this way). In do_last when we do an ordinary open, we
don't hit parent i_mutex if lookup is succeeded through lookup_fast.
lookup_fast can catch newly created dentry and set it's share mode
before atomic_open codepath hits deny_lock_file.

Also, I noted that: atomic open does f_ops->atomic_open and then it
processes may_open check; if may_open fails, the file is closed and
open returns with a error code (but file is created anyway) . I think
there is no difference between this case and the situation with
deny_lock_file there.
J. Bruce Fields Feb. 7, 2013, 4:19 p.m. UTC | #9
On Thu, Feb 07, 2013 at 08:00:13PM +0400, Pavel Shilovsky wrote:
> 2013/2/7 J. Bruce Fields <bfields@fieldses.org>:
> > On Thu, Feb 07, 2013 at 06:32:38PM +0400, Pavel Shilovsky wrote:
> >> 2013/2/7 J. Bruce Fields <bfields@fieldses.org>:
> >> > On Thu, Feb 07, 2013 at 01:53:46PM +0400, Pavel Shilovsky wrote:
> >> >> Nothing prevents it. If somebody grabbed a share mode lock on a file
> >> >> before we call deny_lock_file, we simply close this file and return
> >> >> -ETXTBSY.
> >> >
> >> > But leave the newly-created file there--ugh.
> >> >
> >> >> We can't grab it before atomic_open because we don't have an
> >> >> inode there.
> >> >
> >> > If you can get the lock while still holding the directory i_mutex can't
> >> > you prevent anyone else from looking up the new file until you've gotten
> >> > the lock?
> >> >
> >>
> >> Hm..., seems you are right, I missed this part:
> >> mutex_lock
> >> lookup_open -> atomic_open -> deny_lock_file
> >> mutex_unlock
> >>
> >> that means that nobody can open and of course set flock on the newly
> >> created file (because flock is done through file descriptor). So, it
> >> should be fine to call flock after f_ops->atomic_open in atomic_open
> >> function. Thanks.
> >
> > Whether that works may also depend on how the new dentry is set up?  If
> > it's hashed before you call flock then I suppose it's already visible to
> > others.
> 
> It seems it should be hashed in f_ops->atomic_open() (at least cifs
> and nfs do it this way). In do_last when we do an ordinary open, we
> don't hit parent i_mutex if lookup is succeeded through lookup_fast.
> lookup_fast can catch newly created dentry and set it's share mode
> before atomic_open codepath hits deny_lock_file.
> 
> Also, I noted that: atomic open does f_ops->atomic_open and then it
> processes may_open check; if may_open fails, the file is closed and
> open returns with a error code (but file is created anyway).

That would be a bug, I think.  E.g. "man 3posix open":

	No  files  shall  be created or modified if the function returns
	-1.

Looking at the code...  See the references to FILE_CREATED in
atomic_open--looks like that's trying to prevent may_open from failing
in this case.

> I think
> there is no difference between this case and the situation with
> deny_lock_file there.

Looks to me like it would be a bug in either case.

--b.
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Pavel Shilovsky Feb. 7, 2013, 4:50 p.m. UTC | #10
2013/2/7 J. Bruce Fields <bfields@fieldses.org>:
> That would be a bug, I think.  E.g. "man 3posix open":
>
>         No  files  shall  be created or modified if the function returns
>         -1.
>
> Looking at the code...  See the references to FILE_CREATED in
> atomic_open--looks like that's trying to prevent may_open from failing
> in this case.
>
>> I think
>> there is no difference between this case and the situation with
>> deny_lock_file there.
>
> Looks to me like it would be a bug in either case.

Then we returned from lookup_open in do_last we go to 'opened' lable.
Then we have a 3(!) chances to return -1 while a file is created
(open_check_o_direct, ima_file_check, handle_truncate). In this case
these places are bugs too.

We can call vfs_unlink if we failed after a file was created, but
possible affects need to be investigated.
J. Bruce Fields Feb. 7, 2013, 5:03 p.m. UTC | #11
On Thu, Feb 07, 2013 at 08:50:16PM +0400, Pavel Shilovsky wrote:
> 2013/2/7 J. Bruce Fields <bfields@fieldses.org>:
> > That would be a bug, I think.  E.g. "man 3posix open":
> >
> >         No  files  shall  be created or modified if the function returns
> >         -1.
> >
> > Looking at the code...  See the references to FILE_CREATED in
> > atomic_open--looks like that's trying to prevent may_open from failing
> > in this case.
> >
> >> I think
> >> there is no difference between this case and the situation with
> >> deny_lock_file there.
> >
> > Looks to me like it would be a bug in either case.
> 
> Then we returned from lookup_open in do_last we go to 'opened' lable.
> Then we have a 3(!) chances to return -1 while a file is created
> (open_check_o_direct, ima_file_check, handle_truncate

I don't know about the first two, but handle_truncate won't be hit since
will_truncate is false.

> ). In this case
> these places are bugs too.
> 
> We can call vfs_unlink if we failed after a file was created, but
> possible affects need to be investigated.

We definitely don't want to try to undo the create with an unlink.

--b.
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/locks.c b/fs/locks.c
index 9edfec4..ebe9a30 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -605,12 +605,86 @@  static int posix_locks_conflict(struct file_lock *caller_fl, struct file_lock *s
 	return (locks_conflict(caller_fl, sys_fl));
 }
 
-/* Determine if lock sys_fl blocks lock caller_fl. FLOCK specific
+static unsigned int
+deny_flags_to_cmd(unsigned int flags)
+{
+	unsigned int cmd = LOCK_MAND;
+
+	if (!(flags & O_DENYREAD))
+		cmd |= LOCK_READ;
+	if (!(flags & O_DENYWRITE))
+		cmd |= LOCK_WRITE;
+
+	return cmd;
+}
+
+/*
+ * locks_mand_conflict - Determine if there's a share reservation conflict
+ * @caller_fl: lock we're attempting to acquire
+ * @sys_fl: lock already present on system that we're checking against
+ *
+ * Check to see if there's a share_reservation conflict. LOCK_READ/LOCK_WRITE
+ * tell us whether the reservation allows other readers and writers.
+ *
+ * We only check against other LOCK_MAND locks, so applications that want to
+ * use share mode locking will only conflict against one another. "normal"
+ * applications that open files won't be affected by and won't themselves
+ * affect the share reservations.
+ */
+static int
+locks_mand_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
+{
+	unsigned char caller_type = caller_fl->fl_type;
+	unsigned char sys_type = sys_fl->fl_type;
+	fmode_t caller_fmode = caller_fl->fl_file->f_mode;
+	fmode_t sys_fmode = sys_fl->fl_file->f_mode;
+
+	/* they can only conflict if they're both LOCK_MAND */
+	if (!(caller_type & LOCK_MAND) || !(sys_type & LOCK_MAND))
+		return 0;
+
+	if (!(caller_type & LOCK_READ) && (sys_fmode & FMODE_READ))
+		return 1;
+	if (!(caller_type & LOCK_WRITE) && (sys_fmode & FMODE_WRITE))
+		return 1;
+	if (!(sys_type & LOCK_READ) && (caller_fmode & FMODE_READ))
+		return 1;
+	if (!(sys_type & LOCK_WRITE) && (caller_fmode & FMODE_WRITE))
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Determine if lock sys_fl blocks lock caller_fl. O_DENY* flags specific
+ * checking before calling the locks_conflict().
+ */
+static int
+deny_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
+{
+	/*
+	 * FLOCK locks referring to the same filp do not conflict with
+	 * each other.
+	 */
+	if (!IS_FLOCK(sys_fl))
+		return 0;
+	if ((caller_fl->fl_type & LOCK_MAND) || (sys_fl->fl_type & LOCK_MAND))
+		return locks_mand_conflict(caller_fl, sys_fl);
+	if (caller_fl->fl_file == sys_fl->fl_file)
+		return 0;
+
+	return locks_conflict(caller_fl, sys_fl);
+}
+
+/*
+ * Determine if lock sys_fl blocks lock caller_fl. FLOCK specific
  * checking before calling the locks_conflict().
  */
-static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
+static int
+flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
 {
-	/* FLOCK locks referring to the same filp do not conflict with
+	/*
+	 * FLOCK locks referring to the same filp do not conflict with
 	 * each other.
 	 */
 	if (!IS_FLOCK(sys_fl) || (caller_fl->fl_file == sys_fl->fl_file))
@@ -789,6 +863,32 @@  out:
 	return error;
 }
 
+/*
+ * Determine if a file is allowed to be opened with specified access and deny
+ * modes. Lock the file and return 0 if checks passed, otherwise return a error
+ * code.
+ */
+int
+deny_lock_file(struct file *filp)
+{
+	struct file_lock *lock;
+	int error = 0;
+
+	if (!(filp->f_flags & O_DENYMAND))
+		return error;
+
+	error = flock_make_lock(filp, &lock, deny_flags_to_cmd(filp->f_flags));
+	if (error)
+		return error;
+
+	error = flock_lock_file(filp, lock, deny_locks_conflict);
+	if (error == -EAGAIN)
+		error = -ETXTBSY;
+
+	locks_free_lock(lock);
+	return error;
+}
+
 static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock)
 {
 	struct file_lock *fl;
diff --git a/fs/namei.c b/fs/namei.c
index 5f4cdf3..bf3bb34 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2569,9 +2569,14 @@  static int atomic_open(struct nameidata *nd, struct dentry *dentry,
 	 * here.
 	 */
 	error = may_open(&file->f_path, acc_mode, open_flag);
-	if (error)
+	if (error) {
 		fput(file);
+		goto out;
+	}
 
+	error = deny_lock_file(file);
+	if (error)
+		fput(file);
 out:
 	dput(dentry);
 	return error;
@@ -2908,6 +2913,9 @@  opened:
 		if (error)
 			goto exit_fput;
 	}
+	error = deny_lock_file(file);
+	if (error)
+		goto exit_fput;
 out:
 	if (got_write)
 		mnt_drop_write(nd->path.mnt);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 70a766ae..b8ed06e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1004,6 +1004,7 @@  extern int lease_modify(struct file_lock **, int);
 extern int lock_may_read(struct inode *, loff_t start, unsigned long count);
 extern int lock_may_write(struct inode *, loff_t start, unsigned long count);
 extern void locks_delete_block(struct file_lock *waiter);
+extern int deny_lock_file(struct file *);
 extern void lock_flocks(void);
 extern void unlock_flocks(void);
 #else /* !CONFIG_FILE_LOCKING */
@@ -1152,6 +1153,11 @@  static inline void locks_delete_block(struct file_lock *waiter)
 {
 }
 
+static inline int deny_lock_file(struct file *filp)
+{
+	return 0;
+}
+
 static inline void lock_flocks(void)
 {
 }