diff mbox

[1/2] locks: introduce i_blockleases to close lease races

Message ID 20110610001011.GD22215@fieldses.org (mailing list archive)
State New, archived
Headers show

Commit Message

Bruce Fields June 10, 2011, 12:10 a.m. UTC
From: J. Bruce Fields <bfields@redhat.com>

Since break_lease is called before i_writecount is incremented, there's
a window between the two where a setlease call would have no way to know
that an open is about to happen.

We fix this by adding a new inode field, i_blockleases, that is
incremented while a lease-breaking operation is in progress.

We will later reuse i_blockleases to enforce lease-breaking for rename,
unlink, etc.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/inode.c         |    1 +
 fs/locks.c         |   28 ++++++++++++++++++++++++++++
 fs/namei.c         |    6 +++++-
 include/linux/fs.h |    3 +++
 4 files changed, 37 insertions(+), 1 deletions(-)

Comments

Mimi Zohar June 10, 2011, 8:24 p.m. UTC | #1
On Thu, 2011-06-09 at 20:10 -0400, J. Bruce Fields wrote:
> From: J. Bruce Fields <bfields@redhat.com>
> 
> Since break_lease is called before i_writecount is incremented, there's
> a window between the two where a setlease call would have no way to know
> that an open is about to happen.

So unless the break_lease() call is moved from may_open() to after 
nameidata_to_filp(), I don't see any other options.

Mimi

> We fix this by adding a new inode field, i_blockleases, that is
> incremented while a lease-breaking operation is in progress.
> 
> We will later reuse i_blockleases to enforce lease-breaking for rename,
> unlink, etc.
> 
> Signed-off-by: J. Bruce Fields <bfields@redhat.com>
> ---
>  fs/inode.c         |    1 +
>  fs/locks.c         |   28 ++++++++++++++++++++++++++++
>  fs/namei.c         |    6 +++++-
>  include/linux/fs.h |    3 +++
>  4 files changed, 37 insertions(+), 1 deletions(-)
> 
> diff --git a/fs/inode.c b/fs/inode.c
> index 33c963d..4f253a2 100644
> --- a/fs/inode.c
> +++ b/fs/inode.c
> @@ -198,6 +198,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
>  	inode->i_uid = 0;
>  	inode->i_gid = 0;
>  	atomic_set(&inode->i_writecount, 0);
> +	atomic_set(&inode->i_blockleases, 0);
>  	inode->i_size = 0;
>  	inode->i_blocks = 0;
>  	inode->i_bytes = 0;
> diff --git a/fs/locks.c b/fs/locks.c
> index 0a4f50d..7699b1b 100644
> --- a/fs/locks.c
> +++ b/fs/locks.c
> @@ -1164,6 +1164,32 @@ static void time_out_leases(struct inode *inode)
>  	}
>  }
> 
> +/* Disallow all leases (read or write): */
> +void disallow_leases(struct inode *inode, int flags)
> +{
> +	if (!inode)
> +		return;
> +	if (!S_ISREG(inode->i_mode))
> +		return;
> +	if ((flags & O_ACCMODE) == O_RDONLY)
> +		return;
> +	atomic_inc(&inode->i_blockleases);
> +}
> +EXPORT_SYMBOL_GPL(disallow_leases);
> +
> +void reallow_leases(struct inode *inode, int flags)
> +{
> +	if (!inode)
> +		return;
> +	if (!S_ISREG(inode->i_mode))
> +		return;
> +	if ((flags & O_ACCMODE) == O_RDONLY)
> +		return;
> +	BUG_ON(atomic_read(&inode->i_blockleases) <= 0);
> +	atomic_dec(&inode->i_blockleases);
> +}
> +EXPORT_SYMBOL_GPL(reallow_leases);
> +
>  /**
>   *	__break_lease	-	revoke all outstanding leases on file
>   *	@inode: the inode of the file to return
> @@ -1369,6 +1395,8 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
> 
>  	if (arg != F_UNLCK) {
>  		error = -EAGAIN;
> +		if (atomic_read(&inode->i_blockleases))
> +			goto out;
>  		if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
>  			goto out;
>  		if ((arg == F_WRLCK)
> diff --git a/fs/namei.c b/fs/namei.c
> index 3cb616d..079e68c 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -2277,10 +2277,14 @@ ok:
>  		want_write = 1;
>  	}
>  common:
> +	disallow_leases(nd->path.dentry->d_inode, open_flag);
>  	error = may_open(&nd->path, acc_mode, open_flag);
> -	if (error)
> +	if (error) {
> +		reallow_leases(nd->path.dentry->d_inode, open_flag);
>  		goto exit;
> +	}
>  	filp = nameidata_to_filp(nd);
> +	reallow_leases(nd->path.dentry->d_inode, open_flag);
>  	if (!IS_ERR(filp)) {
>  		error = ima_file_check(filp, op->acc_mode);
>  		if (error) {
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 1b95af3..daf8443 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -796,6 +796,7 @@ struct inode {
>  #ifdef CONFIG_IMA
>  	atomic_t		i_readcount; /* struct files open RO */
>  #endif
> +	atomic_t		i_blockleases; /* setlease fails when >0 */
>  	atomic_t		i_writecount;
>  #ifdef CONFIG_SECURITY
>  	void			*i_security;
> @@ -1161,6 +1162,8 @@ extern void lease_get_mtime(struct inode *, struct timespec *time);
>  extern int generic_setlease(struct file *, long, struct file_lock **);
>  extern int vfs_setlease(struct file *, long, struct file_lock **);
>  extern int lease_modify(struct file_lock **, int);
> +extern void disallow_leases(struct inode *, int flags);
> +extern void reallow_leases(struct inode *, int flags);
>  extern int lock_may_read(struct inode *, loff_t start, unsigned long count);
>  extern int lock_may_write(struct inode *, loff_t start, unsigned long count);
>  extern void lock_flocks(void);


--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bruce Fields June 10, 2011, 9:34 p.m. UTC | #2
On Fri, Jun 10, 2011 at 04:24:00PM -0400, Mimi Zohar wrote:
> On Thu, 2011-06-09 at 20:10 -0400, J. Bruce Fields wrote:
> > From: J. Bruce Fields <bfields@redhat.com>
> > 
> > Since break_lease is called before i_writecount is incremented, there's
> > a window between the two where a setlease call would have no way to know
> > that an open is about to happen.
> 
> So unless the break_lease() call is moved from may_open() to after 
> nameidata_to_filp(), I don't see any other options.

Actually, offhand I can't see why that wouldn't be OK.

Though I think we still end up needing something like i_blockleases to
handle unlink, link, rename, chown, and chmod.

--b.

> 
> Mimi
> 
> > We fix this by adding a new inode field, i_blockleases, that is
> > incremented while a lease-breaking operation is in progress.
> > 
> > We will later reuse i_blockleases to enforce lease-breaking for rename,
> > unlink, etc.
> > 
> > Signed-off-by: J. Bruce Fields <bfields@redhat.com>
> > ---
> >  fs/inode.c         |    1 +
> >  fs/locks.c         |   28 ++++++++++++++++++++++++++++
> >  fs/namei.c         |    6 +++++-
> >  include/linux/fs.h |    3 +++
> >  4 files changed, 37 insertions(+), 1 deletions(-)
> > 
> > diff --git a/fs/inode.c b/fs/inode.c
> > index 33c963d..4f253a2 100644
> > --- a/fs/inode.c
> > +++ b/fs/inode.c
> > @@ -198,6 +198,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
> >  	inode->i_uid = 0;
> >  	inode->i_gid = 0;
> >  	atomic_set(&inode->i_writecount, 0);
> > +	atomic_set(&inode->i_blockleases, 0);
> >  	inode->i_size = 0;
> >  	inode->i_blocks = 0;
> >  	inode->i_bytes = 0;
> > diff --git a/fs/locks.c b/fs/locks.c
> > index 0a4f50d..7699b1b 100644
> > --- a/fs/locks.c
> > +++ b/fs/locks.c
> > @@ -1164,6 +1164,32 @@ static void time_out_leases(struct inode *inode)
> >  	}
> >  }
> > 
> > +/* Disallow all leases (read or write): */
> > +void disallow_leases(struct inode *inode, int flags)
> > +{
> > +	if (!inode)
> > +		return;
> > +	if (!S_ISREG(inode->i_mode))
> > +		return;
> > +	if ((flags & O_ACCMODE) == O_RDONLY)
> > +		return;
> > +	atomic_inc(&inode->i_blockleases);
> > +}
> > +EXPORT_SYMBOL_GPL(disallow_leases);
> > +
> > +void reallow_leases(struct inode *inode, int flags)
> > +{
> > +	if (!inode)
> > +		return;
> > +	if (!S_ISREG(inode->i_mode))
> > +		return;
> > +	if ((flags & O_ACCMODE) == O_RDONLY)
> > +		return;
> > +	BUG_ON(atomic_read(&inode->i_blockleases) <= 0);
> > +	atomic_dec(&inode->i_blockleases);
> > +}
> > +EXPORT_SYMBOL_GPL(reallow_leases);
> > +
> >  /**
> >   *	__break_lease	-	revoke all outstanding leases on file
> >   *	@inode: the inode of the file to return
> > @@ -1369,6 +1395,8 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
> > 
> >  	if (arg != F_UNLCK) {
> >  		error = -EAGAIN;
> > +		if (atomic_read(&inode->i_blockleases))
> > +			goto out;
> >  		if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
> >  			goto out;
> >  		if ((arg == F_WRLCK)
> > diff --git a/fs/namei.c b/fs/namei.c
> > index 3cb616d..079e68c 100644
> > --- a/fs/namei.c
> > +++ b/fs/namei.c
> > @@ -2277,10 +2277,14 @@ ok:
> >  		want_write = 1;
> >  	}
> >  common:
> > +	disallow_leases(nd->path.dentry->d_inode, open_flag);
> >  	error = may_open(&nd->path, acc_mode, open_flag);
> > -	if (error)
> > +	if (error) {
> > +		reallow_leases(nd->path.dentry->d_inode, open_flag);
> >  		goto exit;
> > +	}
> >  	filp = nameidata_to_filp(nd);
> > +	reallow_leases(nd->path.dentry->d_inode, open_flag);
> >  	if (!IS_ERR(filp)) {
> >  		error = ima_file_check(filp, op->acc_mode);
> >  		if (error) {
> > diff --git a/include/linux/fs.h b/include/linux/fs.h
> > index 1b95af3..daf8443 100644
> > --- a/include/linux/fs.h
> > +++ b/include/linux/fs.h
> > @@ -796,6 +796,7 @@ struct inode {
> >  #ifdef CONFIG_IMA
> >  	atomic_t		i_readcount; /* struct files open RO */
> >  #endif
> > +	atomic_t		i_blockleases; /* setlease fails when >0 */
> >  	atomic_t		i_writecount;
> >  #ifdef CONFIG_SECURITY
> >  	void			*i_security;
> > @@ -1161,6 +1162,8 @@ extern void lease_get_mtime(struct inode *, struct timespec *time);
> >  extern int generic_setlease(struct file *, long, struct file_lock **);
> >  extern int vfs_setlease(struct file *, long, struct file_lock **);
> >  extern int lease_modify(struct file_lock **, int);
> > +extern void disallow_leases(struct inode *, int flags);
> > +extern void reallow_leases(struct inode *, int flags);
> >  extern int lock_may_read(struct inode *, loff_t start, unsigned long count);
> >  extern int lock_may_write(struct inode *, loff_t start, unsigned long count);
> >  extern void lock_flocks(void);
> 
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bruce Fields June 12, 2011, 4:08 a.m. UTC | #3
On Fri, Jun 10, 2011 at 05:34:46PM -0400, J. Bruce Fields wrote:
> On Fri, Jun 10, 2011 at 04:24:00PM -0400, Mimi Zohar wrote:
> > On Thu, 2011-06-09 at 20:10 -0400, J. Bruce Fields wrote:
> > > From: J. Bruce Fields <bfields@redhat.com>
> > > 
> > > Since break_lease is called before i_writecount is incremented, there's
> > > a window between the two where a setlease call would have no way to know
> > > that an open is about to happen.
> > 
> > So unless the break_lease() call is moved from may_open() to after 
> > nameidata_to_filp(), I don't see any other options.
> 
> Actually, offhand I can't see why that wouldn't be OK.
> 
> Though I think we still end up needing something like i_blockleases to
> handle unlink, link, rename, chown, and chmod.

Well, I guess there's a bizarre alternative that wouldn't require a new
inode field:

What we care about is conflicts between read leases and operations that
modify the metadata of the inode or the set of names pointing to it.

As far as I can tell those operations all take the i_mutex either on the
inode itself or on the parents of one of its aliases.

So, you could prevent break_lease/setlease races by calling setlease
under *all* of those i_mutexes:

	- take i_mutex on the inode
	- take i_lock to prevent the set of aliases from changing
	- take i_mutex for parent of each alias
	- set the lease
	- drop the parent i_mutexes, etc.

where the i_mutexes would all be taken with mutex_trylock, and we'd just
fail the whole setlease if any of them failed.

???

--b.
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mimi Zohar June 12, 2011, 7:10 p.m. UTC | #4
On Sun, 2011-06-12 at 00:08 -0400, J. Bruce Fields wrote:
> On Fri, Jun 10, 2011 at 05:34:46PM -0400, J. Bruce Fields wrote:
> > On Fri, Jun 10, 2011 at 04:24:00PM -0400, Mimi Zohar wrote:
> > > On Thu, 2011-06-09 at 20:10 -0400, J. Bruce Fields wrote:
> > > > From: J. Bruce Fields <bfields@redhat.com>
> > > > 
> > > > Since break_lease is called before i_writecount is incremented, there's
> > > > a window between the two where a setlease call would have no way to know
> > > > that an open is about to happen.
> > > 
> > > So unless the break_lease() call is moved from may_open() to after 
> > > nameidata_to_filp(), I don't see any other options.
> > 
> > Actually, offhand I can't see why that wouldn't be OK.
> > 
> > Though I think we still end up needing something like i_blockleases to
> > handle unlink, link, rename, chown, and chmod.
> 
> Well, I guess there's a bizarre alternative that wouldn't require a new
> inode field:

In lieu of adding a new inode field, another possible option, a bit
kludgy, would be extending i_flock with an additional fl_flag
FL_BLOCKLEASE.

#define IS_BLOCKLEASE(fl)    (fl->fl_flags & FL_BLOCKLEASE)

Mimi

> What we care about is conflicts between read leases and operations that
> modify the metadata of the inode or the set of names pointing to it.
> 
> As far as I can tell those operations all take the i_mutex either on the
> inode itself or on the parents of one of its aliases.
> 
> So, you could prevent break_lease/setlease races by calling setlease
> under *all* of those i_mutexes:
> 
> 	- take i_mutex on the inode
> 	- take i_lock to prevent the set of aliases from changing
> 	- take i_mutex for parent of each alias
> 	- set the lease
> 	- drop the parent i_mutexes, etc.
> 
> where the i_mutexes would all be taken with mutex_trylock, and we'd just
> fail the whole setlease if any of them failed.
> 
> ???
> 
> --b.


--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bruce Fields June 12, 2011, 7:12 p.m. UTC | #5
On Sun, Jun 12, 2011 at 03:10:04PM -0400, Mimi Zohar wrote:
> On Sun, 2011-06-12 at 00:08 -0400, J. Bruce Fields wrote:
> > On Fri, Jun 10, 2011 at 05:34:46PM -0400, J. Bruce Fields wrote:
> > > On Fri, Jun 10, 2011 at 04:24:00PM -0400, Mimi Zohar wrote:
> > > > On Thu, 2011-06-09 at 20:10 -0400, J. Bruce Fields wrote:
> > > > > From: J. Bruce Fields <bfields@redhat.com>
> > > > > 
> > > > > Since break_lease is called before i_writecount is incremented, there's
> > > > > a window between the two where a setlease call would have no way to know
> > > > > that an open is about to happen.
> > > > 
> > > > So unless the break_lease() call is moved from may_open() to after 
> > > > nameidata_to_filp(), I don't see any other options.
> > > 
> > > Actually, offhand I can't see why that wouldn't be OK.
> > > 
> > > Though I think we still end up needing something like i_blockleases to
> > > handle unlink, link, rename, chown, and chmod.
> > 
> > Well, I guess there's a bizarre alternative that wouldn't require a new
> > inode field:
> 
> In lieu of adding a new inode field, another possible option, a bit
> kludgy, would be extending i_flock with an additional fl_flag
> FL_BLOCKLEASE.
> 
> #define IS_BLOCKLEASE(fl)    (fl->fl_flags & FL_BLOCKLEASE)

Alas, that would mean adding and removing one of these file locks around
every single link, unlink, rename,....

--b.

> 
> Mimi
> 
> > What we care about is conflicts between read leases and operations that
> > modify the metadata of the inode or the set of names pointing to it.
> > 
> > As far as I can tell those operations all take the i_mutex either on the
> > inode itself or on the parents of one of its aliases.
> > 
> > So, you could prevent break_lease/setlease races by calling setlease
> > under *all* of those i_mutexes:
> > 
> > 	- take i_mutex on the inode
> > 	- take i_lock to prevent the set of aliases from changing
> > 	- take i_mutex for parent of each alias
> > 	- set the lease
> > 	- drop the parent i_mutexes, etc.
> > 
> > where the i_mutexes would all be taken with mutex_trylock, and we'd just
> > fail the whole setlease if any of them failed.
> > 
> > ???
> > 
> > --b.
> 
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mimi Zohar June 12, 2011, 8:54 p.m. UTC | #6
On Sun, 2011-06-12 at 15:12 -0400, J. Bruce Fields wrote: 
> On Sun, Jun 12, 2011 at 03:10:04PM -0400, Mimi Zohar wrote:
> > On Sun, 2011-06-12 at 00:08 -0400, J. Bruce Fields wrote:
> > > On Fri, Jun 10, 2011 at 05:34:46PM -0400, J. Bruce Fields wrote:
> > > > On Fri, Jun 10, 2011 at 04:24:00PM -0400, Mimi Zohar wrote:
> > > > > On Thu, 2011-06-09 at 20:10 -0400, J. Bruce Fields wrote:
> > > > > > From: J. Bruce Fields <bfields@redhat.com>
> > > > > > 
> > > > > > Since break_lease is called before i_writecount is incremented, there's
> > > > > > a window between the two where a setlease call would have no way to know
> > > > > > that an open is about to happen.
> > > > > 
> > > > > So unless the break_lease() call is moved from may_open() to after 
> > > > > nameidata_to_filp(), I don't see any other options.
> > > > 
> > > > Actually, offhand I can't see why that wouldn't be OK.
> > > > 
> > > > Though I think we still end up needing something like i_blockleases to
> > > > handle unlink, link, rename, chown, and chmod.
> > > 
> > > Well, I guess there's a bizarre alternative that wouldn't require a new
> > > inode field:
> > 
> > In lieu of adding a new inode field, another possible option, a bit
> > kludgy, would be extending i_flock with an additional fl_flag
> > FL_BLOCKLEASE.
> > 
> > #define IS_BLOCKLEASE(fl)    (fl->fl_flags & FL_BLOCKLEASE)
> 
> Alas, that would mean adding and removing one of these file locks around
> every single link, unlink, rename,....
> 
> --b.

You're adding a call to break_lease() for each of them.  Currently
__break_lease() is only called if a lease exists. Assuming there aren't
any existing leases, couldn't break_lease() call something like
block_lease()?  The free would be after the link, unlink, ...,
completed/failed.

(You wouldn't actually need to alloc/free the 'struct file_lock' each
time, just set the pointer and reset to NULL.)

Mimi

> > 
> > Mimi
> > 
> > > What we care about is conflicts between read leases and operations that
> > > modify the metadata of the inode or the set of names pointing to it.
> > > 
> > > As far as I can tell those operations all take the i_mutex either on the
> > > inode itself or on the parents of one of its aliases.
> > > 
> > > So, you could prevent break_lease/setlease races by calling setlease
> > > under *all* of those i_mutexes:
> > > 
> > > 	- take i_mutex on the inode
> > > 	- take i_lock to prevent the set of aliases from changing
> > > 	- take i_mutex for parent of each alias
> > > 	- set the lease
> > > 	- drop the parent i_mutexes, etc.
> > > 
> > > where the i_mutexes would all be taken with mutex_trylock, and we'd just
> > > fail the whole setlease if any of them failed.
> > > 
> > > ???
> > > 
> > > --b.
> > 
> > 



--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bruce Fields June 13, 2011, 12:19 p.m. UTC | #7
On Sun, Jun 12, 2011 at 04:54:33PM -0400, Mimi Zohar wrote:
> On Sun, 2011-06-12 at 15:12 -0400, J. Bruce Fields wrote: 
> > On Sun, Jun 12, 2011 at 03:10:04PM -0400, Mimi Zohar wrote:
> > > On Sun, 2011-06-12 at 00:08 -0400, J. Bruce Fields wrote:
> > > > On Fri, Jun 10, 2011 at 05:34:46PM -0400, J. Bruce Fields wrote:
> > > > > On Fri, Jun 10, 2011 at 04:24:00PM -0400, Mimi Zohar wrote:
> > > > > > On Thu, 2011-06-09 at 20:10 -0400, J. Bruce Fields wrote:
> > > > > > > From: J. Bruce Fields <bfields@redhat.com>
> > > > > > > 
> > > > > > > Since break_lease is called before i_writecount is incremented, there's
> > > > > > > a window between the two where a setlease call would have no way to know
> > > > > > > that an open is about to happen.
> > > > > > 
> > > > > > So unless the break_lease() call is moved from may_open() to after 
> > > > > > nameidata_to_filp(), I don't see any other options.
> > > > > 
> > > > > Actually, offhand I can't see why that wouldn't be OK.
> > > > > 
> > > > > Though I think we still end up needing something like i_blockleases to
> > > > > handle unlink, link, rename, chown, and chmod.
> > > > 
> > > > Well, I guess there's a bizarre alternative that wouldn't require a new
> > > > inode field:
> > > 
> > > In lieu of adding a new inode field, another possible option, a bit
> > > kludgy, would be extending i_flock with an additional fl_flag
> > > FL_BLOCKLEASE.
> > > 
> > > #define IS_BLOCKLEASE(fl)    (fl->fl_flags & FL_BLOCKLEASE)
> > 
> > Alas, that would mean adding and removing one of these file locks around
> > every single link, unlink, rename,....
> > 
> > --b.
> 
> You're adding a call to break_lease() for each of them.  Currently
> __break_lease() is only called if a lease exists. Assuming there aren't
> any existing leases, couldn't break_lease() call something like
> block_lease()?  The free would be after the link, unlink, ...,
> completed/failed.
> 
> (You wouldn't actually need to alloc/free the 'struct file_lock' each
> time, just set the pointer and reset to NULL.)

Well, the pointer has to be set to something.  I suppose we could put a
struct file_lock on the stack.

--b.
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mimi Zohar June 13, 2011, 8:37 p.m. UTC | #8
On Mon, 2011-06-13 at 08:19 -0400, J. Bruce Fields wrote:
> On Sun, Jun 12, 2011 at 04:54:33PM -0400, Mimi Zohar wrote:
> > On Sun, 2011-06-12 at 15:12 -0400, J. Bruce Fields wrote: 
> > > On Sun, Jun 12, 2011 at 03:10:04PM -0400, Mimi Zohar wrote:
> > > > On Sun, 2011-06-12 at 00:08 -0400, J. Bruce Fields wrote:
> > > > > On Fri, Jun 10, 2011 at 05:34:46PM -0400, J. Bruce Fields wrote:
> > > > > > On Fri, Jun 10, 2011 at 04:24:00PM -0400, Mimi Zohar wrote:
> > > > > > > On Thu, 2011-06-09 at 20:10 -0400, J. Bruce Fields wrote:
> > > > > > > > From: J. Bruce Fields <bfields@redhat.com>
> > > > > > > > 
> > > > > > > > Since break_lease is called before i_writecount is incremented, there's
> > > > > > > > a window between the two where a setlease call would have no way to know
> > > > > > > > that an open is about to happen.
> > > > > > > 
> > > > > > > So unless the break_lease() call is moved from may_open() to after 
> > > > > > > nameidata_to_filp(), I don't see any other options.
> > > > > > 
> > > > > > Actually, offhand I can't see why that wouldn't be OK.
> > > > > > 
> > > > > > Though I think we still end up needing something like i_blockleases to
> > > > > > handle unlink, link, rename, chown, and chmod.
> > > > > 
> > > > > Well, I guess there's a bizarre alternative that wouldn't require a new
> > > > > inode field:
> > > > 
> > > > In lieu of adding a new inode field, another possible option, a bit
> > > > kludgy, would be extending i_flock with an additional fl_flag
> > > > FL_BLOCKLEASE.
> > > > 
> > > > #define IS_BLOCKLEASE(fl)    (fl->fl_flags & FL_BLOCKLEASE)
> > > 
> > > Alas, that would mean adding and removing one of these file locks around
> > > every single link, unlink, rename,....
> > > 
> > > --b.
> > 
> > You're adding a call to break_lease() for each of them.  Currently
> > __break_lease() is only called if a lease exists. Assuming there aren't
> > any existing leases, couldn't break_lease() call something like
> > block_lease()?  The free would be after the link, unlink, ...,
> > completed/failed.
> > 
> > (You wouldn't actually need to alloc/free the 'struct file_lock' each
> > time, just set the pointer and reset to NULL.)
> 
> Well, the pointer has to be set to something.  I suppose we could put a
> struct file_lock on the stack.
> 
> --b.

Instead of putting the struct file_lock on the stack, how about creating
a dummy list containing a single element with FL_BLOCKLEASE set?

Mimi

--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bruce Fields June 14, 2011, 12:35 a.m. UTC | #9
On Mon, Jun 13, 2011 at 04:37:03PM -0400, Mimi Zohar wrote:
> On Mon, 2011-06-13 at 08:19 -0400, J. Bruce Fields wrote:
> > On Sun, Jun 12, 2011 at 04:54:33PM -0400, Mimi Zohar wrote:
> > > On Sun, 2011-06-12 at 15:12 -0400, J. Bruce Fields wrote: 
> > > > On Sun, Jun 12, 2011 at 03:10:04PM -0400, Mimi Zohar wrote:
> > > > > In lieu of adding a new inode field, another possible option, a bit
> > > > > kludgy, would be extending i_flock with an additional fl_flag
> > > > > FL_BLOCKLEASE.
> > > > > 
> > > > > #define IS_BLOCKLEASE(fl)    (fl->fl_flags & FL_BLOCKLEASE)
> > > > 
> > > > Alas, that would mean adding and removing one of these file locks around
> > > > every single link, unlink, rename,....
> > > > 
> > > > --b.
> > > 
> > > You're adding a call to break_lease() for each of them.  Currently
> > > __break_lease() is only called if a lease exists. Assuming there aren't
> > > any existing leases, couldn't break_lease() call something like
> > > block_lease()?  The free would be after the link, unlink, ...,
> > > completed/failed.
> > > 
> > > (You wouldn't actually need to alloc/free the 'struct file_lock' each
> > > time, just set the pointer and reset to NULL.)
> > 
> > Well, the pointer has to be set to something.  I suppose we could put a
> > struct file_lock on the stack.
> > 
> > --b.
> 
> Instead of putting the struct file_lock on the stack, how about creating
> a dummy list containing a single element with FL_BLOCKLEASE set?

I'm afraid I don't understand what you're proposing.  Could you explain
in more detail?

--b.
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bruce Fields June 15, 2011, 3:47 p.m. UTC | #10
On Mon, Jun 13, 2011 at 08:19:39AM -0400, J. Bruce Fields wrote:
> On Sun, Jun 12, 2011 at 04:54:33PM -0400, Mimi Zohar wrote:
> > You're adding a call to break_lease() for each of them.  Currently
> > __break_lease() is only called if a lease exists. Assuming there aren't
> > any existing leases, couldn't break_lease() call something like
> > block_lease()?  The free would be after the link, unlink, ...,
> > completed/failed.
> > 
> > (You wouldn't actually need to alloc/free the 'struct file_lock' each
> > time, just set the pointer and reset to NULL.)
> 
> Well, the pointer has to be set to something.  I suppose we could put a
> struct file_lock on the stack.

The locking code is under a global spinlock--instead of an atomic inc
and dec of inode->i_blockleases we'd be doing a pair of lock/unlocks of
file_lock_lock.

We could probably fix that: off the top of my head the only reason I see
for a global lock is the stupid deadlock-detection code.  Which is only
needed for posix locks (and I'm not at all convinced it's needed even
there).

With that fixed, it would be a choice between an i_lock-protected
list_add/list_del vs. an atomic inc/dec of a new inode field.

--b.
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/inode.c b/fs/inode.c
index 33c963d..4f253a2 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -198,6 +198,7 @@  int inode_init_always(struct super_block *sb, struct inode *inode)
 	inode->i_uid = 0;
 	inode->i_gid = 0;
 	atomic_set(&inode->i_writecount, 0);
+	atomic_set(&inode->i_blockleases, 0);
 	inode->i_size = 0;
 	inode->i_blocks = 0;
 	inode->i_bytes = 0;
diff --git a/fs/locks.c b/fs/locks.c
index 0a4f50d..7699b1b 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1164,6 +1164,32 @@  static void time_out_leases(struct inode *inode)
 	}
 }
 
+/* Disallow all leases (read or write): */
+void disallow_leases(struct inode *inode, int flags)
+{
+	if (!inode)
+		return;
+	if (!S_ISREG(inode->i_mode))
+		return;
+	if ((flags & O_ACCMODE) == O_RDONLY)
+		return;
+	atomic_inc(&inode->i_blockleases);
+}
+EXPORT_SYMBOL_GPL(disallow_leases);
+
+void reallow_leases(struct inode *inode, int flags)
+{
+	if (!inode)
+		return;
+	if (!S_ISREG(inode->i_mode))
+		return;
+	if ((flags & O_ACCMODE) == O_RDONLY)
+		return;
+	BUG_ON(atomic_read(&inode->i_blockleases) <= 0);
+	atomic_dec(&inode->i_blockleases);
+}
+EXPORT_SYMBOL_GPL(reallow_leases);
+
 /**
  *	__break_lease	-	revoke all outstanding leases on file
  *	@inode: the inode of the file to return
@@ -1369,6 +1395,8 @@  int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
 
 	if (arg != F_UNLCK) {
 		error = -EAGAIN;
+		if (atomic_read(&inode->i_blockleases))
+			goto out;
 		if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
 			goto out;
 		if ((arg == F_WRLCK)
diff --git a/fs/namei.c b/fs/namei.c
index 3cb616d..079e68c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2277,10 +2277,14 @@  ok:
 		want_write = 1;
 	}
 common:
+	disallow_leases(nd->path.dentry->d_inode, open_flag);
 	error = may_open(&nd->path, acc_mode, open_flag);
-	if (error)
+	if (error) {
+		reallow_leases(nd->path.dentry->d_inode, open_flag);
 		goto exit;
+	}
 	filp = nameidata_to_filp(nd);
+	reallow_leases(nd->path.dentry->d_inode, open_flag);
 	if (!IS_ERR(filp)) {
 		error = ima_file_check(filp, op->acc_mode);
 		if (error) {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1b95af3..daf8443 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -796,6 +796,7 @@  struct inode {
 #ifdef CONFIG_IMA
 	atomic_t		i_readcount; /* struct files open RO */
 #endif
+	atomic_t		i_blockleases; /* setlease fails when >0 */
 	atomic_t		i_writecount;
 #ifdef CONFIG_SECURITY
 	void			*i_security;
@@ -1161,6 +1162,8 @@  extern void lease_get_mtime(struct inode *, struct timespec *time);
 extern int generic_setlease(struct file *, long, struct file_lock **);
 extern int vfs_setlease(struct file *, long, struct file_lock **);
 extern int lease_modify(struct file_lock **, int);
+extern void disallow_leases(struct inode *, int flags);
+extern void reallow_leases(struct inode *, int flags);
 extern int lock_may_read(struct inode *, loff_t start, unsigned long count);
 extern int lock_may_write(struct inode *, loff_t start, unsigned long count);
 extern void lock_flocks(void);