diff mbox

aio: Fix freeze protection of aio writes

Message ID 1452178984-17540-1-git-send-email-jack@suse.cz (mailing list archive)
State New, archived
Headers show

Commit Message

Jan Kara Jan. 7, 2016, 3:03 p.m. UTC
Currently we dropped freeze protection of aio writes just after IO was
submitted. Thus aio write could be in flight while the filesystem was
frozen and that could result in unexpected situation like aio completion
wanting to convert extent type on frozen filesystem. Testcase from
Dmitry triggering this is like:

for ((i=0;i<60;i++));do fsfreeze -f /mnt ;sleep 1;fsfreeze -u /mnt;done &
fio --bs=4k --ioengine=libaio --iodepth=128 --size=1g --direct=1 \
    --runtime=60 --filename=/mnt/file --name=rand-write --rw=randwrite

Fix the problem by dropping freeze protection only once IO is completed
in aio_complete().

Reported-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/aio.c           | 31 ++++++++++++++++++++++++++++---
 include/linux/fs.h |  1 +
 2 files changed, 29 insertions(+), 3 deletions(-)

This patch seems to have fallen through the cracks. Al / Ben, can you please
merge it? Thanks!

Comments

Benjamin LaHaise Jan. 7, 2016, 3:07 p.m. UTC | #1
On Thu, Jan 07, 2016 at 04:03:04PM +0100, Jan Kara wrote:
> Currently we dropped freeze protection of aio writes just after IO was
> submitted. Thus aio write could be in flight while the filesystem was
> frozen and that could result in unexpected situation like aio completion
> wanting to convert extent type on frozen filesystem. Testcase from
> Dmitry triggering this is like:
> 
> for ((i=0;i<60;i++));do fsfreeze -f /mnt ;sleep 1;fsfreeze -u /mnt;done &
> fio --bs=4k --ioengine=libaio --iodepth=128 --size=1g --direct=1 \
>     --runtime=60 --filename=/mnt/file --name=rand-write --rw=randwrite
> 
> Fix the problem by dropping freeze protection only once IO is completed
> in aio_complete().

Why isn't this code placed in file_start_write() and file_end_write()?  
That makes more sense to me than sprinkling it in the aio code.

		-ben

> Reported-by: Dmitry Monakhov <dmonakhov@openvz.org>
> Signed-off-by: Jan Kara <jack@suse.cz>
> ---
>  fs/aio.c           | 31 ++++++++++++++++++++++++++++---
>  include/linux/fs.h |  1 +
>  2 files changed, 29 insertions(+), 3 deletions(-)
> 
> This patch seems to have fallen through the cracks. Al / Ben, can you please
> merge it? Thanks!
> 
> diff --git a/fs/aio.c b/fs/aio.c
> index 155f84253f33..ee0871cb4677 100644
> --- a/fs/aio.c
> +++ b/fs/aio.c
> @@ -1065,6 +1065,19 @@ static void aio_complete(struct kiocb *kiocb, long res, long res2)
>  	unsigned tail, pos, head;
>  	unsigned long	flags;
>  
> +	if (kiocb->ki_flags & IOCB_WRITE) {
> +		struct file *f = kiocb->ki_filp;
> +
> +		/*
> +		 * Tell lockdep we inherited freeze protection from submission
> +		 * thread.
> +		 */
> +		percpu_rwsem_acquire(
> +			&f->f_inode->i_sb->s_writers.rw_sem[SB_FREEZE_WRITE-1],
> +			1, _THIS_IP_);
> +		file_end_write(f);
> +	}
> +
>  	/*
>  	 * Special case handling for sync iocbs:
>  	 *  - events go directly into the iocb for fast handling
> @@ -1449,13 +1462,25 @@ rw_common:
>  
>  		len = ret;
>  
> -		if (rw == WRITE)
> +		if (rw == WRITE) {
>  			file_start_write(file);
> +			req->ki_flags |= IOCB_WRITE;
> +		}
>  
>  		ret = iter_op(req, &iter);
>  
> -		if (rw == WRITE)
> -			file_end_write(file);
> +		if (rw == WRITE) {
> +			/*
> +			 * We release freeze protection in aio_complete(). Fool
> +			 * lockdep by telling it the lock got released so that
> +			 * it doesn't complain about held lock when we return
> +			 * to userspace.
> +			 */
> +			percpu_rwsem_release(
> +				&file->f_inode->i_sb->s_writers.rw_sem[SB_FREEZE_WRITE-1],
> +				1, _THIS_IP_);
> +		}
> +
>  		kfree(iovec);
>  		break;
>  
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 3aa514254161..54af40ed6a26 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -319,6 +319,7 @@ struct writeback_control;
>  #define IOCB_EVENTFD		(1 << 0)
>  #define IOCB_APPEND		(1 << 1)
>  #define IOCB_DIRECT		(1 << 2)
> +#define IOCB_WRITE		(1 << 3)
>  
>  struct kiocb {
>  	struct file		*ki_filp;
> -- 
> 2.6.2
Benjamin LaHaise Jan. 7, 2016, 3:08 p.m. UTC | #2
On Thu, Jan 07, 2016 at 10:07:05AM -0500, Benjamin LaHaise wrote:
> On Thu, Jan 07, 2016 at 04:03:04PM +0100, Jan Kara wrote:
> > Fix the problem by dropping freeze protection only once IO is completed
> > in aio_complete().
> 
> Why isn't this code placed in file_start_write() and file_end_write()?  
> That makes more sense to me than sprinkling it in the aio code.

And yes, the file_end_write() call should be moved to aio completion as 
well.

		-ben
Jan Kara Jan. 7, 2016, 3:15 p.m. UTC | #3
On Thu 07-01-16 10:07:05, Benjamin LaHaise wrote:
> On Thu, Jan 07, 2016 at 04:03:04PM +0100, Jan Kara wrote:
> > Currently we dropped freeze protection of aio writes just after IO was
> > submitted. Thus aio write could be in flight while the filesystem was
> > frozen and that could result in unexpected situation like aio completion
> > wanting to convert extent type on frozen filesystem. Testcase from
> > Dmitry triggering this is like:
> > 
> > for ((i=0;i<60;i++));do fsfreeze -f /mnt ;sleep 1;fsfreeze -u /mnt;done &
> > fio --bs=4k --ioengine=libaio --iodepth=128 --size=1g --direct=1 \
> >     --runtime=60 --filename=/mnt/file --name=rand-write --rw=randwrite
> > 
> > Fix the problem by dropping freeze protection only once IO is completed
> > in aio_complete().
> 
> Why isn't this code placed in file_start_write() and file_end_write()?  
> That makes more sense to me than sprinkling it in the aio code.

Well, that would completely defeat the purpose of lockdep annotations for
fs freeze protection - we want lockdep to treat file_start_write() -
file_end_write() pair as a lock-unlock pair. But in case of AIO the unlock
will happen from a different process than lock and lockdep cannot handle
such cases. That's why for AIO we have to add some lockdep magic to avoid
false warnings.

								Honza

> > Reported-by: Dmitry Monakhov <dmonakhov@openvz.org>
> > Signed-off-by: Jan Kara <jack@suse.cz>
> > ---
> >  fs/aio.c           | 31 ++++++++++++++++++++++++++++---
> >  include/linux/fs.h |  1 +
> >  2 files changed, 29 insertions(+), 3 deletions(-)
> > 
> > This patch seems to have fallen through the cracks. Al / Ben, can you please
> > merge it? Thanks!
> > 
> > diff --git a/fs/aio.c b/fs/aio.c
> > index 155f84253f33..ee0871cb4677 100644
> > --- a/fs/aio.c
> > +++ b/fs/aio.c
> > @@ -1065,6 +1065,19 @@ static void aio_complete(struct kiocb *kiocb, long res, long res2)
> >  	unsigned tail, pos, head;
> >  	unsigned long	flags;
> >  
> > +	if (kiocb->ki_flags & IOCB_WRITE) {
> > +		struct file *f = kiocb->ki_filp;
> > +
> > +		/*
> > +		 * Tell lockdep we inherited freeze protection from submission
> > +		 * thread.
> > +		 */
> > +		percpu_rwsem_acquire(
> > +			&f->f_inode->i_sb->s_writers.rw_sem[SB_FREEZE_WRITE-1],
> > +			1, _THIS_IP_);
> > +		file_end_write(f);
> > +	}
> > +
> >  	/*
> >  	 * Special case handling for sync iocbs:
> >  	 *  - events go directly into the iocb for fast handling
> > @@ -1449,13 +1462,25 @@ rw_common:
> >  
> >  		len = ret;
> >  
> > -		if (rw == WRITE)
> > +		if (rw == WRITE) {
> >  			file_start_write(file);
> > +			req->ki_flags |= IOCB_WRITE;
> > +		}
> >  
> >  		ret = iter_op(req, &iter);
> >  
> > -		if (rw == WRITE)
> > -			file_end_write(file);
> > +		if (rw == WRITE) {
> > +			/*
> > +			 * We release freeze protection in aio_complete(). Fool
> > +			 * lockdep by telling it the lock got released so that
> > +			 * it doesn't complain about held lock when we return
> > +			 * to userspace.
> > +			 */
> > +			percpu_rwsem_release(
> > +				&file->f_inode->i_sb->s_writers.rw_sem[SB_FREEZE_WRITE-1],
> > +				1, _THIS_IP_);
> > +		}
> > +
> >  		kfree(iovec);
> >  		break;
> >  
> > diff --git a/include/linux/fs.h b/include/linux/fs.h
> > index 3aa514254161..54af40ed6a26 100644
> > --- a/include/linux/fs.h
> > +++ b/include/linux/fs.h
> > @@ -319,6 +319,7 @@ struct writeback_control;
> >  #define IOCB_EVENTFD		(1 << 0)
> >  #define IOCB_APPEND		(1 << 1)
> >  #define IOCB_DIRECT		(1 << 2)
> > +#define IOCB_WRITE		(1 << 3)
> >  
> >  struct kiocb {
> >  	struct file		*ki_filp;
> > -- 
> > 2.6.2
> 
> -- 
> "Thought is the essence of where you are now."
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Benjamin LaHaise Jan. 7, 2016, 4:24 p.m. UTC | #4
On Thu, Jan 07, 2016 at 04:15:52PM +0100, Jan Kara wrote:
> On Thu 07-01-16 10:07:05, Benjamin LaHaise wrote:
...
> > Why isn't this code placed in file_start_write() and file_end_write()?  
> > That makes more sense to me than sprinkling it in the aio code.
> 
> Well, that would completely defeat the purpose of lockdep annotations for
> fs freeze protection - we want lockdep to treat file_start_write() -
> file_end_write() pair as a lock-unlock pair. But in case of AIO the unlock
> will happen from a different process than lock and lockdep cannot handle
> such cases. That's why for AIO we have to add some lockdep magic to avoid
> false warnings.

Okay, makes sense.  Applied.  I'll forward that with the other fix that 
is pending.

		-ben
diff mbox

Patch

diff --git a/fs/aio.c b/fs/aio.c
index 155f84253f33..ee0871cb4677 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1065,6 +1065,19 @@  static void aio_complete(struct kiocb *kiocb, long res, long res2)
 	unsigned tail, pos, head;
 	unsigned long	flags;
 
+	if (kiocb->ki_flags & IOCB_WRITE) {
+		struct file *f = kiocb->ki_filp;
+
+		/*
+		 * Tell lockdep we inherited freeze protection from submission
+		 * thread.
+		 */
+		percpu_rwsem_acquire(
+			&f->f_inode->i_sb->s_writers.rw_sem[SB_FREEZE_WRITE-1],
+			1, _THIS_IP_);
+		file_end_write(f);
+	}
+
 	/*
 	 * Special case handling for sync iocbs:
 	 *  - events go directly into the iocb for fast handling
@@ -1449,13 +1462,25 @@  rw_common:
 
 		len = ret;
 
-		if (rw == WRITE)
+		if (rw == WRITE) {
 			file_start_write(file);
+			req->ki_flags |= IOCB_WRITE;
+		}
 
 		ret = iter_op(req, &iter);
 
-		if (rw == WRITE)
-			file_end_write(file);
+		if (rw == WRITE) {
+			/*
+			 * We release freeze protection in aio_complete(). Fool
+			 * lockdep by telling it the lock got released so that
+			 * it doesn't complain about held lock when we return
+			 * to userspace.
+			 */
+			percpu_rwsem_release(
+				&file->f_inode->i_sb->s_writers.rw_sem[SB_FREEZE_WRITE-1],
+				1, _THIS_IP_);
+		}
+
 		kfree(iovec);
 		break;
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3aa514254161..54af40ed6a26 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -319,6 +319,7 @@  struct writeback_control;
 #define IOCB_EVENTFD		(1 << 0)
 #define IOCB_APPEND		(1 << 1)
 #define IOCB_DIRECT		(1 << 2)
+#define IOCB_WRITE		(1 << 3)
 
 struct kiocb {
 	struct file		*ki_filp;