diff mbox series

[v3] writeback: Fix inode->i_io_list not be protected by inode->i_lock error

Message ID 20220504182514.25347-1-sunjunchao2870@gmail.com (mailing list archive)
State New, archived
Headers show
Series [v3] writeback: Fix inode->i_io_list not be protected by inode->i_lock error | expand

Commit Message

JunChao Sun May 4, 2022, 6:25 p.m. UTC
Commit b35250c0816c ("writeback: Protect inode->i_io_list with
inode->i_lock") made inode->i_io_list not only protected by
wb->list_lock but also inode->i_lock, but inode_io_list_move_locked()
was missed. Add lock there and also update comment describing things
protected by inode->i_lock.

Fixes: b35250c0816c ("writeback: Protect inode->i_io_list with inode->i_lock")
Signed-off-by: Jchao Sun <sunjunchao2870@gmail.com>
---
Thank you very much for your patient and detailed explanation.

 fs/fs-writeback.c | 17 +++++++++++------
 fs/inode.c        |  2 +-
 2 files changed, 12 insertions(+), 7 deletions(-)

--
2.17.1

Comments

Jan Kara May 4, 2022, 7:38 p.m. UTC | #1
On Wed 04-05-22 11:25:14, Jchao Sun wrote:
> Commit b35250c0816c ("writeback: Protect inode->i_io_list with
> inode->i_lock") made inode->i_io_list not only protected by
> wb->list_lock but also inode->i_lock, but inode_io_list_move_locked()
> was missed. Add lock there and also update comment describing things
> protected by inode->i_lock.
> 
> Fixes: b35250c0816c ("writeback: Protect inode->i_io_list with inode->i_lock")
> Signed-off-by: Jchao Sun <sunjunchao2870@gmail.com>

Almost there :). A few comments below:

> @@ -2402,6 +2404,9 @@ void __mark_inode_dirty(struct inode *inode, int flags)
>  			inode->i_state &= ~I_DIRTY_TIME;
>  		inode->i_state |= flags;
>  
> +		wb = locked_inode_to_wb_and_lock_list(inode);
> +		spin_lock(&inode->i_lock);
> +

We don't want to lock wb->list_lock if the inode was already dirty (which
is a common path). So you want something like:

		if (was_dirty)
			wb = locked_inode_to_wb_and_lock_list(inode);

(and initialize wb to NULL to make sure it does not contain stale value).

> @@ -2409,7 +2414,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
>  		 * list, based upon its state.
>  		 */
>  		if (inode->i_state & I_SYNC_QUEUED)
> -			goto out_unlock_inode;
> +			goto out_unlock;
>  
>  		/*
>  		 * Only add valid (hashed) inodes to the superblock's
> @@ -2417,22 +2422,19 @@ void __mark_inode_dirty(struct inode *inode, int flags)
>  		 */
>  		if (!S_ISBLK(inode->i_mode)) {
>  			if (inode_unhashed(inode))
> -				goto out_unlock_inode;
> +				goto out_unlock;
>  		}
>  		if (inode->i_state & I_FREEING)
> -			goto out_unlock_inode;
> +			goto out_unlock;
>  
>  		/*
>  		 * If the inode was already on b_dirty/b_io/b_more_io, don't
>  		 * reposition it (that would break b_dirty time-ordering).
>  		 */
>  		if (!was_dirty) {
> -			struct bdi_writeback *wb;
>  			struct list_head *dirty_list;
>  			bool wakeup_bdi = false;
>  
> -			wb = locked_inode_to_wb_and_lock_list(inode);
> -
>  			inode->dirtied_when = jiffies;
>  			if (dirtytime)
>  				inode->dirtied_time_when = jiffies;
> @@ -2446,6 +2448,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
>  							       dirty_list);
>  
>  			spin_unlock(&wb->list_lock);
> +			spin_unlock(&inode->i_lock);
>  			trace_writeback_dirty_inode_enqueue(inode);
>  
>  			/*
> @@ -2460,6 +2463,8 @@ void __mark_inode_dirty(struct inode *inode, int flags)
>  			return;
>  		}
>  	}
> +out_unlock:
> +	spin_unlock(&wb->list_lock);

wb->list lock will not be locked in some cases here. So you have to be more
careful about when you need to unlock it. Probably something like:

	if (wb)
		spin_unlock(&wb->list_lock);

and you can put this at the end inside the block "if ((inode->i_state &
flags) != flags)".

Also I'd note it is good to test your changes (it would likely uncover the
locking problem). For these filesystem related things xfstests are useful:

https://git.kernel.org/pub/scm/fs/xfs/xfstests-dev.git/

								Honza
JunChao Sun May 5, 2022, 5:01 a.m. UTC | #2
On Thu, May 5, 2022 at 3:38 AM Jan Kara <jack@suse.cz> wrote:
>
> On Wed 04-05-22 11:25:14, Jchao Sun wrote:
> > Commit b35250c0816c ("writeback: Protect inode->i_io_list with
> > inode->i_lock") made inode->i_io_list not only protected by
> > wb->list_lock but also inode->i_lock, but inode_io_list_move_locked()
> > was missed. Add lock there and also update comment describing things
> > protected by inode->i_lock.
> >
> > Fixes: b35250c0816c ("writeback: Protect inode->i_io_list with inode->i_lock")
> > Signed-off-by: Jchao Sun <sunjunchao2870@gmail.com>
>
> Almost there :). A few comments below:
>
> > @@ -2402,6 +2404,9 @@ void __mark_inode_dirty(struct inode *inode, int flags)
> >                       inode->i_state &= ~I_DIRTY_TIME;
> >               inode->i_state |= flags;
> >
> > +             wb = locked_inode_to_wb_and_lock_list(inode);
> > +             spin_lock(&inode->i_lock);
> > +
>
>
> > We don't want to lock wb->list_lock if the inode was already dirty (which
> > is a common path). So you want something like:
> >
> >                 if (was_dirty)
> >                         wb = locked_inode_to_wb_and_lock_list(inode);
> >
> > (and initialize wb to NULL to make sure it does not contain stale value).

I'm a little confused about here. The logic of the current source tree
is like this:
                       if (!was_dirty) {
                               struct bdi_writeback *wb;
                               wb =  locked_inode_to_wb_and_lock_list(inode);
                               ...
                               dirty_list = &wb-> b_dirty_time;
                               assert_spin_locked(&wb->list_lock);
                       }
The logic is the opposite of the logic in the comments, and it seems
like that wb will
absolutely not be NULL.
Why is this? What is the difference between them?

If run with the logic in the comments, wb will only be initialized
when was_dirty != 0,
suppose was_dirty is 0, wb will not be initialized, and continue
running, will hit
                      if (!was_dirty) {
                              dirty_list = &wb->b_dirty_time;
                      }
will hit NULL pointer.
Is there something I have missed?

>
> > @@ -2409,7 +2414,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
> >                * list, based upon its state.
> >                */
> >               if (inode->i_state & I_SYNC_QUEUED)
> > -                     goto out_unlock_inode;
> > +                     goto out_unlock;
> >
> >               /*
> >                * Only add valid (hashed) inodes to the superblock's
> > @@ -2417,22 +2422,19 @@ void __mark_inode_dirty(struct inode *inode, int flags)
> >                */
> >               if (!S_ISBLK(inode->i_mode)) {
> >                       if (inode_unhashed(inode))
> > -                             goto out_unlock_inode;
> > +                             goto out_unlock;
> >               }
> >               if (inode->i_state & I_FREEING)
> > -                     goto out_unlock_inode;
> > +                     goto out_unlock;
> >
> >               /*
> >                * If the inode was already on b_dirty/b_io/b_more_io, don't
> >                * reposition it (that would break b_dirty time-ordering).
> >                */
> >               if (!was_dirty) {
> > -                     struct bdi_writeback *wb;
> >                       struct list_head *dirty_list;
> >                       bool wakeup_bdi = false;
> >
> > -                     wb = locked_inode_to_wb_and_lock_list(inode);
> > -
> >                       inode->dirtied_when = jiffies;
> >                       if (dirtytime)
> >                               inode->dirtied_time_when = jiffies;
> > @@ -2446,6 +2448,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
> >                                                              dirty_list);
> >
> >                       spin_unlock(&wb->list_lock);
> > +                     spin_unlock(&inode->i_lock);
> >                       trace_writeback_dirty_inode_enqueue(inode);
> >
> >                       /*
> > @@ -2460,6 +2463,8 @@ void __mark_inode_dirty(struct inode *inode, int flags)
> >                       return;
> >               }
> >       }
>
> > > +out_unlock:
> > > +     spin_unlock(&wb->list_lock);

Ouch, this is so obvious now that you mention it. Really stupid
mistake on my side. It surprised me that local compile do not have warnings..
>
> wb->list lock will not be locked in some cases here. So you have to be more
> careful about when you need to unlock it. Probably something like:
>
>         if (wb)
>                 spin_unlock(&wb->list_lock);
>
> and you can put this at the end inside the block "if ((inode->i_state &
> flags) != flags)".
>
>
> > Also I'd note it is good to test your changes (it would likely uncover the
> > locking problem). For these filesystem related things xfstests are useful:
> >
> > https://git.kernel.org/pub/scm/fs/xfs/xfstests-dev.git/

Thanks a lot! I'll test this patch for ext4 fs using this tool.
>
>                                                                 Honza
> --
> Jan Kara <jack@suse.com>
> SUSE Labs, CR
Jan Kara May 5, 2022, 9 a.m. UTC | #3
On Thu 05-05-22 12:45:56, Jchao sun wrote:
> On Thu, May 5, 2022 at 3:38 AM Jan Kara <jack@suse.cz> wrote:
> >
> > On Wed 04-05-22 11:25:14, Jchao Sun wrote:
> > > Commit b35250c0816c ("writeback: Protect inode->i_io_list with
> > > inode->i_lock") made inode->i_io_list not only protected by
> > > wb->list_lock but also inode->i_lock, but inode_io_list_move_locked()
> > > was missed. Add lock there and also update comment describing things
> > > protected by inode->i_lock.
> > >
> > > Fixes: b35250c0816c ("writeback: Protect inode->i_io_list with
> inode->i_lock")
> > > Signed-off-by: Jchao Sun <sunjunchao2870@gmail.com>
> >
> > Almost there :). A few comments below:
> >
> > > @@ -2402,6 +2404,9 @@ void __mark_inode_dirty(struct inode *inode, int
> flags)
> > >                       inode->i_state &= ~I_DIRTY_TIME;
> > >               inode->i_state |= flags;
> > >
> > > +             wb = locked_inode_to_wb_and_lock_list(inode);
> > > +             spin_lock(&inode->i_lock);
> > > +
> >
> 
> > > We don't want to lock wb->list_lock if the inode was already dirty (which
> > > is a common path). So you want something like:
> > >
> > >                 if (was_dirty)
> > >                         wb = locked_inode_to_wb_and_lock_list(inode);
> 
> I'm a little confused about here. The logic of the current source tree is
> like this:
>                        if (!was_dirty) {
>                                struct bdi_writeback *wb;
>                                wb =
> locked_inode_to_wb_and_lock_list(inode);
>                                ...
>                                dirty_list = &wb-> b_dirty_time;
>                                assert_spin_locked(&wb->list_lock);
>                        }
> The logic is the opposite of the logic in the comments, and it seems like
> that wb will
> absolutely not be NULL.
> Why is this? What is the difference between them?

Sorry, that was a typo in my suggestion. It should have been

                 if (!was_dirty)
                         wb = locked_inode_to_wb_and_lock_list(inode);

								Honza
JunChao Sun May 6, 2022, 4:04 p.m. UTC | #4
> On Thu, May 5, 2022 at 5:01 PM Jan Kara <jack@suse.cz> wrote:
> >
> > On Thu 05-05-22 12:45:56, Jchao sun wrote:
> > > On Thu, May 5, 2022 at 3:38 AM Jan Kara <jack@suse.cz> wrote:
> > > >
> > > > On Wed 04-05-22 11:25:14, Jchao Sun wrote:
> > > > > Commit b35250c0816c ("writeback: Protect inode->i_io_list with
> > > > > inode->i_lock") made inode->i_io_list not only protected by
> > > > > wb->list_lock but also inode->i_lock, but inode_io_list_move_locked()
> > > > > was missed. Add lock there and also update comment describing things
> > > > > protected by inode->i_lock.
> > > > >
> > > > > Fixes: b35250c0816c ("writeback: Protect inode->i_io_list with
> > > inode->i_lock")
> > > > > Signed-off-by: Jchao Sun <sunjunchao2870@gmail.com>
> > > >
> > > > Almost there :). A few comments below:
> > > >
> > > > > @@ -2402,6 +2404,9 @@ void __mark_inode_dirty(struct inode *inode, int
> > > flags)
> > > > >                       inode->i_state &= ~I_DIRTY_TIME;
> > > > >               inode->i_state |= flags;
> > > > >
> > > > > +             wb = locked_inode_to_wb_and_lock_list(inode);
> > > > > +             spin_lock(&inode->i_lock);
> > > > > +
> > > >
> > >
> > > > > We don't want to lock wb->list_lock if the inode was already dirty (which
> > > > > is a common path). So you want something like:
> > > > >
> > > > >                 if (was_dirty)
> > > > >                         wb = locked_inode_to_wb_and_lock_list(inode);
> > >
> > > I'm a little confused about here. The logic of the current source tree is
> > > like this:
> > >                        if (!was_dirty) {
> > >                                struct bdi_writeback *wb;
> > >                                wb =
> > > locked_inode_to_wb_and_lock_list(inode);
> > >                                ...
> > >                                dirty_list = &wb-> b_dirty_time;
> > >                                assert_spin_locked(&wb->list_lock);
> > >                        }
> > > The logic is the opposite of the logic in the comments, and it seems like
> > > that wb will
> > > absolutely not be NULL.
> > > Why is this? What is the difference between them?
> >
> > Sorry, that was a typo in my suggestion. It should have been
> >
> >                  if (!was_dirty)
> >                          wb = locked_inode_to_wb_and_lock_list(inode);

1. I have noticed that move_expired_inodes() has the logic as follows:

                      list_move(&inode->i_io_list, &tmp);
                      spin_lock(&inode->i_lock);
                      inode->i_state |= I_SYNC_QUEUED;
                      spin_unlock(&inode->i_lock);
                      ...
                      list_move(&inode->i_io_list, dispatch_queue);

   Neither of the two operations on i_io_list are protected with
inode->i_lock. It looks like that
   do this on purpose, I'm a little confused about this.
   I wonder that is this a mistake. or did this on purpose and there
is something I have missed?
   If the later, why is that?

2. I also have some doubts about the results of testing for xfs with
xfstests.I'll describe my test
    steps later. The kernel version used for testing is
5.18.0-rc5-00016-g107c948d1d3e-dirty,
    and the latest commit is 107c948d1d3e ("Merge tag 'seccomp-v5.18-rc6' of
     git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux")

   <a> First tested without this patch, there are always a few fixed cases
          where it will fail, maybe I got something wrong.  Here is
the testing result.
          Failures: xfs/078 xfs/191-input-validation xfs/252 xfs/289
xfs/293 xfs/514
                                   xfs/515 xfs/544
          Failed 8 of 304 tests

   <b> Then tested with the patch which applied your suggestions. The
result is unstable.
           There is a high probability that there will be more
failures(which will report as follows),
           and a small probability that the test result is the same as
the above test which without
           this patch.

           xfs/206 ... umount: /mnt/test: target is busy.
           _check_xfs_filesystem: filesystem on /dev/loop0 has dirty log
           (see /root/xfstests-dev/results/xfs/206.full for details)
            _check_xfs_filesystem: filesystem on /dev/loop0 is inconsistent(r)
            (see /root/xfstests-dev/results/xfs/206.full for details)
            ...
           Failures: xfs/078 xfs/149 xfs/164 xfs/165
xfs/191-input-validation xfs/206 xfs/222
                          xfs/242 xfs/250 xfs/252 xfs/259 xfs/260
xfs/289 xfs/290 xfs/292 xfs/293
                          xfs/514 xfs/515 xfs/544
           Failed 19 of 304 tests.

           I saw that there is a "fatal error: couldn't initialize XFS
library" which means xfs_repair
           have failed.

   <c>  Lastly tested with the patch which applied your suggestions
and some modifications
           which made operations on i_io_list in move_expired_inodes()
will be protected by
           inode->i_lock. There is a high probability that  the result
is the same as the test in <a>,
           and a small probability the same as <b>

   I think I must be missing something I don't understand yet, do I?

3.   Here are my test steps.
            xfs_io -f -c "falloc 0 10g" test.img
            mkfs.xfs test.img
            losetup /dev/loop0 ./test.img
            mount /dev/loop0 /mnt/test
            export TEST_DEV=/dev/loop0
            export TEST_DIR=/mnt/test
            ./check -g xfs/quick
            reboot after the tests(If don't reboot, following test
results will become more unstable
            and have more failures)

            Repeat the above steps.

I'm sorry to bother you, but the results I got are so weird and I
really want to figure it out. Do you have
any suggestions for this? Looking forward to your reply and I'm happy
to provide more info if needed.


>
>                                                                 Honza
> --
> Jan Kara <jack@suse.com>
> SUSE Labs, CR
Jan Kara May 9, 2022, 1:40 p.m. UTC | #5
On Sat 07-05-22 00:04:45, Jchao sun wrote:
> > On Thu, May 5, 2022 at 5:01 PM Jan Kara <jack@suse.cz> wrote:
> > >
> > > On Thu 05-05-22 12:45:56, Jchao sun wrote:
> > > > On Thu, May 5, 2022 at 3:38 AM Jan Kara <jack@suse.cz> wrote:
> > > > >
> > > > > On Wed 04-05-22 11:25:14, Jchao Sun wrote:
> > > > > > Commit b35250c0816c ("writeback: Protect inode->i_io_list with
> > > > > > inode->i_lock") made inode->i_io_list not only protected by
> > > > > > wb->list_lock but also inode->i_lock, but inode_io_list_move_locked()
> > > > > > was missed. Add lock there and also update comment describing things
> > > > > > protected by inode->i_lock.
> > > > > >
> > > > > > Fixes: b35250c0816c ("writeback: Protect inode->i_io_list with
> > > > inode->i_lock")
> > > > > > Signed-off-by: Jchao Sun <sunjunchao2870@gmail.com>
> > > > >
> > > > > Almost there :). A few comments below:
> > > > >
> > > > > > @@ -2402,6 +2404,9 @@ void __mark_inode_dirty(struct inode *inode, int
> > > > flags)
> > > > > >                       inode->i_state &= ~I_DIRTY_TIME;
> > > > > >               inode->i_state |= flags;
> > > > > >
> > > > > > +             wb = locked_inode_to_wb_and_lock_list(inode);
> > > > > > +             spin_lock(&inode->i_lock);
> > > > > > +
> > > > >
> > > >
> > > > > > We don't want to lock wb->list_lock if the inode was already dirty (which
> > > > > > is a common path). So you want something like:
> > > > > >
> > > > > >                 if (was_dirty)
> > > > > >                         wb = locked_inode_to_wb_and_lock_list(inode);
> > > >
> > > > I'm a little confused about here. The logic of the current source tree is
> > > > like this:
> > > >                        if (!was_dirty) {
> > > >                                struct bdi_writeback *wb;
> > > >                                wb =
> > > > locked_inode_to_wb_and_lock_list(inode);
> > > >                                ...
> > > >                                dirty_list = &wb-> b_dirty_time;
> > > >                                assert_spin_locked(&wb->list_lock);
> > > >                        }
> > > > The logic is the opposite of the logic in the comments, and it seems like
> > > > that wb will
> > > > absolutely not be NULL.
> > > > Why is this? What is the difference between them?
> > >
> > > Sorry, that was a typo in my suggestion. It should have been
> > >
> > >                  if (!was_dirty)
> > >                          wb = locked_inode_to_wb_and_lock_list(inode);
> 
> 1. I have noticed that move_expired_inodes() has the logic as follows:
> 
>                       list_move(&inode->i_io_list, &tmp);
>                       spin_lock(&inode->i_lock);
>                       inode->i_state |= I_SYNC_QUEUED;
>                       spin_unlock(&inode->i_lock);
>                       ...
>                       list_move(&inode->i_io_list, dispatch_queue);
> 
>    Neither of the two operations on i_io_list are protected with
> inode->i_lock. It looks like that
>    do this on purpose, I'm a little confused about this.
>    I wonder that is this a mistake. or did this on purpose and there
> is something I have missed?
>    If the later, why is that?

Yes, that looks like a bug but a harmless one. Actually looking into the
code I'm not sure we still need the protection of inode->i_lock for
inode->i_io_list handling but that would be a separate cleanup anyway.

> 2. I also have some doubts about the results of testing for xfs with
> xfstests.I'll describe my test
>     steps later. The kernel version used for testing is
> 5.18.0-rc5-00016-g107c948d1d3e-dirty,
>     and the latest commit is 107c948d1d3e ("Merge tag 'seccomp-v5.18-rc6' of
>      git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux")
> 
>    <a> First tested without this patch, there are always a few fixed cases
>           where it will fail, maybe I got something wrong.  Here is
> the testing result.
>           Failures: xfs/078 xfs/191-input-validation xfs/252 xfs/289
> xfs/293 xfs/514
>                                    xfs/515 xfs/544
>           Failed 8 of 304 tests

This is OK. Usually failures like these are due to older version of
xfsprogs, unexpected configuration of the kernel, some missing tool or
something like that. Anyway you should be mostly interested in
not introducing new test failures :).

>    <b> Then tested with the patch which applied your suggestions. The
> result is unstable.
>            There is a high probability that there will be more
> failures(which will report as follows),
>            and a small probability that the test result is the same as
> the above test which without
>            this patch.
> 
>            xfs/206 ... umount: /mnt/test: target is busy.
>            _check_xfs_filesystem: filesystem on /dev/loop0 has dirty log
>            (see /root/xfstests-dev/results/xfs/206.full for details)
>             _check_xfs_filesystem: filesystem on /dev/loop0 is inconsistent(r)
>             (see /root/xfstests-dev/results/xfs/206.full for details)
>             ...
>            Failures: xfs/078 xfs/149 xfs/164 xfs/165
> xfs/191-input-validation xfs/206 xfs/222
>                           xfs/242 xfs/250 xfs/252 xfs/259 xfs/260
> xfs/289 xfs/290 xfs/292 xfs/293
>                           xfs/514 xfs/515 xfs/544
>            Failed 19 of 304 tests.

So this is definitely suspicious. Likely the patch introduced a problem.
You need to have a look why e.g. test xfs/149 fails (you can run individual
test like "./check xfs/149"). You can inspect output the test generates,
also kernel logs if there's anything suspicious etc.

>            I saw that there is a "fatal error: couldn't initialize XFS
> library" which means xfs_repair
>            have failed.

Yeah, you can maybe strace xfs_repair why this fails...

>    <c>  Lastly tested with the patch which applied your suggestions
> and some modifications
>            which made operations on i_io_list in move_expired_inodes()
> will be protected by
>            inode->i_lock. There is a high probability that  the result
> is the same as the test in <a>,
>            and a small probability the same as <b>
> 
>    I think I must be missing something I don't understand yet, do I?

Well, it may be that there is just some race somewhere so the problem does
not always manifest.

> 3.   Here are my test steps.
>             xfs_io -f -c "falloc 0 10g" test.img
>             mkfs.xfs test.img
>             losetup /dev/loop0 ./test.img
>             mount /dev/loop0 /mnt/test
>             export TEST_DEV=/dev/loop0
>             export TEST_DIR=/mnt/test
>             ./check -g xfs/quick
>             reboot after the tests(If don't reboot, following test
> results will become more unstable
>             and have more failures)
> 
>             Repeat the above steps.

This looks OK.

								Honza
diff mbox series

Patch

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 591fe9cf1659..c879bcc41d28 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -120,6 +120,7 @@  static bool inode_io_list_move_locked(struct inode *inode,
 				      struct list_head *head)
 {
 	assert_spin_locked(&wb->list_lock);
+	assert_spin_locked(&inode->i_lock);
 
 	list_move(&inode->i_io_list, head);
 
@@ -2351,6 +2352,7 @@  void __mark_inode_dirty(struct inode *inode, int flags)
 {
 	struct super_block *sb = inode->i_sb;
 	int dirtytime = 0;
+	struct bdi_writeback *wb;
 
 	trace_writeback_mark_inode_dirty(inode, flags);
 
@@ -2402,6 +2404,9 @@  void __mark_inode_dirty(struct inode *inode, int flags)
 			inode->i_state &= ~I_DIRTY_TIME;
 		inode->i_state |= flags;
 
+		wb = locked_inode_to_wb_and_lock_list(inode);
+		spin_lock(&inode->i_lock);
+
 		/*
 		 * If the inode is queued for writeback by flush worker, just
 		 * update its dirty state. Once the flush worker is done with
@@ -2409,7 +2414,7 @@  void __mark_inode_dirty(struct inode *inode, int flags)
 		 * list, based upon its state.
 		 */
 		if (inode->i_state & I_SYNC_QUEUED)
-			goto out_unlock_inode;
+			goto out_unlock;
 
 		/*
 		 * Only add valid (hashed) inodes to the superblock's
@@ -2417,22 +2422,19 @@  void __mark_inode_dirty(struct inode *inode, int flags)
 		 */
 		if (!S_ISBLK(inode->i_mode)) {
 			if (inode_unhashed(inode))
-				goto out_unlock_inode;
+				goto out_unlock;
 		}
 		if (inode->i_state & I_FREEING)
-			goto out_unlock_inode;
+			goto out_unlock;
 
 		/*
 		 * If the inode was already on b_dirty/b_io/b_more_io, don't
 		 * reposition it (that would break b_dirty time-ordering).
 		 */
 		if (!was_dirty) {
-			struct bdi_writeback *wb;
 			struct list_head *dirty_list;
 			bool wakeup_bdi = false;
 
-			wb = locked_inode_to_wb_and_lock_list(inode);
-
 			inode->dirtied_when = jiffies;
 			if (dirtytime)
 				inode->dirtied_time_when = jiffies;
@@ -2446,6 +2448,7 @@  void __mark_inode_dirty(struct inode *inode, int flags)
 							       dirty_list);
 
 			spin_unlock(&wb->list_lock);
+			spin_unlock(&inode->i_lock);
 			trace_writeback_dirty_inode_enqueue(inode);
 
 			/*
@@ -2460,6 +2463,8 @@  void __mark_inode_dirty(struct inode *inode, int flags)
 			return;
 		}
 	}
+out_unlock:
+	spin_unlock(&wb->list_lock);
 out_unlock_inode:
 	spin_unlock(&inode->i_lock);
 }
diff --git a/fs/inode.c b/fs/inode.c
index 9d9b422504d1..bd4da9c5207e 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -27,7 +27,7 @@ 
  * Inode locking rules:
  *
  * inode->i_lock protects:
- *   inode->i_state, inode->i_hash, __iget()
+ *   inode->i_state, inode->i_hash, __iget(), inode->i_io_list
  * Inode LRU list locks protect:
  *   inode->i_sb->s_inode_lru, inode->i_lru
  * inode->i_sb->s_inode_list_lock protects: