diff mbox series

[1/2] xfs: wait xlog ioend workqueue drained before tearing down AIL

Message ID 20221103083632.150458-2-guoxuenan@huawei.com (mailing list archive)
State Superseded
Headers show
Series [1/2] xfs: wait xlog ioend workqueue drained before tearing down AIL | expand

Commit Message

Guo Xuenan Nov. 3, 2022, 8:36 a.m. UTC
Fix uaf in xfs_trans_ail_delete during xlog force shutdown.
In commit cd6f79d1fb32 ("xfs: run callbacks before waking waiters in
xlog_state_shutdown_callbacks") changed the order of running callbacks
and wait for iclog completion to avoid unmount path untimely destroy AIL.
But which seems not enough to ensue this, adding mdelay in
`xfs_buf_item_unpin` can prove that.

The reproduction is as follows. To ensure destroy AIL safely,
we should wait all xlog ioend workers done and sync the AIL.

==================================================================
BUG: KASAN: use-after-free in xfs_trans_ail_delete+0x240/0x2a0
Read of size 8 at addr ffff888023169400 by task kworker/1:1H/43

CPU: 1 PID: 43 Comm: kworker/1:1H Tainted: G        W
6.1.0-rc1-00002-gc28266863c4a #137
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
1.13.0-1ubuntu1.1 04/01/2014
Workqueue: xfs-log/sda xlog_ioend_work
Call Trace:
 <TASK>
 dump_stack_lvl+0x4d/0x66
 print_report+0x171/0x4a6
 kasan_report+0xb3/0x130
 xfs_trans_ail_delete+0x240/0x2a0
 xfs_buf_item_done+0x7b/0xa0
 xfs_buf_ioend+0x1e9/0x11f0
 xfs_buf_item_unpin+0x4c8/0x860
 xfs_trans_committed_bulk+0x4c2/0x7c0
 xlog_cil_committed+0xab6/0xfb0
 xlog_cil_process_committed+0x117/0x1e0
 xlog_state_shutdown_callbacks+0x208/0x440
 xlog_force_shutdown+0x1b3/0x3a0
 xlog_ioend_work+0xef/0x1d0
 process_one_work+0x6f9/0xf70
 worker_thread+0x578/0xf30
 kthread+0x28c/0x330
 ret_from_fork+0x1f/0x30
 </TASK>

Allocated by task 9606:
 kasan_save_stack+0x1e/0x40
 kasan_set_track+0x21/0x30
 __kasan_kmalloc+0x7a/0x90
 __kmalloc+0x59/0x140
 kmem_alloc+0xb2/0x2f0
 xfs_trans_ail_init+0x20/0x320
 xfs_log_mount+0x37e/0x690
 xfs_mountfs+0xe36/0x1b40
 xfs_fs_fill_super+0xc5c/0x1a70
 get_tree_bdev+0x3c5/0x6c0
 vfs_get_tree+0x85/0x250
 path_mount+0xec3/0x1830
 do_mount+0xef/0x110
 __x64_sys_mount+0x150/0x1f0
 do_syscall_64+0x35/0x80
 entry_SYSCALL_64_after_hwframe+0x63/0xcd

Freed by task 9662:
 kasan_save_stack+0x1e/0x40
 kasan_set_track+0x21/0x30
 kasan_save_free_info+0x2a/0x40
 __kasan_slab_free+0x105/0x1a0
 __kmem_cache_free+0x99/0x2d0
 kvfree+0x3a/0x40
 xfs_log_unmount+0x60/0xf0
 xfs_unmountfs+0xf3/0x1d0
 xfs_fs_put_super+0x78/0x300
 generic_shutdown_super+0x151/0x400
 kill_block_super+0x9a/0xe0
 deactivate_locked_super+0x82/0xe0
 deactivate_super+0x91/0xb0
 cleanup_mnt+0x32a/0x4a0
 task_work_run+0x15f/0x240
 exit_to_user_mode_prepare+0x188/0x190
 syscall_exit_to_user_mode+0x12/0x30
 do_syscall_64+0x42/0x80
 entry_SYSCALL_64_after_hwframe+0x63/0xcd

The buggy address belongs to the object at ffff888023169400
 which belongs to the cache kmalloc-128 of size 128
The buggy address is located 0 bytes inside of
 128-byte region [ffff888023169400, ffff888023169480)

The buggy address belongs to the physical page:
page:ffffea00008c5a00 refcount:1 mapcount:0 mapping:0000000000000000
index:0xffff888023168f80 pfn:0x23168
head:ffffea00008c5a00 order:1 compound_mapcount:0 compound_pincount:0
flags: 0x1fffff80010200(slab|head|node=0|zone=1|lastcpupid=0x1fffff)
raw: 001fffff80010200 ffffea00006b3988 ffffea0000577a88 ffff88800f842ac0
raw: ffff888023168f80 0000000000150007 00000001ffffffff 0000000000000000
page dumped because: kasan: bad access detected

Memory state around the buggy address:
 ffff888023169300: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
 ffff888023169380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
>ffff888023169400: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
                   ^
 ffff888023169480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
 ffff888023169500: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
==================================================================
Disabling lock debugging due to kernel taint

Fixes: cd6f79d1fb32 ("xfs: run callbacks before waking waiters in xlog_state_shutdown_callbacks")
Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
---
 fs/xfs/xfs_trans_ail.c | 3 +++
 1 file changed, 3 insertions(+)

Comments

Dave Chinner Nov. 3, 2022, 9:16 p.m. UTC | #1
On Thu, Nov 03, 2022 at 04:36:31PM +0800, Guo Xuenan wrote:
> Fix uaf in xfs_trans_ail_delete during xlog force shutdown.
> In commit cd6f79d1fb32 ("xfs: run callbacks before waking waiters in
> xlog_state_shutdown_callbacks") changed the order of running callbacks
> and wait for iclog completion to avoid unmount path untimely destroy AIL.
> But which seems not enough to ensue this, adding mdelay in
> `xfs_buf_item_unpin` can prove that.
> 
> The reproduction is as follows. To ensure destroy AIL safely,
> we should wait all xlog ioend workers done and sync the AIL.
> 
> ==================================================================
> BUG: KASAN: use-after-free in xfs_trans_ail_delete+0x240/0x2a0
> Read of size 8 at addr ffff888023169400 by task kworker/1:1H/43
> 
> CPU: 1 PID: 43 Comm: kworker/1:1H Tainted: G        W
> 6.1.0-rc1-00002-gc28266863c4a #137
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
> 1.13.0-1ubuntu1.1 04/01/2014
> Workqueue: xfs-log/sda xlog_ioend_work
> Call Trace:
>  <TASK>
>  dump_stack_lvl+0x4d/0x66
>  print_report+0x171/0x4a6
>  kasan_report+0xb3/0x130
>  xfs_trans_ail_delete+0x240/0x2a0
>  xfs_buf_item_done+0x7b/0xa0
>  xfs_buf_ioend+0x1e9/0x11f0
>  xfs_buf_item_unpin+0x4c8/0x860
>  xfs_trans_committed_bulk+0x4c2/0x7c0
>  xlog_cil_committed+0xab6/0xfb0
>  xlog_cil_process_committed+0x117/0x1e0
>  xlog_state_shutdown_callbacks+0x208/0x440
>  xlog_force_shutdown+0x1b3/0x3a0
>  xlog_ioend_work+0xef/0x1d0

So we are still processing an iclog at this point and have it
locked (iclog->ic_sema is held). These aren't cycled to wait for
all processing to complete until xlog_dealloc_log() before they are
freed.

If we cycle through the iclog->ic_sema locks when we quiesce the log
(as we should be doing before attempting to write an unmount record)
this UAF problem goes away, right?

> diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
> index f51df7d94ef7..1054adb29907 100644
> --- a/fs/xfs/xfs_trans_ail.c
> +++ b/fs/xfs/xfs_trans_ail.c
> @@ -929,6 +929,9 @@ xfs_trans_ail_destroy(
>  {
>  	struct xfs_ail	*ailp = mp->m_ail;
>  
> +	drain_workqueue(mp->m_log->l_ioend_workqueue);
> +	xfs_ail_push_all_sync(ailp);

This isn't the place to be draining the AIL and waiting for IO to
complete. As per above, that should have been done long before we
get here...

-Dave.
Guo Xuenan Nov. 4, 2022, 7:50 a.m. UTC | #2
Hi,Dave:
On 2022/11/4 5:16, Dave Chinner wrote:
> On Thu, Nov 03, 2022 at 04:36:31PM +0800, Guo Xuenan wrote:
>> Fix uaf in xfs_trans_ail_delete during xlog force shutdown.
>> In commit cd6f79d1fb32 ("xfs: run callbacks before waking waiters in
>> xlog_state_shutdown_callbacks") changed the order of running callbacks
>> and wait for iclog completion to avoid unmount path untimely destroy AIL.
>> But which seems not enough to ensue this, adding mdelay in
>> `xfs_buf_item_unpin` can prove that.
>>
>> The reproduction is as follows. To ensure destroy AIL safely,
>> we should wait all xlog ioend workers done and sync the AIL.
>>
>> ==================================================================
>> BUG: KASAN: use-after-free in xfs_trans_ail_delete+0x240/0x2a0
>> Read of size 8 at addr ffff888023169400 by task kworker/1:1H/43
>>
>> CPU: 1 PID: 43 Comm: kworker/1:1H Tainted: G        W
>> 6.1.0-rc1-00002-gc28266863c4a #137
>> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
>> 1.13.0-1ubuntu1.1 04/01/2014
>> Workqueue: xfs-log/sda xlog_ioend_work
>> Call Trace:
>>   <TASK>
>>   dump_stack_lvl+0x4d/0x66
>>   print_report+0x171/0x4a6
>>   kasan_report+0xb3/0x130
>>   xfs_trans_ail_delete+0x240/0x2a0
>>   xfs_buf_item_done+0x7b/0xa0
>>   xfs_buf_ioend+0x1e9/0x11f0
>>   xfs_buf_item_unpin+0x4c8/0x860
>>   xfs_trans_committed_bulk+0x4c2/0x7c0
>>   xlog_cil_committed+0xab6/0xfb0
>>   xlog_cil_process_committed+0x117/0x1e0
>>   xlog_state_shutdown_callbacks+0x208/0x440
>>   xlog_force_shutdown+0x1b3/0x3a0
>>   xlog_ioend_work+0xef/0x1d0
> So we are still processing an iclog at this point and have it
> locked (iclog->ic_sema is held). These aren't cycled to wait for
> all processing to complete until xlog_dealloc_log() before they are
> freed.
>
> If we cycle through the iclog->ic_sema locks when we quiesce the log
> (as we should be doing before attempting to write an unmount record)
> this UAF problem goes away, right?
Yes,:) right!According to the method you said, we can also solve this 
problem.
The key to sloving this problem is to make sure that all log IO is done 
before
tearing down AIL.
>
>> diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
>> index f51df7d94ef7..1054adb29907 100644
>> --- a/fs/xfs/xfs_trans_ail.c
>> +++ b/fs/xfs/xfs_trans_ail.c
>> @@ -929,6 +929,9 @@ xfs_trans_ail_destroy(
>>   {
>>   	struct xfs_ail	*ailp = mp->m_ail;
>>   
>> +	drain_workqueue(mp->m_log->l_ioend_workqueue);
>> +	xfs_ail_push_all_sync(ailp);
> This isn't the place to be draining the AIL and waiting for IO to
> complete. As per above, that should have been done long before we
> get here...
I'm agree with your opinion,but, I have verified that it can indeed 
solve the UAF.
And, I also verify the way you suggested, it is equally effective.
But, I have no better idea where to place this check, hope for your 
better suggestions.
Here I provide a way for reference,would you kindly consider the 
following modifications,
thanks in advance :)

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index f02a0dd522b3..4e48cc3ba6da 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1094,8 +1094,22 @@ void
  xfs_log_clean(
         struct xfs_mount        *mp)
  {
+       struct xlog     *log = mp->m_log;
+       xlog_in_core_t  *iclog = log->l_iclog;
+       int             i;
+
         xfs_log_quiesce(mp);
         xfs_log_unmount_write(mp);
+
+       /*
+        * Cycle all the iclogbuf locks to make sure all log IO completion
+        * is done before we tear down AIL.
+        */
+       for (i = 0; i < log->l_iclog_bufs; i++) {
+               down(&iclog->ic_sema);
+               up(&iclog->ic_sema);
+               iclog = iclog->ic_next;
+       }
  }

Best regards
Xuenan
> -Dave.
Darrick J. Wong Nov. 4, 2022, 3:46 p.m. UTC | #3
On Fri, Nov 04, 2022 at 03:50:44PM +0800, Guo Xuenan wrote:
> Hi,Dave:
> On 2022/11/4 5:16, Dave Chinner wrote:
> > On Thu, Nov 03, 2022 at 04:36:31PM +0800, Guo Xuenan wrote:
> > > Fix uaf in xfs_trans_ail_delete during xlog force shutdown.
> > > In commit cd6f79d1fb32 ("xfs: run callbacks before waking waiters in
> > > xlog_state_shutdown_callbacks") changed the order of running callbacks
> > > and wait for iclog completion to avoid unmount path untimely destroy AIL.
> > > But which seems not enough to ensue this, adding mdelay in
> > > `xfs_buf_item_unpin` can prove that.
> > > 
> > > The reproduction is as follows. To ensure destroy AIL safely,
> > > we should wait all xlog ioend workers done and sync the AIL.
> > > 
> > > ==================================================================
> > > BUG: KASAN: use-after-free in xfs_trans_ail_delete+0x240/0x2a0
> > > Read of size 8 at addr ffff888023169400 by task kworker/1:1H/43
> > > 
> > > CPU: 1 PID: 43 Comm: kworker/1:1H Tainted: G        W
> > > 6.1.0-rc1-00002-gc28266863c4a #137
> > > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
> > > 1.13.0-1ubuntu1.1 04/01/2014
> > > Workqueue: xfs-log/sda xlog_ioend_work
> > > Call Trace:
> > >   <TASK>
> > >   dump_stack_lvl+0x4d/0x66
> > >   print_report+0x171/0x4a6
> > >   kasan_report+0xb3/0x130
> > >   xfs_trans_ail_delete+0x240/0x2a0
> > >   xfs_buf_item_done+0x7b/0xa0
> > >   xfs_buf_ioend+0x1e9/0x11f0
> > >   xfs_buf_item_unpin+0x4c8/0x860
> > >   xfs_trans_committed_bulk+0x4c2/0x7c0
> > >   xlog_cil_committed+0xab6/0xfb0
> > >   xlog_cil_process_committed+0x117/0x1e0
> > >   xlog_state_shutdown_callbacks+0x208/0x440
> > >   xlog_force_shutdown+0x1b3/0x3a0
> > >   xlog_ioend_work+0xef/0x1d0
> > So we are still processing an iclog at this point and have it
> > locked (iclog->ic_sema is held). These aren't cycled to wait for
> > all processing to complete until xlog_dealloc_log() before they are
> > freed.
> > 
> > If we cycle through the iclog->ic_sema locks when we quiesce the log
> > (as we should be doing before attempting to write an unmount record)
> > this UAF problem goes away, right?
> Yes,:) right!According to the method you said, we can also solve this
> problem.
> The key to sloving this problem is to make sure that all log IO is done
> before
> tearing down AIL.
> > 
> > > diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
> > > index f51df7d94ef7..1054adb29907 100644
> > > --- a/fs/xfs/xfs_trans_ail.c
> > > +++ b/fs/xfs/xfs_trans_ail.c
> > > @@ -929,6 +929,9 @@ xfs_trans_ail_destroy(
> > >   {
> > >   	struct xfs_ail	*ailp = mp->m_ail;
> > > +	drain_workqueue(mp->m_log->l_ioend_workqueue);
> > > +	xfs_ail_push_all_sync(ailp);
> > This isn't the place to be draining the AIL and waiting for IO to
> > complete. As per above, that should have been done long before we
> > get here...
> I'm agree with your opinion,but, I have verified that it can indeed solve
> the UAF.
> And, I also verify the way you suggested, it is equally effective.
> But, I have no better idea where to place this check, hope for your better
> suggestions.
> Here I provide a way for reference,would you kindly consider the following
> modifications,
> thanks in advance :)
> 
> diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
> index f02a0dd522b3..4e48cc3ba6da 100644
> --- a/fs/xfs/xfs_log.c
> +++ b/fs/xfs/xfs_log.c
> @@ -1094,8 +1094,22 @@ void
>  xfs_log_clean(
>         struct xfs_mount        *mp)
>  {
> +       struct xlog     *log = mp->m_log;
> +       xlog_in_core_t  *iclog = log->l_iclog;
> +       int             i;
> +
>         xfs_log_quiesce(mp);
>         xfs_log_unmount_write(mp);
> +
> +       /*
> +        * Cycle all the iclogbuf locks to make sure all log IO completion
> +        * is done before we tear down AIL.
> +        */
> +       for (i = 0; i < log->l_iclog_bufs; i++) {
> +               down(&iclog->ic_sema);
> +               up(&iclog->ic_sema);
> +               iclog = iclog->ic_next;
> +       }

I'm pretty sure Dave meant *before* xfs_log_unmount_write when he said
"as we should be doing before attempting to write an unmount record".
Just from looking at function names, I wonder if this shouldn't be a
final step of xfs_log_quiesce since a log with active IO completion
doesn't really sound empty to me...

--D

>  }
> 
> Best regards
> Xuenan
> > -Dave.
> 
> -- 
> Guo Xuenan [OS Kernel Lab]
> -----------------------------
> Email: guoxuenan@huawei.com
>
Guo Xuenan Nov. 5, 2022, 3:32 a.m. UTC | #4
On 2022/11/4 23:46, Darrick J. Wong wrote:
> On Fri, Nov 04, 2022 at 03:50:44PM +0800, Guo Xuenan wrote:
>> Hi,Dave:
>> On 2022/11/4 5:16, Dave Chinner wrote:
>>> On Thu, Nov 03, 2022 at 04:36:31PM +0800, Guo Xuenan wrote:
>>>> Fix uaf in xfs_trans_ail_delete during xlog force shutdown.
>>>> In commit cd6f79d1fb32 ("xfs: run callbacks before waking waiters in
>>>> xlog_state_shutdown_callbacks") changed the order of running callbacks
>>>> and wait for iclog completion to avoid unmount path untimely destroy AIL.
>>>> But which seems not enough to ensue this, adding mdelay in
>>>> `xfs_buf_item_unpin` can prove that.
>>>>
>>>> The reproduction is as follows. To ensure destroy AIL safely,
>>>> we should wait all xlog ioend workers done and sync the AIL.
>>>>
>>>> ==================================================================
>>>> BUG: KASAN: use-after-free in xfs_trans_ail_delete+0x240/0x2a0
>>>> Read of size 8 at addr ffff888023169400 by task kworker/1:1H/43
>>>>
>>>> CPU: 1 PID: 43 Comm: kworker/1:1H Tainted: G        W
>>>> 6.1.0-rc1-00002-gc28266863c4a #137
>>>> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
>>>> 1.13.0-1ubuntu1.1 04/01/2014
>>>> Workqueue: xfs-log/sda xlog_ioend_work
>>>> Call Trace:
>>>>    <TASK>
>>>>    dump_stack_lvl+0x4d/0x66
>>>>    print_report+0x171/0x4a6
>>>>    kasan_report+0xb3/0x130
>>>>    xfs_trans_ail_delete+0x240/0x2a0
>>>>    xfs_buf_item_done+0x7b/0xa0
>>>>    xfs_buf_ioend+0x1e9/0x11f0
>>>>    xfs_buf_item_unpin+0x4c8/0x860
>>>>    xfs_trans_committed_bulk+0x4c2/0x7c0
>>>>    xlog_cil_committed+0xab6/0xfb0
>>>>    xlog_cil_process_committed+0x117/0x1e0
>>>>    xlog_state_shutdown_callbacks+0x208/0x440
>>>>    xlog_force_shutdown+0x1b3/0x3a0
>>>>    xlog_ioend_work+0xef/0x1d0
>>> So we are still processing an iclog at this point and have it
>>> locked (iclog->ic_sema is held). These aren't cycled to wait for
>>> all processing to complete until xlog_dealloc_log() before they are
>>> freed.
>>>
>>> If we cycle through the iclog->ic_sema locks when we quiesce the log
>>> (as we should be doing before attempting to write an unmount record)
>>> this UAF problem goes away, right?
>> Yes,:) right!According to the method you said, we can also solve this
>> problem.
>> The key to sloving this problem is to make sure that all log IO is done
>> before
>> tearing down AIL.
>>>> diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
>>>> index f51df7d94ef7..1054adb29907 100644
>>>> --- a/fs/xfs/xfs_trans_ail.c
>>>> +++ b/fs/xfs/xfs_trans_ail.c
>>>> @@ -929,6 +929,9 @@ xfs_trans_ail_destroy(
>>>>    {
>>>>    	struct xfs_ail	*ailp = mp->m_ail;
>>>> +	drain_workqueue(mp->m_log->l_ioend_workqueue);
>>>> +	xfs_ail_push_all_sync(ailp);
>>> This isn't the place to be draining the AIL and waiting for IO to
>>> complete. As per above, that should have been done long before we
>>> get here...
>> I'm agree with your opinion,but, I have verified that it can indeed solve
>> the UAF.
>> And, I also verify the way you suggested, it is equally effective.
>> But, I have no better idea where to place this check, hope for your better
>> suggestions.
>> Here I provide a way for reference,would you kindly consider the following
>> modifications,
>> thanks in advance :)
>>
>> diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
>> index f02a0dd522b3..4e48cc3ba6da 100644
>> --- a/fs/xfs/xfs_log.c
>> +++ b/fs/xfs/xfs_log.c
>> @@ -1094,8 +1094,22 @@ void
>>   xfs_log_clean(
>>          struct xfs_mount        *mp)
>>   {
>> +       struct xlog     *log = mp->m_log;
>> +       xlog_in_core_t  *iclog = log->l_iclog;
>> +       int             i;
>> +
>>          xfs_log_quiesce(mp);
>>          xfs_log_unmount_write(mp);
>> +
>> +       /*
>> +        * Cycle all the iclogbuf locks to make sure all log IO completion
>> +        * is done before we tear down AIL.
>> +        */
>> +       for (i = 0; i < log->l_iclog_bufs; i++) {
>> +               down(&iclog->ic_sema);
>> +               up(&iclog->ic_sema);
>> +               iclog = iclog->ic_next;
>> +       }
> I'm pretty sure Dave meant *before* xfs_log_unmount_write when he said
> "as we should be doing before attempting to write an unmount record".
> Just from looking at function names, I wonder if this shouldn't be a
> final step of xfs_log_quiesce since a log with active IO completion
> doesn't really sound empty to me...
Sorry for my poor english,IIUC,you meant put the io compeletion check
between xfs_log_quiesce and xfs_log_umount_write ? May we abstract
the "cycle iclogbuf wait" into a function named
xlog_wait_iodone/xlog_quiesce_done or something more appropriate.

For example:
@@ -82,6 +82,9 @@ STATIC int
  xlog_iclogs_empty(
         struct xlog             *log);
+static void
+xlog_wait_iodone(struct xlog *log);
+
  static int
  xfs_log_cover(struct xfs_mount *);

@@ -886,6 +889,23 @@ xlog_force_iclog(
         return xlog_state_release_iclog(iclog->ic_log, iclog, NULL);
  }

+/*
+ * Cycle all the iclogbuf locks to make sure all log IO completion
+ * is done before we tear down AIL/CIL.
+ */
+static void
+xlog_wait_iodone(struct xlog *log)
+{
+       int             i;
+       xlog_in_core_t  *iclog = log->l_iclog;
+
+       for (i = 0; i < log->l_iclog_bufs; i++) {
+               down(&iclog->ic_sema);
+               up(&iclog->ic_sema);
+               iclog = iclog->ic_next;
+       }
+}
+
  /*
   * Wait for the iclog and all prior iclogs to be written disk as 
required by the
   * log force state machine. Waiting on ic_force_wait ensures iclog 
completions
@@ -1095,6 +1115,7 @@ xfs_log_clean(
         struct xfs_mount        *mp)
  {
         xfs_log_quiesce(mp);
+       xlog_wait_iodone(mp->m_log);
         xfs_log_unmount_write(mp);
  }

@@ -2113,17 +2134,7 @@ xlog_dealloc_log(
         xlog_in_core_t  *iclog, *next_iclog;
         int             i;

-       /*
-        * Cycle all the iclogbuf locks to make sure all log IO completion
-        * is done before we tear down these buffers.
-        */
-       iclog = log->l_iclog;
-       for (i = 0; i < log->l_iclog_bufs; i++) {
-               down(&iclog->ic_sema);
-               up(&iclog->ic_sema);
-               iclog = iclog->ic_next;
-       }
-
+       xlog_wait_iodone(log);

Best regards :)
Xuenan
> --D
>
>>   }
>>
>> Best regards
>> Xuenan
>>> -Dave.
>> -- 
>> Guo Xuenan [OS Kernel Lab]
>> -----------------------------
>> Email: guoxuenan@huawei.com
>>
> .
diff mbox series

Patch

diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index f51df7d94ef7..1054adb29907 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -929,6 +929,9 @@  xfs_trans_ail_destroy(
 {
 	struct xfs_ail	*ailp = mp->m_ail;
 
+	drain_workqueue(mp->m_log->l_ioend_workqueue);
+	xfs_ail_push_all_sync(ailp);
+
 	kthread_stop(ailp->ail_task);
 	kmem_free(ailp);
 }