Message ID | 20210222233107.3233795-1-david@fromorbit.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | xfs: use current->journal_info for detecting transaction recursion | expand |
On Tue, Feb 23, 2021 at 10:31:07AM +1100, Dave Chinner wrote: > From: Dave Chinner <dchinner@redhat.com> > > Because the iomap code using PF_MEMALLOC_NOFS to detect transaction > recursion in XFS is just wrong. Remove it from the iomap code and > replace it with XFS specific internal checks using > current->journal_info instead. It might be worth mentioning that this changes the PF_MEMALLOC_NOFS behavior very slightly -- it's now bound to the allocation and freeing of the transaction, instead of the strange way we used to do this, where we'd set it at reservation time but we don't /clear/ it at unreserve time. This doesn't strictly look like a fix patch, but as it is a Dumb Developer Detector(tm) I could try to push it for 5.12 ... or just make it one of the first 5.13 patches. Any preference? --D > Fixes: 9070733b4efa ("xfs: abstract PF_FSTRANS to PF_MEMALLOC_NOFS") > Signed-off-by: Dave Chinner <dchinner@redhat.com> > --- > fs/iomap/buffered-io.c | 7 ------- > fs/xfs/libxfs/xfs_btree.c | 4 +++- > fs/xfs/xfs_aops.c | 17 +++++++++++++++-- > fs/xfs/xfs_trans.c | 20 +++++--------------- > fs/xfs/xfs_trans.h | 30 ++++++++++++++++++++++++++++++ > 5 files changed, 53 insertions(+), 25 deletions(-) > > diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c > index 16a1e82e3aeb..fcd4a0d71fc1 100644 > --- a/fs/iomap/buffered-io.c > +++ b/fs/iomap/buffered-io.c > @@ -1458,13 +1458,6 @@ iomap_do_writepage(struct page *page, struct writeback_control *wbc, void *data) > PF_MEMALLOC)) > goto redirty; > > - /* > - * Given that we do not allow direct reclaim to call us, we should > - * never be called in a recursive filesystem reclaim context. > - */ > - if (WARN_ON_ONCE(current->flags & PF_MEMALLOC_NOFS)) > - goto redirty; > - > /* > * Is this page beyond the end of the file? > * > diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c > index b56ff451adce..e6a15b920034 100644 > --- a/fs/xfs/libxfs/xfs_btree.c > +++ b/fs/xfs/libxfs/xfs_btree.c > @@ -2805,7 +2805,7 @@ xfs_btree_split_worker( > struct xfs_btree_split_args *args = container_of(work, > struct xfs_btree_split_args, work); > unsigned long pflags; > - unsigned long new_pflags = PF_MEMALLOC_NOFS; > + unsigned long new_pflags = 0; > > /* > * we are in a transaction context here, but may also be doing work > @@ -2817,11 +2817,13 @@ xfs_btree_split_worker( > new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD; > > current_set_flags_nested(&pflags, new_pflags); > + xfs_trans_set_context(args->cur->bc_tp); > > args->result = __xfs_btree_split(args->cur, args->level, args->ptrp, > args->key, args->curp, args->stat); > complete(args->done); > > + xfs_trans_clear_context(args->cur->bc_tp); > current_restore_flags_nested(&pflags, new_pflags); > } > > diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c > index 4304c6416fbb..b4186d666157 100644 > --- a/fs/xfs/xfs_aops.c > +++ b/fs/xfs/xfs_aops.c > @@ -62,7 +62,7 @@ xfs_setfilesize_trans_alloc( > * We hand off the transaction to the completion thread now, so > * clear the flag here. > */ > - current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); > + xfs_trans_clear_context(tp); > return 0; > } > > @@ -125,7 +125,7 @@ xfs_setfilesize_ioend( > * thus we need to mark ourselves as being in a transaction manually. > * Similarly for freeze protection. > */ > - current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); > + xfs_trans_set_context(tp); > __sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS); > > /* we abort the update if there was an IO error */ > @@ -568,6 +568,12 @@ xfs_vm_writepage( > { > struct xfs_writepage_ctx wpc = { }; > > + if (WARN_ON_ONCE(current->journal_info)) { > + redirty_page_for_writepage(wbc, page); > + unlock_page(page); > + return 0; > + } > + > return iomap_writepage(page, wbc, &wpc.ctx, &xfs_writeback_ops); > } > > @@ -578,6 +584,13 @@ xfs_vm_writepages( > { > struct xfs_writepage_ctx wpc = { }; > > + /* > + * Writing back data in a transaction context can result in recursive > + * transactions. This is bad, so issue a warning and get out of here. > + */ > + if (WARN_ON_ONCE(current->journal_info)) > + return 0; > + > xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED); > return iomap_writepages(mapping, wbc, &wpc.ctx, &xfs_writeback_ops); > } > diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c > index 44f72c09c203..e2a922f061c7 100644 > --- a/fs/xfs/xfs_trans.c > +++ b/fs/xfs/xfs_trans.c > @@ -72,6 +72,7 @@ xfs_trans_free( > xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false); > > trace_xfs_trans_free(tp, _RET_IP_); > + xfs_trans_clear_context(tp); > if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT)) > sb_end_intwrite(tp->t_mountp->m_super); > xfs_trans_free_dqinfo(tp); > @@ -123,7 +124,8 @@ xfs_trans_dup( > > ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used; > tp->t_rtx_res = tp->t_rtx_res_used; > - ntp->t_pflags = tp->t_pflags; > + > + xfs_trans_switch_context(tp, ntp); > > /* move deferred ops over to the new tp */ > xfs_defer_move(ntp, tp); > @@ -157,9 +159,6 @@ xfs_trans_reserve( > int error = 0; > bool rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; > > - /* Mark this thread as being in a transaction */ > - current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); > - > /* > * Attempt to reserve the needed disk blocks by decrementing > * the number needed from the number available. This will > @@ -167,10 +166,8 @@ xfs_trans_reserve( > */ > if (blocks > 0) { > error = xfs_mod_fdblocks(mp, -((int64_t)blocks), rsvd); > - if (error != 0) { > - current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); > + if (error != 0) > return -ENOSPC; > - } > tp->t_blk_res += blocks; > } > > @@ -244,9 +241,6 @@ xfs_trans_reserve( > xfs_mod_fdblocks(mp, (int64_t)blocks, rsvd); > tp->t_blk_res = 0; > } > - > - current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); > - > return error; > } > > @@ -270,6 +264,7 @@ xfs_trans_alloc( > tp = kmem_cache_zalloc(xfs_trans_zone, GFP_KERNEL | __GFP_NOFAIL); > if (!(flags & XFS_TRANS_NO_WRITECOUNT)) > sb_start_intwrite(mp->m_super); > + xfs_trans_set_context(tp); > > /* > * Zero-reservation ("empty") transactions can't modify anything, so > @@ -893,7 +888,6 @@ __xfs_trans_commit( > > xfs_log_commit_cil(mp, tp, &commit_lsn, regrant); > > - current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); > xfs_trans_free(tp); > > /* > @@ -925,7 +919,6 @@ __xfs_trans_commit( > xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket); > tp->t_ticket = NULL; > } > - current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); > xfs_trans_free_items(tp, !!error); > xfs_trans_free(tp); > > @@ -985,9 +978,6 @@ xfs_trans_cancel( > tp->t_ticket = NULL; > } > > - /* mark this thread as no longer being in a transaction */ > - current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); > - > xfs_trans_free_items(tp, dirty); > xfs_trans_free(tp); > } > diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h > index 8b03fbfe9a1b..9dd745cf77c9 100644 > --- a/fs/xfs/xfs_trans.h > +++ b/fs/xfs/xfs_trans.h > @@ -281,4 +281,34 @@ int xfs_trans_alloc_ichange(struct xfs_inode *ip, struct xfs_dquot *udqp, > struct xfs_dquot *gdqp, struct xfs_dquot *pdqp, bool force, > struct xfs_trans **tpp); > > +static inline void > +xfs_trans_set_context( > + struct xfs_trans *tp) > +{ > + ASSERT(current->journal_info == NULL); > + tp->t_pflags = memalloc_nofs_save(); > + current->journal_info = tp; > +} > + > +static inline void > +xfs_trans_clear_context( > + struct xfs_trans *tp) > +{ > + if (current->journal_info == tp) { > + memalloc_nofs_restore(tp->t_pflags); > + current->journal_info = NULL; > + } > +} > + > +static inline void > +xfs_trans_switch_context( > + struct xfs_trans *old_tp, > + struct xfs_trans *new_tp) > +{ > + ASSERT(current->journal_info == old_tp); > + new_tp->t_pflags = old_tp->t_pflags; > + old_tp->t_pflags = 0; > + current->journal_info = new_tp; > +} > + > #endif /* __XFS_TRANS_H__ */ > -- > 2.28.0 >
On Mon, Feb 22, 2021 at 06:15:57PM -0800, Darrick J. Wong wrote: > On Tue, Feb 23, 2021 at 10:31:07AM +1100, Dave Chinner wrote: > > From: Dave Chinner <dchinner@redhat.com> > > > > Because the iomap code using PF_MEMALLOC_NOFS to detect transaction > > recursion in XFS is just wrong. Remove it from the iomap code and > > replace it with XFS specific internal checks using > > current->journal_info instead. > > It might be worth mentioning that this changes the PF_MEMALLOC_NOFS > behavior very slightly -- it's now bound to the allocation and freeing > of the transaction, instead of the strange way we used to do this, where > we'd set it at reservation time but we don't /clear/ it at unreserve time. They are effectively the same thing, so I think you are splitting hairs here. The rule is "transaction context is NOFS" so whether it is set when the transaction context is entered or a few instructions later when we start the reservation is not significant. > This doesn't strictly look like a fix patch, but as it is a Dumb > Developer Detector(tm) I could try to push it for 5.12 ... or just make > it one of the first 5.13 patches. Any preference? Nope. You're going to need to fix the transaction nesting the new gc code does before applying this, though, because that is detected as transaction recursion by this patch.... Cheers, Dave.
On Tue, Feb 23, 2021 at 02:28:37PM +1100, Dave Chinner wrote: > On Mon, Feb 22, 2021 at 06:15:57PM -0800, Darrick J. Wong wrote: > > On Tue, Feb 23, 2021 at 10:31:07AM +1100, Dave Chinner wrote: > > > From: Dave Chinner <dchinner@redhat.com> > > > > > > Because the iomap code using PF_MEMALLOC_NOFS to detect transaction > > > recursion in XFS is just wrong. Remove it from the iomap code and > > > replace it with XFS specific internal checks using > > > current->journal_info instead. > > > > It might be worth mentioning that this changes the PF_MEMALLOC_NOFS > > behavior very slightly -- it's now bound to the allocation and freeing > > of the transaction, instead of the strange way we used to do this, where > > we'd set it at reservation time but we don't /clear/ it at unreserve time. > > They are effectively the same thing, so I think you are splitting > hairs here. The rule is "transaction context is NOFS" so whether it > is set when the transaction context is entered or a few instructions > later when we start the reservation is not significant. > > > This doesn't strictly look like a fix patch, but as it is a Dumb > > Developer Detector(tm) I could try to push it for 5.12 ... or just make > > it one of the first 5.13 patches. Any preference? > > Nope. You're going to need to fix the transaction nesting the new gc > code does before applying this, though, because that is detected as > transaction recursion by this patch.... Well yes, I was trying to see if I could throw in the fix patch and the idiot detector, both at the same time... :) That said, it crashes in xfs/229: 2822 args->result = __xfs_btree_split(args->cur, args->level, args->ptrp, 2823 args->key, args->curp, args->stat); 2824 complete(args->done); 2825 > 2826 xfs_trans_clear_context(args->cur->bc_tp); 2827 current_restore_flags_nested(&pflags, new_pflags); It's possible for the original wait_for_completion() in xfs_btree_split() to wake up immediately after complete() drops the lock. If it returns (and blows away the stack variable @args) before the worker resumes, then the worker will be dereferencing freed stack memory and blows up: BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP CPU: 0 PID: 375393 Comm: kworker/0:16 Tainted: G W 5.11.0-rc4-djw #rc4 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-1ubuntu1.1 04/01/2014 Workqueue: xfsalloc xfs_btree_split_worker [xfs] RIP: 0010:xfs_btree_split_worker+0xaa/0x120 [xfs] Code: 4b d8 48 8b 53 d0 8b 73 c8 48 8b 7b c0 4c 8b 4b e8 4c 8b 43 e0 e8 46 f5 ff ff 48 8b 7b f8 89 43 f0 e8 0a 84 aa e0 48 8b 43 c0 <48> 8b 00 49 3b 84 24 90 08 00 00 74 20 65 48 8b 14 25 80 6d 01 00 RSP: 0018:ffffc90003123e60 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffffc9000311b8a8 RCX: 0000000000000000 RDX: 0000000000000001 RSI: 0000000000000286 RDI: 00000000ffffffff RBP: 0000000000000000 R08: ffffc9000311b858 R09: ffff88803ec29f80 R10: ffff8880054a80d0 R11: ffff88803ec2a030 R12: ffff888005235d00 R13: 0000000004248060 R14: ffff88800cbe5f00 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88803ec00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000000200c002 CR4: 00000000001706b0 Call Trace: process_one_work+0x1dd/0x3b0 worker_thread+0x57/0x3c0 ? rescuer_thread+0x3b0/0x3b0 kthread+0x14f/0x170 ? __kthread_bind_mask+0x60/0x60 ret_from_fork+0x1f/0x30 Modules linked in: xfs dm_thin_pool dm_persistent_data dm_bio_prison dm_snapshot dm_bufio dm_flakey libcrc32c ip6t_REJECT nf_reject_ipv6 ipt_REJECT nf_reject_ipv4 ip_set_hash_ip xt_REDIRECT xt_set xt_tcpudp ip_set_hash_net ip_set_hash_mac ip_set nfnetlink iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip6table_filter ip6_tables iptable_filter bfq sch_fq_codel ip_tables x_tables overlay nfsv4 af_packet [last unloaded: xfs] Dumping ftrace buffer: (ftrace buffer empty) CR2: 0000000000000000 ---[ end trace 1a6aae037bf68618 ]--- --D > Cheers, > > Dave. > -- > Dave Chinner > david@fromorbit.com
On Mon, Feb 22, 2021 at 08:51:05PM -0800, Darrick J. Wong wrote: > On Tue, Feb 23, 2021 at 02:28:37PM +1100, Dave Chinner wrote: > > On Mon, Feb 22, 2021 at 06:15:57PM -0800, Darrick J. Wong wrote: > > > On Tue, Feb 23, 2021 at 10:31:07AM +1100, Dave Chinner wrote: > > > > From: Dave Chinner <dchinner@redhat.com> > > > > > > > > Because the iomap code using PF_MEMALLOC_NOFS to detect transaction > > > > recursion in XFS is just wrong. Remove it from the iomap code and > > > > replace it with XFS specific internal checks using > > > > current->journal_info instead. > > > > > > It might be worth mentioning that this changes the PF_MEMALLOC_NOFS > > > behavior very slightly -- it's now bound to the allocation and freeing > > > of the transaction, instead of the strange way we used to do this, where > > > we'd set it at reservation time but we don't /clear/ it at unreserve time. > > > > They are effectively the same thing, so I think you are splitting > > hairs here. The rule is "transaction context is NOFS" so whether it > > is set when the transaction context is entered or a few instructions > > later when we start the reservation is not significant. > > > > > This doesn't strictly look like a fix patch, but as it is a Dumb > > > Developer Detector(tm) I could try to push it for 5.12 ... or just make > > > it one of the first 5.13 patches. Any preference? > > > > Nope. You're going to need to fix the transaction nesting the new gc > > code does before applying this, though, because that is detected as > > transaction recursion by this patch.... > > Well yes, I was trying to see if I could throw in the fix patch and the > idiot detector, both at the same time... :) > > That said, it crashes in xfs/229: > > 2822 args->result = __xfs_btree_split(args->cur, args->level, args->ptrp, > 2823 args->key, args->curp, args->stat); > 2824 complete(args->done); > 2825 > > 2826 xfs_trans_clear_context(args->cur->bc_tp); > 2827 current_restore_flags_nested(&pflags, new_pflags); > > It's possible for the original wait_for_completion() in > xfs_btree_split() to wake up immediately after complete() drops the > lock. If it returns (and blows away the stack variable @args) before > the worker resumes, then the worker will be dereferencing freed stack > memory and blows up: Argh. So I left an undocumented landmine in that code that I then stepped on myself years later. Easy to fix, just clear the context before calling done. I'll go re-attach my leg, then update it and drop a comment in there, too. Cheers, Dave.
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 16a1e82e3aeb..fcd4a0d71fc1 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1458,13 +1458,6 @@ iomap_do_writepage(struct page *page, struct writeback_control *wbc, void *data) PF_MEMALLOC)) goto redirty; - /* - * Given that we do not allow direct reclaim to call us, we should - * never be called in a recursive filesystem reclaim context. - */ - if (WARN_ON_ONCE(current->flags & PF_MEMALLOC_NOFS)) - goto redirty; - /* * Is this page beyond the end of the file? * diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index b56ff451adce..e6a15b920034 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -2805,7 +2805,7 @@ xfs_btree_split_worker( struct xfs_btree_split_args *args = container_of(work, struct xfs_btree_split_args, work); unsigned long pflags; - unsigned long new_pflags = PF_MEMALLOC_NOFS; + unsigned long new_pflags = 0; /* * we are in a transaction context here, but may also be doing work @@ -2817,11 +2817,13 @@ xfs_btree_split_worker( new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD; current_set_flags_nested(&pflags, new_pflags); + xfs_trans_set_context(args->cur->bc_tp); args->result = __xfs_btree_split(args->cur, args->level, args->ptrp, args->key, args->curp, args->stat); complete(args->done); + xfs_trans_clear_context(args->cur->bc_tp); current_restore_flags_nested(&pflags, new_pflags); } diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 4304c6416fbb..b4186d666157 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -62,7 +62,7 @@ xfs_setfilesize_trans_alloc( * We hand off the transaction to the completion thread now, so * clear the flag here. */ - current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); + xfs_trans_clear_context(tp); return 0; } @@ -125,7 +125,7 @@ xfs_setfilesize_ioend( * thus we need to mark ourselves as being in a transaction manually. * Similarly for freeze protection. */ - current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); + xfs_trans_set_context(tp); __sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS); /* we abort the update if there was an IO error */ @@ -568,6 +568,12 @@ xfs_vm_writepage( { struct xfs_writepage_ctx wpc = { }; + if (WARN_ON_ONCE(current->journal_info)) { + redirty_page_for_writepage(wbc, page); + unlock_page(page); + return 0; + } + return iomap_writepage(page, wbc, &wpc.ctx, &xfs_writeback_ops); } @@ -578,6 +584,13 @@ xfs_vm_writepages( { struct xfs_writepage_ctx wpc = { }; + /* + * Writing back data in a transaction context can result in recursive + * transactions. This is bad, so issue a warning and get out of here. + */ + if (WARN_ON_ONCE(current->journal_info)) + return 0; + xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED); return iomap_writepages(mapping, wbc, &wpc.ctx, &xfs_writeback_ops); } diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 44f72c09c203..e2a922f061c7 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -72,6 +72,7 @@ xfs_trans_free( xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false); trace_xfs_trans_free(tp, _RET_IP_); + xfs_trans_clear_context(tp); if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT)) sb_end_intwrite(tp->t_mountp->m_super); xfs_trans_free_dqinfo(tp); @@ -123,7 +124,8 @@ xfs_trans_dup( ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used; tp->t_rtx_res = tp->t_rtx_res_used; - ntp->t_pflags = tp->t_pflags; + + xfs_trans_switch_context(tp, ntp); /* move deferred ops over to the new tp */ xfs_defer_move(ntp, tp); @@ -157,9 +159,6 @@ xfs_trans_reserve( int error = 0; bool rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; - /* Mark this thread as being in a transaction */ - current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); - /* * Attempt to reserve the needed disk blocks by decrementing * the number needed from the number available. This will @@ -167,10 +166,8 @@ xfs_trans_reserve( */ if (blocks > 0) { error = xfs_mod_fdblocks(mp, -((int64_t)blocks), rsvd); - if (error != 0) { - current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); + if (error != 0) return -ENOSPC; - } tp->t_blk_res += blocks; } @@ -244,9 +241,6 @@ xfs_trans_reserve( xfs_mod_fdblocks(mp, (int64_t)blocks, rsvd); tp->t_blk_res = 0; } - - current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); - return error; } @@ -270,6 +264,7 @@ xfs_trans_alloc( tp = kmem_cache_zalloc(xfs_trans_zone, GFP_KERNEL | __GFP_NOFAIL); if (!(flags & XFS_TRANS_NO_WRITECOUNT)) sb_start_intwrite(mp->m_super); + xfs_trans_set_context(tp); /* * Zero-reservation ("empty") transactions can't modify anything, so @@ -893,7 +888,6 @@ __xfs_trans_commit( xfs_log_commit_cil(mp, tp, &commit_lsn, regrant); - current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); xfs_trans_free(tp); /* @@ -925,7 +919,6 @@ __xfs_trans_commit( xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket); tp->t_ticket = NULL; } - current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); xfs_trans_free_items(tp, !!error); xfs_trans_free(tp); @@ -985,9 +978,6 @@ xfs_trans_cancel( tp->t_ticket = NULL; } - /* mark this thread as no longer being in a transaction */ - current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); - xfs_trans_free_items(tp, dirty); xfs_trans_free(tp); } diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 8b03fbfe9a1b..9dd745cf77c9 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -281,4 +281,34 @@ int xfs_trans_alloc_ichange(struct xfs_inode *ip, struct xfs_dquot *udqp, struct xfs_dquot *gdqp, struct xfs_dquot *pdqp, bool force, struct xfs_trans **tpp); +static inline void +xfs_trans_set_context( + struct xfs_trans *tp) +{ + ASSERT(current->journal_info == NULL); + tp->t_pflags = memalloc_nofs_save(); + current->journal_info = tp; +} + +static inline void +xfs_trans_clear_context( + struct xfs_trans *tp) +{ + if (current->journal_info == tp) { + memalloc_nofs_restore(tp->t_pflags); + current->journal_info = NULL; + } +} + +static inline void +xfs_trans_switch_context( + struct xfs_trans *old_tp, + struct xfs_trans *new_tp) +{ + ASSERT(current->journal_info == old_tp); + new_tp->t_pflags = old_tp->t_pflags; + old_tp->t_pflags = 0; + current->journal_info = new_tp; +} + #endif /* __XFS_TRANS_H__ */