diff mbox

[v2] Btrfs: fix file/data loss caused by fsync after rename and new inode

Message ID 1459377441-3609-1-git-send-email-fdmanana@kernel.org (mailing list archive)
State Accepted
Headers show

Commit Message

Filipe Manana March 30, 2016, 10:37 p.m. UTC
From: Filipe Manana <fdmanana@suse.com>

If we rename an inode A (be it a file or a directory), create a new
inode B with the old name of inode A and under the same parent directory,
fsync inode B and then power fail, at log tree replay time we end up
removing inode A completely. If inode A is a directory then all its files
are gone too.

Example scenarios where this happens, taken from a couple of test cases
written for fstests:

   # Scenario 1

   mkfs.btrfs -f /dev/sdc
   mount /dev/sdc /mnt
   mkdir -p /mnt/a/x
   echo "hello" > /mnt/a/x/foo
   echo "world" > /mnt/a/x/bar
   sync
   mv /mnt/a/x /mnt/a/y
   mkdir /mnt/a/x
   xfs_io -c fsync /mnt/a/x
   <power failure happens>

   The next time the fs is mounted, log tree replay happens and the
   directory "y" does not exist, and the files "foo" and "bar" do not
   exist anywhere (neither in "y", nor in "x", nor in the root).

   # Scenario 2

   mkfs.btrfs -f /dev/sdc
   mount /dev/sdc /mnt
   mkdir /mnt/a
   echo "hello" > /mnt/a/foo
   sync
   mv /mnt/a/foo /mnt/a/bar
   echo "world" > /mnt/a/foo
   xfs_io -c fsync /mnt/a/foo
   <power failure happens>

   The next time the fs is mounted, log tree replay happens and the
   file "bar" does not exist anymore. A file with the name "foo"
   exists and it matches the second file we created.

Another related problem that does not involve file/data loss is when a
new inode is created with the name of a deleted snapshot and we fsync it:

   mkfs.btrfs -f /dev/sdc
   mount /dev/sdc /mnt
   mkdir /mnt/testdir
   btrfs subvolume snapshot /mnt /mnt/testdir/snap
   btrfs subvolume delete /mnt/testdir/snap
   rmdir /mnt/testdir
   mkdir /mnt/testdir
   xfs_io -c fsync /mnt/testdir # or fsync some file inside /mnt/testdir
   <power failure>

   The next time the fs is mounted the log replay procedure fails because
   it attempts to delete the snapshot entry (which has dir item key type
   of BTRFS_ROOT_ITEM_KEY) as if it were a regular (non-root) entry,
   resulting in the following error that causes mount to fail:

   [52174.510532] BTRFS info (device dm-0): failed to delete reference to snap, inode 257 parent 257
   [52174.512570] ------------[ cut here ]------------
   [52174.513278] WARNING: CPU: 12 PID: 28024 at fs/btrfs/inode.c:3986 __btrfs_unlink_inode+0x178/0x351 [btrfs]()
   [52174.514681] BTRFS: Transaction aborted (error -2)
   [52174.515630] Modules linked in: btrfs dm_flakey dm_mod overlay crc32c_generic ppdev xor raid6_pq acpi_cpufreq parport_pc tpm_tis sg parport tpm evdev i2c_piix4 proc
   [52174.521568] CPU: 12 PID: 28024 Comm: mount Tainted: G        W       4.5.0-rc6-btrfs-next-27+ #1
   [52174.522805] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS by qemu-project.org 04/01/2014
   [52174.524053]  0000000000000000 ffff8801df2a7710 ffffffff81264e93 ffff8801df2a7758
   [52174.524053]  0000000000000009 ffff8801df2a7748 ffffffff81051618 ffffffffa03591cd
   [52174.524053]  00000000fffffffe ffff88015e6e5000 ffff88016dbc3c88 ffff88016dbc3c88
   [52174.524053] Call Trace:
   [52174.524053]  [<ffffffff81264e93>] dump_stack+0x67/0x90
   [52174.524053]  [<ffffffff81051618>] warn_slowpath_common+0x99/0xb2
   [52174.524053]  [<ffffffffa03591cd>] ? __btrfs_unlink_inode+0x178/0x351 [btrfs]
   [52174.524053]  [<ffffffff81051679>] warn_slowpath_fmt+0x48/0x50
   [52174.524053]  [<ffffffffa03591cd>] __btrfs_unlink_inode+0x178/0x351 [btrfs]
   [52174.524053]  [<ffffffff8118f5e9>] ? iput+0xb0/0x284
   [52174.524053]  [<ffffffffa0359fe8>] btrfs_unlink_inode+0x1c/0x3d [btrfs]
   [52174.524053]  [<ffffffffa038631e>] check_item_in_log+0x1fe/0x29b [btrfs]
   [52174.524053]  [<ffffffffa0386522>] replay_dir_deletes+0x167/0x1cf [btrfs]
   [52174.524053]  [<ffffffffa038739e>] fixup_inode_link_count+0x289/0x2aa [btrfs]
   [52174.524053]  [<ffffffffa038748a>] fixup_inode_link_counts+0xcb/0x105 [btrfs]
   [52174.524053]  [<ffffffffa038a5ec>] btrfs_recover_log_trees+0x258/0x32c [btrfs]
   [52174.524053]  [<ffffffffa03885b2>] ? replay_one_extent+0x511/0x511 [btrfs]
   [52174.524053]  [<ffffffffa034f288>] open_ctree+0x1dd4/0x21b9 [btrfs]
   [52174.524053]  [<ffffffffa032b753>] btrfs_mount+0x97e/0xaed [btrfs]
   [52174.524053]  [<ffffffff8108e1b7>] ? trace_hardirqs_on+0xd/0xf
   [52174.524053]  [<ffffffff8117bafa>] mount_fs+0x67/0x131
   [52174.524053]  [<ffffffff81193003>] vfs_kern_mount+0x6c/0xde
   [52174.524053]  [<ffffffffa032af81>] btrfs_mount+0x1ac/0xaed [btrfs]
   [52174.524053]  [<ffffffff8108e1b7>] ? trace_hardirqs_on+0xd/0xf
   [52174.524053]  [<ffffffff8108c262>] ? lockdep_init_map+0xb9/0x1b3
   [52174.524053]  [<ffffffff8117bafa>] mount_fs+0x67/0x131
   [52174.524053]  [<ffffffff81193003>] vfs_kern_mount+0x6c/0xde
   [52174.524053]  [<ffffffff8119590f>] do_mount+0x8a6/0x9e8
   [52174.524053]  [<ffffffff811358dd>] ? strndup_user+0x3f/0x59
   [52174.524053]  [<ffffffff81195c65>] SyS_mount+0x77/0x9f
   [52174.524053]  [<ffffffff814935d7>] entry_SYSCALL_64_fastpath+0x12/0x6b
   [52174.561288] ---[ end trace 6b53049efb1a3ea6 ]---

Fix this by forcing a transaction commit when such cases happen.
This means we check in the commit root of the subvolume tree if there
was any other inode with the same reference when the inode we are
fsync'ing is a new inode (created in the current transaction).

Test cases for fstests, covering all the scenarios given above, were
submitted upstream for fstests:

  * fstests: generic test for fsync after renaming directory
    https://patchwork.kernel.org/patch/8694281/

  * fstests: generic test for fsync after renaming file
    https://patchwork.kernel.org/patch/8694301/

  * fstests: add btrfs test for fsync after snapshot deletion
    https://patchwork.kernel.org/patch/8670671/

Cc: stable@vger.kernel.org
Signed-off-by: Filipe Manana <fdmanana@suse.com>
---

V2: Node code changes, only updated the change log and the comment to
    be more clear about the problems solved by the new checks.

 fs/btrfs/tree-log.c | 137 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 137 insertions(+)

Comments

Duncan March 31, 2016, 9:08 p.m. UTC | #1
fdmanana posted on Wed, 30 Mar 2016 23:37:21 +0100 as excerpted:

> From: Filipe Manana <fdmanana@suse.com>
> 
> If we rename an inode A (be it a file or a directory), create a new
> inode B with the old name of inode A and under the same parent
> directory, fsync inode B and then power fail, at log tree replay time
> we end up removing inode A completely. If inode A is a directory then
> all its files are gone too.

...

> V2: Node code changes, only updated the change log and the comment to
>     be more clear about the problems solved by the new checks.

If there's a V3 anyway, apparent typo:

s/Node code/No code/
Chris Mason March 31, 2016, 9:13 p.m. UTC | #2
On Wed, Mar 30, 2016 at 11:37:21PM +0100, fdmanana@kernel.org wrote:
> From: Filipe Manana <fdmanana@suse.com>
> 
> If we rename an inode A (be it a file or a directory), create a new
> inode B with the old name of inode A and under the same parent directory,
> fsync inode B and then power fail, at log tree replay time we end up
> removing inode A completely. If inode A is a directory then all its files
> are gone too.
> 
> Example scenarios where this happens:
> This is reproducible with the following steps, taken from a couple of
> test cases written for fstests which are going to be submitted upstream
> soon:

Thanks Filipe!  Since this is an older bug, I won't rush it into
tomorrow's pull, but I'll test and get it into next week.

-chris
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Chris Mason Aug. 18, 2016, 3:03 p.m. UTC | #3
On Wed, Mar 30, 2016 at 11:37:21PM +0100, fdmanana@kernel.org wrote:
>From: Filipe Manana <fdmanana@suse.com>
>
>If we rename an inode A (be it a file or a directory), create a new
>inode B with the old name of inode A and under the same parent directory,
>fsync inode B and then power fail, at log tree replay time we end up
>removing inode A completely. If inode A is a directory then all its files
>are gone too.

I bisected a crash with dbench down to this patch.  The reproduction 
was:

mkfs.btrfs -m single -f /dev/vdb
mount /dev/vdb /btrfs
cd /btrfs
mkdir clients
for x in `seq 0 100` ; do btrfs subvol create clients/client$x ; done
sync
dbench 100

In other words, run dbench with a subvol per dbench thread.  It crashes 
immediately, most often with an invalid access in copy_from_user during 
file_write.  The pattern of crashes and location just show general 
memory corruption and the actual stack trace wasn't very useful.

With this patch reverted the runs last much much longer, but we still 
hit a crash eventually.  It's not clear to me if this is two different 
bugs or if Filipe's patch just makes the corruption much easier to hit.  
I'm still digging through it all, but here's a common backtrace with
this patch reverted:

BUG: unable to handle kernel paging request at 0000000000017298
IP: [<ffffffff810ad8b9>] queued_spin_lock_slowpath+0x139/0x200
PGD 7df68a067 PUD 7df68b067 PMD 0
Oops: 0002 [#1] PREEMPT SMP
Modules linked in: crc32c_intel i2c_piix4 aesni_intel i2c_core 
aes_x86_64 glue_helper virtio_net serio_raw lrw floppy pcspkr gf128mul 
ablk_helper button cryptd sch_fq_codel autofs4 virtio_blk
CPU: 6 PID: 1125 Comm: dbench Not tainted 4.7.0-00001-g00cc018 #220
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.9.0-1.fc24 
04/01/2014
task: ffff88072c918e40 ti: ffff88072cf50000 task.ti: ffff88072cf50000
RIP: 0010:[<ffffffff810ad8b9>]  [<ffffffff810ad8b9>] queued_spin_lock_slowpath+0x139/0x200
RSP: 0018:ffff8807eff83ac8  EFLAGS: 00010002
RAX: 000000000000263d RBX: ffff8807eff97290 RCX: 00000000001d0000
RDX: 0000000000017298 RSI: ffff8807eff83b58 RDI: ffff8807540702fc
RBP: ffff8807eff83b88 R08: 0000000000000000 R09: 000000000001a228
R10: ffff88080fffad80 R11: 000000000000005a R12: 0000000000010000
R13: 0000000000000000 R14: ffff8807eff83d48 R15: 0000000000000003
FS:  00007fb2b8810700(0000) GS:ffff8807eff80000(0000) 
knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000017298 CR3: 00000007df689000 CR4: 00000000000406e0
Stack:
 0000000000000000 ffff8807540702fc ffff8807ec37a918 0000000100000000
 ffff8807ec37a900 ffff8807ec37a980 ffff8807eff97290 ffffffff810961da
 000000000000003c ffff8800ba864dd8 ffff880798f94189 ffff8807ef89a000
Call Trace:
 <IRQ>
 [<ffffffff810961da>] ? select_idle_sibling+0x2a/0x120
 [<ffffffff81091385>] ? wake_up_process+0x15/0x20
 [<ffffffff81079c00>] ? wake_up_worker+0x30/0x40
 [<ffffffff8107c6c8>] ? insert_work+0x78/0xc0
 [<ffffffff8197421e>] _raw_spin_lock_irqsave+0x4e/0x50
 [<ffffffff81090f5c>] try_to_wake_up+0x3c/0x410
 [<ffffffff81973e76>] ? _raw_spin_unlock+0x16/0x40
 [<ffffffff8107c8c0>] ? __queue_work+0x1b0/0x530
 [<ffffffff8109bfcf>] ? update_cfs_shares+0xcf/0x110
 [<ffffffff81091342>] default_wake_function+0x12/0x20
 [<ffffffff810a5d56>] autoremove_wake_function+0x16/0x40
 [<ffffffff810a5df4>] wake_bit_function+0x34/0x40
 [<ffffffff810a5c06>] __wake_up_common+0x56/0x90
 [<ffffffff810a61f8>] __wake_up+0x48/0x70
 [<ffffffff810a6268>] __wake_up_bit+0x48/0x50
 [<ffffffff8115d1f1>] end_page_writeback+0x81/0xa0
 [<ffffffff813e9cb9>] end_bio_extent_writepage+0x79/0xe0
 [<ffffffff814a17eb>] bio_endio+0x6b/0x80
 [<ffffffff813f2d32>] btrfs_end_bio+0x102/0x190
 [<ffffffff814a17eb>] bio_endio+0x6b/0x80
 [<ffffffff814a6778>] blk_update_request+0x1e8/0x330
 [<ffffffff814b434a>] blk_mq_end_request+0x1a/0x40
 [<ffffffffa0000431>] virtblk_request_done+0x71/0xe0 [virtio_blk]
 [<ffffffff814b3420>] ? blkdev_issue_zeroout+0x1d0/0x1d0
 [<ffffffff814b3433>] __blk_mq_complete_request_remote+0x13/0x20
 [<ffffffff810df62b>] flush_smp_call_function_queue+0x8b/0x180
 [<ffffffff814f2ad7>] ? debug_smp_processor_id+0x17/0x20
 [<ffffffff810df733>] generic_smp_call_function_single_interrupt+0x13/0x20
 [<ffffffff810406b7>] smp_call_function_single_interrupt+0x27/0x40
 [<ffffffff8197564f>] call_function_single_interrupt+0x7f/0x90
 <EOI>
 [<ffffffff810ad8cc>] ? queued_spin_lock_slowpath+0x14c/0x200
 [<ffffffff810ad841>] ? queued_spin_lock_slowpath+0xc1/0x200
 [<ffffffff81973e2e>] ? _raw_spin_unlock_irqrestore+0xe/0x40
 [<ffffffff810af1f5>] queued_write_lock_slowpath+0x95/0xa0
 [<ffffffff810a5faf>] ? finish_wait+0x6f/0x90
 [<ffffffff8108feb8>] ? preempt_count_add+0xb8/0xd0
 [<ffffffff81974142>] _raw_write_lock+0x32/0x40
 [<ffffffff81409246>] btrfs_tree_lock+0x146/0x2c0
 [<ffffffff810a5d40>] ? woken_wake_function+0x20/0x20
 [<ffffffff8108feb8>] ? preempt_count_add+0xb8/0xd0
 [<ffffffff8197418e>] ? _raw_read_lock+0x3e/0x40
 [<ffffffff81409708>] ? btrfs_tree_read_lock+0x78/0x170
 [<ffffffff810a5d40>] ? woken_wake_function+0x20/0x20
 [<ffffffff810a5d56>] ? autoremove_wake_function+0x16/0x40
 [<ffffffff81396aaf>] ? btrfs_root_node+0x4f/0x90
 [<ffffffff81396c44>] btrfs_lock_root_node+0x34/0x50
 [<ffffffff8139f3d9>] btrfs_search_slot+0x769/0x9c0
 [<ffffffff8140909c>] ? btrfs_tree_unlock+0x6c/0xd0
 [<ffffffff813b8129>] btrfs_del_csums+0x239/0x330
 [<ffffffff813affaf>] __btrfs_free_extent+0x73f/0xe00
 [<ffffffff811c924d>] ? kmem_cache_free+0x22d/0x240
 [<ffffffff813b1237>] __btrfs_run_delayed_refs+0xbc7/0x1300
 [<ffffffff8115f2bf>] ? find_get_pages_tag+0x18f/0x2f0
 [<ffffffff813efd2d>] ? extent_write_cache_pages.clone.0+0x3dd/0x460
 [<ffffffff813b19fa>] btrfs_run_delayed_refs+0x8a/0x2b0
 [<ffffffff813c6c21>] btrfs_commit_transaction+0x51/0xcb0
 [<ffffffff8115d6ab>] ? __filemap_fdatawait_range+0x9b/0x170
 [<ffffffff8108feb8>] ? preempt_count_add+0xb8/0xd0
 [<ffffffff81973df7>] ? _raw_spin_unlock_irq+0x17/0x40
 [<ffffffff813e62d7>] ? btrfs_lookup_first_ordered_extent+0x97/0xd0
 [<ffffffff813e6401>] ? btrfs_wait_ordered_range+0xf1/0x130
 [<ffffffff813dc34e>] btrfs_sync_file+0x3ce/0x4b0
 [<ffffffff81102040>] ? __audit_syscall_entry+0xb0/0x110
 [<ffffffff8121d8cc>] vfs_fsync_range+0x4c/0xb0
 [<ffffffff810026ab>] ? syscall_trace_enter_phase1+0xfb/0x120
 [<ffffffff8121d94c>] vfs_fsync+0x1c/0x20
 [<ffffffff8121d98d>] do_fsync+0x3d/0x70
 [<ffffffff8121d9f0>] SyS_fsync+0x10/0x20
 [<ffffffff81002d97>] do_syscall_64+0x57/0xb0
 [<ffffffff81002531>] ? prepare_exit_to_usermode+0x31/0x40
 [<ffffffff819744bc>] entry_SYSCALL64_slow_path+0x25/0x25
Code: 48 89 9d 70 ff ff ff 48 89 c2 48 8d 75 d0 48 c1 ea 0c c1 e8 12 83 
e2 30 ff c8 48 81 c2 80 72 01 00 48 98 48 03 14 c5 c0 ba f3 81 <48> 89 
1a 48 8d 53 08 8b 43 08 89 45 d0 85 c0 75 0a f3 90 8b 02
RIP  [<ffffffff810ad8b9>] queued_spin_lock_slowpath+0x139/0x200
 RSP <ffff8807eff83ac8>
CR2: 0000000000017298
---[ end trace b53934847871f7b8 ]---
Kernel panic - not syncing: Fatal exception in interrupt

-chris
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Chris Mason Sept. 3, 2016, 8:05 p.m. UTC | #4
On 08/18/2016 11:03 AM, Chris Mason wrote:
> On Wed, Mar 30, 2016 at 11:37:21PM +0100, fdmanana@kernel.org wrote:
>> From: Filipe Manana <fdmanana@suse.com>
>>
>> If we rename an inode A (be it a file or a directory), create a new
>> inode B with the old name of inode A and under the same parent directory,
>> fsync inode B and then power fail, at log tree replay time we end up
>> removing inode A completely. If inode A is a directory then all its files
>> are gone too.
>
> I bisected a crash with dbench down to this patch.  The reproduction was:
>
> mkfs.btrfs -m single -f /dev/vdb
> mount /dev/vdb /btrfs
> cd /btrfs
> mkdir clients
> for x in `seq 0 100` ; do btrfs subvol create clients/client$x ; done
> sync
> dbench 100
>
> In other words, run dbench with a subvol per dbench thread.  It crashes
> immediately, most often with an invalid access in copy_from_user during
> file_write.  The pattern of crashes and location just show general
> memory corruption and the actual stack trace wasn't very useful.
>
> With this patch reverted the runs last much much longer, but we still
> hit a crash eventually.  It's not clear to me if this is two different
> bugs or if Filipe's patch just makes the corruption much easier to hit.
> I'm still digging through it all, but here's a common backtrace with
> this patch reverted:
>

Lots of debugging later, we're leaving btrfs_log_ctx structures from the 
stack in lists without taking them out before btrfs_sync_file() or 
btrfs_sync_log() exit.  I'm still figuring out all of the corner cases 
where it happens, but at least things are starting to make sense again.

I should have a patch out the door on Tuesday.

-chris
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 24d03c7..517d0cc 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4415,6 +4415,127 @@  static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
 	return ret;
 }
 
+/*
+ * When we are logging a new inode X, check if it doesn't have a reference that
+ * matches the reference from some other inode Y created in a past transaction
+ * and that was renamed in the current transaction. If we don't do this, then at
+ * log replay time we can lose inode Y (and all its files if it's a directory):
+ *
+ * mkdir /mnt/x
+ * echo "hello world" > /mnt/x/foobar
+ * sync
+ * mv /mnt/x /mnt/y
+ * mkdir /mnt/x                 # or touch /mnt/x
+ * xfs_io -c fsync /mnt/x
+ * <power fail>
+ * mount fs, trigger log replay
+ *
+ * After log replay we would lose the first directory and all its files (file
+ * foobar). If inode Y is not a directory we simply end up losing it:
+ *
+ * echo "123" > /mnt/foo
+ * sync
+ * mv /mnt/foo /mnt/bar
+ * echo "abc" > /mnt/foo
+ * xfs_io -c fsync /mnt/foo
+ * <power fail>
+ *
+ * We also need this for cases where a snapshot entry is replaced by some other
+ * entry (file or directory) otherwise we end up with an unreplayable log due to
+ * attempts to delete the snapshot entry (entry of type BTRFS_ROOT_ITEM_KEY) as
+ * if it were a regular entry:
+ *
+ * mkdir /mnt/x
+ * btrfs subvolume snapshot /mnt /mnt/x/snap
+ * btrfs subvolume delete /mnt/x/snap
+ * rmdir /mnt/x
+ * mkdir /mnt/x
+ * fsync /mnt/x or fsync some new file inside it
+ * <power fail>
+ *
+ * Everything from the snapshot delete onwards happens in the same transaction.
+ *
+ * Returns 1 if a matching dir item is found, 0 if not, negative errno on error.
+ */
+static int btrfs_check_ref_name_override(struct extent_buffer *eb,
+					 const int slot,
+					 const struct btrfs_key *key,
+					 struct inode *inode)
+{
+	int ret;
+	struct btrfs_path *search_path;
+	char *name = NULL;
+	u32 name_len = 0;
+	u32 item_size = btrfs_item_size_nr(eb, slot);
+	u32 cur_offset = 0;
+	unsigned long ptr = btrfs_item_ptr_offset(eb, slot);
+
+	search_path = btrfs_alloc_path();
+	if (!search_path)
+		return -ENOMEM;
+	search_path->search_commit_root = 1;
+	search_path->skip_locking = 1;
+
+	while (cur_offset < item_size) {
+		u64 parent;
+		u32 this_name_len;
+		u32 this_len;
+		unsigned long name_ptr;
+		struct btrfs_dir_item *di;
+
+		if (key->type == BTRFS_INODE_REF_KEY) {
+			struct btrfs_inode_ref *iref;
+
+			iref = (struct btrfs_inode_ref *)(ptr + cur_offset);
+			parent = key->offset;
+			this_name_len = btrfs_inode_ref_name_len(eb, iref);
+			name_ptr = (unsigned long)(iref + 1);
+			this_len = sizeof(*iref) + this_name_len;
+		} else {
+			struct btrfs_inode_extref *extref;
+
+			extref = (struct btrfs_inode_extref *)(ptr +
+							       cur_offset);
+			parent = btrfs_inode_extref_parent(eb, extref);
+			this_name_len = btrfs_inode_extref_name_len(eb, extref);
+			name_ptr = (unsigned long)&extref->name;
+			this_len = sizeof(*extref) + this_name_len;
+		}
+
+		if (this_name_len > name_len) { /* grow the scratch buffer */
+			char *new_name;
+
+			new_name = krealloc(name, this_name_len, GFP_NOFS);
+			if (!new_name) {
+				ret = -ENOMEM;
+				goto out;
+			}
+			name_len = this_name_len;
+			name = new_name;
+		}
+
+		read_extent_buffer(eb, name, name_ptr, this_name_len);
+		di = btrfs_lookup_dir_item(NULL, BTRFS_I(inode)->root,
+					   search_path, parent,
+					   name, this_name_len, 0);
+		if (di && !IS_ERR(di)) {
+			ret = 1; /* a dir item with this name exists */
+			goto out;
+		} else if (IS_ERR(di)) {
+			ret = PTR_ERR(di);
+			goto out;
+		}
+		btrfs_release_path(search_path);
+
+		cur_offset += this_len; /* step to the next ref in this item */
+	}
+	ret = 0;
+out:
+	btrfs_free_path(search_path);
+	kfree(name);
+	return ret;
+}
+
 /* log a single inode in the tree log.
  * At least one parent directory for this inode must exist in the tree
  * or be logged already.
@@ -4602,6 +4723,22 @@  again:
 		if (min_key.type == BTRFS_INODE_ITEM_KEY)
 			need_log_inode_item = false;
 
+		if ((min_key.type == BTRFS_INODE_REF_KEY ||
+		     min_key.type == BTRFS_INODE_EXTREF_KEY) &&
+		    BTRFS_I(inode)->generation == trans->transid) {
+			ret = btrfs_check_ref_name_override(path->nodes[0],
+							    path->slots[0],
+							    &min_key, inode);
+			if (ret < 0) {
+				err = ret;
+				goto out_unlock;
+			} else if (ret > 0) {
+				err = 1;
+				btrfs_set_log_full_commit(root->fs_info, trans);
+				goto out_unlock;
+			}
+		}
+
 		/* Skip xattrs, we log them later with btrfs_log_all_xattrs() */
 		if (min_key.type == BTRFS_XATTR_ITEM_KEY) {
 			if (ins_nr == 0)