Message ID | 20160225163927.GW17997@ZenIV.linux.org.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Thu, Feb 25, 2016 at 04:39:27PM +0000, Al Viro wrote: > Hrm... OK, seeing that you still seem to trigger those within an hour or > two (and *any* of remaining WARN_ON() are serious bugs - none of the > "mitigation had been triggered" remained, sorry for not making it clear), > let's try this. Again, any WARN_ON triggered means that we'd caught something, > whether it progresses into oops or not. Any news on that one? I'm going to carve fixes for understood bugs out of that one and put those into tonight push, but it would be nice to sort out all remaining crap lurking in that area... Another question: what about the very first trace you'd posted, with apparent GPF at 00000050? Have you seen anything like that afterwards? > diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c > index c6d7d3d..86f81e3 100644 > --- a/fs/autofs4/root.c > +++ b/fs/autofs4/root.c > @@ -323,6 +323,7 @@ static struct dentry *autofs4_mountpoint_changed(struct path *path) > struct dentry *new = d_lookup(parent, &dentry->d_name); > if (!new) > return NULL; > + WARN_ON(d_is_negative(new)); > ino = autofs4_dentry_ino(new); > ino->last_used = jiffies; > dput(path->dentry); > diff --git a/fs/namei.c b/fs/namei.c > index f624d13..daa6b25 100644 > --- a/fs/namei.c > +++ b/fs/namei.c > @@ -1209,6 +1209,7 @@ static int follow_managed(struct path *path, struct nameidata *nd) > /* Handle an automount point */ > if (managed & DCACHE_NEED_AUTOMOUNT) { > ret = follow_automount(path, nd, &need_mntput); > + WARN_ON(d_is_negative(path->dentry)); > if (ret < 0) > break; > continue; > @@ -1260,6 +1261,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, > { > for (;;) { > struct mount *mounted; > + void *p; > /* > * Don't forget we might have a non-mountpoint managed dentry > * that wants to block transit. 
> @@ -1289,7 +1291,9 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, > * dentry sequence number here after this d_inode read, > * because a mount-point is always pinned. > */ > - *inode = path->dentry->d_inode; > + p = *inode = path->dentry->d_inode; > + if (unlikely(!p)) > + WARN_ON(!read_seqretry(&mount_lock, nd->m_seq)); > } > return !read_seqretry(&mount_lock, nd->m_seq) && > !(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT); > @@ -1580,10 +1584,12 @@ static int lookup_fast(struct nameidata *nd, > */ > if (negative) > return -ENOENT; > + WARN_ON(!*inode); // ->d_seq was fucked somehow > path->mnt = mnt; > path->dentry = dentry; > - if (likely(__follow_mount_rcu(nd, path, inode, seqp))) > + if (likely(__follow_mount_rcu(nd, path, inode, seqp))) { > return 0; > + } > unlazy: > if (unlazy_walk(nd, dentry, seq)) > return -ECHILD; > @@ -1613,8 +1619,10 @@ unlazy: > path->mnt = mnt; > path->dentry = dentry; > err = follow_managed(path, nd); > - if (likely(!err)) > + if (likely(!err)) { > *inode = d_backing_inode(path->dentry); > + WARN_ON(!*inode); > + } > return err; > > need_lookup: > @@ -1712,6 +1720,12 @@ static inline int should_follow_link(struct nameidata *nd, struct path *link, > return 0; > if (!follow) > return 0; > + /* make sure that d_is_symlink above matches inode */ > + if (nd->flags & LOOKUP_RCU) { > + if (read_seqcount_retry(&link->dentry->d_seq, seq)) > + return -ECHILD; > + } > + WARN_ON(!inode); // now, _that_ should not happen. 
> return pick_link(nd, link, inode, seq); > } > > @@ -1743,11 +1757,11 @@ static int walk_component(struct nameidata *nd, int flags) > if (err < 0) > return err; > > - inode = d_backing_inode(path.dentry); > seq = 0; /* we are already out of RCU mode */ > err = -ENOENT; > if (d_is_negative(path.dentry)) > goto out_path_put; > + inode = d_backing_inode(path.dentry); > } > > if (flags & WALK_PUT) > @@ -3106,8 +3120,10 @@ static int do_last(struct nameidata *nd, > nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; > /* we _can_ be in RCU mode here */ > error = lookup_fast(nd, &path, &inode, &seq); > - if (likely(!error)) > + if (likely(!error)) { > + WARN_ON(!inode); > goto finish_lookup; > + } > > if (error < 0) > return error; > @@ -3192,12 +3208,13 @@ retry_lookup: > return error; > > BUG_ON(nd->flags & LOOKUP_RCU); > - inode = d_backing_inode(path.dentry); > seq = 0; /* out of RCU mode, so the value doesn't matter */ > if (unlikely(d_is_negative(path.dentry))) { > path_to_nameidata(&path, nd); > return -ENOENT; > } > + inode = d_backing_inode(path.dentry); > + WARN_ON(!inode); > finish_lookup: > if (nd->depth) > put_link(nd); > @@ -3206,11 +3223,6 @@ finish_lookup: > if (unlikely(error)) > return error; > > - if (unlikely(d_is_symlink(path.dentry)) && !(open_flag & O_PATH)) { > - path_to_nameidata(&path, nd); > - return -ELOOP; > - } > - > if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path.mnt) { > path_to_nameidata(&path, nd); > } else { > @@ -3229,6 +3241,10 @@ finish_open: > return error; > } > audit_inode(nd->name, nd->path.dentry, 0); > + if (unlikely(d_is_symlink(nd->path.dentry)) && !(open_flag & O_PATH)) { > + error = -ELOOP; > + goto out; > + } > error = -EISDIR; > if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry)) > goto out; > @@ -3273,6 +3289,10 @@ opened: > goto exit_fput; > } > out: > + if (unlikely(error > 0)) { > + WARN_ON(1); > + error = -EINVAL; > + } > if (got_write) > mnt_drop_write(nd->path.mnt); > path_put(&save_parent); > diff --git 
a/fs/namespace.c b/fs/namespace.c > index 4fb1691..4128a5c 100644 > --- a/fs/namespace.c > +++ b/fs/namespace.c > @@ -1060,6 +1060,8 @@ static void cleanup_mnt(struct mount *mnt) > * so mnt_get_writers() below is safe. > */ > WARN_ON(mnt_get_writers(mnt)); > + WARN_ON(!mnt->mnt.mnt_root->d_inode); // some joker has managed to > + // make mnt_root negative on us > if (unlikely(mnt->mnt_pins.first)) > mnt_pin_kill(mnt); > fsnotify_vfsmount_delete(&mnt->mnt); > -- > To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Feb 26, 2016 at 10:21 PM, Al Viro <viro@zeniv.linux.org.uk> wrote: > On Thu, Feb 25, 2016 at 04:39:27PM +0000, Al Viro wrote: >> Hrm... OK, seeing that you still seem to trigger those within an hour or >> two (and *any* of remaining WARN_ON() are serious bugs - none of the >> "mitigation had been triggered" remained, sorry for not making it clear), >> let's try this. Again, any WARN_ON triggered means that we'd caught something, >> whether it progresses into oops or not. > > Any news on that one? I'm going to carve fixes for understood bugs out of > that one and put those into tonight push, but it would be nice to sort out > all remaining crap lurking in that area... > > Another question: what about the very first trace you'd posted, with apparent > GPF at 00000050? Have you seen anything like that afterwards? No, I did not have time to retest. GPF at 00000050 was not mine, it was Mickaël's. I did not try to reproduce mine first. But most likely it is the same as the one I reproduced lately (GPF at NULL in atime_needs_update). -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Feb 26, 2016 at 10:25:21PM +0100, Dmitry Vyukov wrote: > On Fri, Feb 26, 2016 at 10:21 PM, Al Viro <viro@zeniv.linux.org.uk> wrote: > > On Thu, Feb 25, 2016 at 04:39:27PM +0000, Al Viro wrote: > >> Hrm... OK, seeing that you still seem to trigger those within an hour or > >> two (and *any* of remaining WARN_ON() are serious bugs - none of the > >> "mitigation had been triggered" remained, sorry for not making it clear), > >> let's try this. Again, any WARN_ON triggered means that we'd caught something, > >> whether it progresses into oops or not. > > > > Any news on that one? I'm going to carve fixes for understood bugs out of > > that one and put those into tonight push, but it would be nice to sort out > > all remaining crap lurking in that area... > > > > Another question: what about the very first trace you'd posted, with apparent > > GPF at 00000050? Have you seen anything like that afterwards? > > No, I did not have time to retest. > > GPF at 00000050 was not mine, it was Mickaël's. Ah, OK - his is basically a forced nd->stack[] underrun, with a never-assigned nd->link_inode passed to atime_needs_update(), so we are just passing the contents of an uninitialized stack word there, and while it ends up possible to dereference, it's not an address of struct inode and the first attempt to follow a pointer in what would've been a struct inode at that address (accessing inode->i_sb->s_flags) did blow up with GPF at offsetof(struct super_block, s_flags). All right, so we basically have several understood ones with fixes plus something unknown that leads to lookup_fast() returning 0 with NULL in *inode in about an hour or two on your setup... -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Feb 26, 2016 at 10:07:59PM +0000, Al Viro wrote: > On Fri, Feb 26, 2016 at 10:25:21PM +0100, Dmitry Vyukov wrote: > > On Fri, Feb 26, 2016 at 10:21 PM, Al Viro <viro@zeniv.linux.org.uk> wrote: > > > On Thu, Feb 25, 2016 at 04:39:27PM +0000, Al Viro wrote: > > >> Hrm... OK, seeing that you still seem to trigger those within an hour or > > >> two (and *any* of remaining WARN_ON() are serious bugs - none of the > > >> "mitigation had been triggered" remained, sorry for not making it clear), > > >> let's try this. Again, any WARN_ON triggered means that we'd caught something, > > >> whether it progresses into oops or not. > > > > > > Any news on that one? I'm going to carve fixes for understood bugs out of > > > that one and put those into tonight push, but it would be nice to sort out > > > all remaining crap lurking in that area... > > > > > > Another question: what about the very first trace you'd posted, with apparent > > > GPF at 00000050? Have you seen anything like that afterwards? > > > > No, I did not have time to retest. > > > > GPF at 00000050 was not mine, it was Mickaël's. > > Ah, OK - his is basically a forced nd->stack[] underrun, with passing a > never-assigned nd->link_inode to atime_needs_update(), so we are just > passing a contents of uninitialized stack word there and while it ends > up possible to dereference, it's not an address of struct inode and the > first attempt to follow a pointer in what would've been a struct inode > at that address (accessing inode->i_sb->s_flags) did blow up with GPF at > offsetof(struct super_block, s_flags). > > All right, so we basically have several understood ones with fixes plus > something unknown that leads to lookup_fast() returning 0 with NULL in > *inode in about an hour or two on your setup... BTW, what kind of userland are you using? The thing is, shared-subtree setups differ, and if the crap is anywhere near vfsmount handling, that could have some impact... 
So far I haven't been able to trigger any of these WARN_ON(); the setup here is debian/testing on a 4-way KVM guest with 4GB of memory given to it, running on a 6-way host (Phenom II X6 1100T, 3.3GHz, 16GB RAM total); 4.2 with debian/stable userland on the host. What's the setup on your reproducer? -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sun, Feb 28, 2016 at 4:43 PM, Dmitry Vyukov <dvyukov@google.com> wrote: > On Sat, Feb 27, 2016 at 11:27 PM, Al Viro <viro@zeniv.linux.org.uk> wrote: >> On Fri, Feb 26, 2016 at 10:07:59PM +0000, Al Viro wrote: >>> On Fri, Feb 26, 2016 at 10:25:21PM +0100, Dmitry Vyukov wrote: >>> > On Fri, Feb 26, 2016 at 10:21 PM, Al Viro <viro@zeniv.linux.org.uk> wrote: >>> > > On Thu, Feb 25, 2016 at 04:39:27PM +0000, Al Viro wrote: >>> > >> Hrm... OK, seeing that you still seem to trigger those within an hour or >>> > >> two (and *any* of remaining WARN_ON() are serious bugs - none of the >>> > >> "mitigation had been triggered" remained, sorry for not making it clear), >>> > >> let's try this. Again, any WARN_ON triggered means that we'd caught something, >>> > >> whether it progresses into oops or not. >>> > > >>> > > Any news on that one? I'm going to carve fixes for understood bugs out of >>> > > that one and put those into tonight push, but it would be nice to sort out >>> > > all remaining crap lurking in that area... >>> > > >>> > > Another question: what about the very first trace you'd posted, with apparent >>> > > GPF at 00000050? Have you seen anything like that afterwards? >>> > >>> > No, I did not have time to retest. >>> > >>> > GPF at 00000050 was not mine, it was Mickaël's. >>> >>> Ah, OK - his is basically a forced nd->stack[] underrun, with passing a >>> never-assigned nd->link_inode to atime_needs_update(), so we are just >>> passing a contents of uninitialized stack word there and while it ends >>> up possible to dereference, it's not an address of struct inode and the >>> first attempt to follow a pointer in what would've been a struct inode >>> at that address (accessing inode->i_sb->s_flags) did blow up with GPF at >>> offsetof(struct super_block, s_flags). 
>>> >>> All right, so we basically have several understood ones with fixes plus >>> something unknown that leads to lookup_fast() returning 0 with NULL in >>> *inode in about an hour or two on your setup... >> >> BTW, what kind of userland are you using? The thing is, shared-subtree >> setups differ, and if the crap is anywhere near vfsmount handling, that >> could have some impact... So far I hadn't been able to trigger any of >> these WARN_ON(); setup here is debian/testing on 4-way KVM guest with 4Gb >> memory given to it running on a 6-way host (Phenom II X6 1100T, 3.3GHz, 16Gb >> RAM total); 4.2 with debian/stable userland on host. What's the setup on >> your reproducer? > > > Restarted fuzzer with the latest patch on top of > 0fcbf996d848d03573113d83f4e3fb3bcfa5ab5e. > >> All that stops these warnings from triggering atime_... oopsen is that dentry >> involved isn't a symlink one. > > What worries me is that I am running the same program in the same > setup. The program does operate on symlinks and previously it triggered > oopses. But now it does not. I've also rebased onto latest Linus tree, > maybe that made a difference... > > My userspace is a Debian Wheezy built using this script: > https://github.com/google/syzkaller/blob/master/tools/create-image.sh > > I run it in qemu as: > $ qemu-system-x86_64 -hda wheezy.img -net > user,host=10.0.2.10,hostfwd=tcp::10022-:22 -net nic -nographic -kernel > arch/x86/boot/bzImage -append "console=ttyS0 root=/dev/sda debug > earlyprintk=serial slub_debug=UZ" -enable-kvm -pidfile vm_pid -m 2G > -numa node,nodeid=0,cpus=0-1 -numa node,nodeid=1,cpus=2-3 -smp > sockets=2,cores=2,threads=1 -usb -usbdevice mouse -usbdevice tablet > -soundhw all > > I also use a pretty beefy config (attached) which includes KASAN and > KCOV both of which introduce significant slowdown and can affect > thread interleavings. What was triggered so far is this. As far as I can see, it is roughly the same as before.
[ 1422.292356] ------------[ cut here ]------------ [ 1422.292841] WARNING: CPU: 0 PID: 32603 at fs/namei.c:1587 lookup_fast+0x3fa/0x450() [ 1422.293543] Modules linked in: [ 1422.293868] CPU: 0 PID: 32603 Comm: syz-executor Not tainted 4.5.0-rc4+ #75 [ 1422.294426] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 [ 1422.294482] 0000000000000000 ffff8800148d3c48 ffffffff81931fc9 0000000000000000 [ 1422.294482] ffffffff83314939 ffff8800148d3c80 ffffffff8116eee1 ffff8800148d3de0 [ 1422.294482] ffff8800148d3d90 ffff8800148d3d98 ffff8800148d3d8c 0000000000000001 [ 1422.294482] Call Trace: [ 1422.294482] [<ffffffff81931fc9>] dump_stack+0x99/0xd0 [ 1422.294482] [<ffffffff8116eee1>] warn_slowpath_common+0x81/0xc0 [ 1422.294482] [<ffffffff8116efd5>] warn_slowpath_null+0x15/0x20 [ 1422.294482] [<ffffffff8130e89a>] lookup_fast+0x3fa/0x450 [ 1422.294482] [<ffffffff8130f388>] ? link_path_walk+0x68/0x4e0 [ 1422.294482] [<ffffffff8130fe66>] ? path_init+0x666/0x810 [ 1422.294482] [<ffffffff81310775>] path_openat+0x375/0x1520 [ 1422.294482] [<ffffffff811c780d>] ? trace_hardirqs_on+0xd/0x10 [ 1422.294482] [<ffffffff81313129>] do_filp_open+0x79/0xd0 [ 1422.294482] [<ffffffff82ae3022>] ? _raw_spin_unlock+0x22/0x30 [ 1422.294482] [<ffffffff81322af8>] ? 
__alloc_fd+0xf8/0x200 [ 1422.294482] [<ffffffff81300c10>] do_sys_open+0x110/0x1f0 [ 1422.294482] [<ffffffff81300d1f>] SyS_openat+0xf/0x20 [ 1422.294482] [<ffffffff82ae3ab6>] entry_SYSCALL_64_fastpath+0x16/0x7a [ 1422.304062] ---[ end trace 658f7fb8fc01ebf0 ]--- [ 1422.304425] ------------[ cut here ]------------ [ 1422.304842] WARNING: CPU: 0 PID: 32603 at fs/namei.c:3124 path_openat+0x12bc/0x1520() [ 1422.305551] Modules linked in: [ 1422.305803] CPU: 0 PID: 32603 Comm: syz-executor Tainted: G W 4.5.0-rc4+ #75 [ 1422.306476] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 [ 1422.306476] 0000000000000000 ffff8800148d3cb8 ffffffff81931fc9 0000000000000000 [ 1422.306476] ffffffff83314939 ffff8800148d3cf0 ffffffff8116eee1 0000000000000005 [ 1422.306476] ffff8800148d3d98 0000000000048000 ffff8800148d3de0 ffff8800148d3efc [ 1422.306476] Call Trace: [ 1422.306476] [<ffffffff81931fc9>] dump_stack+0x99/0xd0 [ 1422.306476] [<ffffffff8116eee1>] warn_slowpath_common+0x81/0xc0 [ 1422.306476] [<ffffffff8116efd5>] warn_slowpath_null+0x15/0x20 [ 1422.306476] [<ffffffff813116bc>] path_openat+0x12bc/0x1520 [ 1422.306476] [<ffffffff81313129>] do_filp_open+0x79/0xd0 [ 1422.306476] [<ffffffff82ae3022>] ? _raw_spin_unlock+0x22/0x30 [ 1422.306476] [<ffffffff81322af8>] ? __alloc_fd+0xf8/0x200 [ 1422.306476] [<ffffffff81300c10>] do_sys_open+0x110/0x1f0 [ 1422.306476] [<ffffffff81300d1f>] SyS_openat+0xf/0x20 [ 1422.306476] [<ffffffff82ae3ab6>] entry_SYSCALL_64_fastpath+0x16/0x7a [ 1422.314201] ---[ end trace 658f7fb8fc01ebf1 ]--- INIT: Id "V0" respawning too fast: disabled for 5 minutes -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[dhowells Cc'd] On Sun, Feb 28, 2016 at 05:04:19PM +0100, Dmitry Vyukov wrote: > > [ 1422.292356] ------------[ cut here ]------------ > [ 1422.292841] WARNING: CPU: 0 PID: 32603 at fs/namei.c:1587 > lookup_fast+0x3fa/0x450() Huh? So you have dentry = __d_lookup_rcu(parent, &nd->last, &seq); returning non-NULL dentry, then *inode = d_backing_inode(dentry); negative = d_is_negative(dentry); if (read_seqcount_retry(&dentry->d_seq, seq)) return -ECHILD; followed by *inode == NULL and negative == false? Nuts... OK, that removes vfsmounts from consideration, but... How the fuck is that possible? We have smp_rmb(); seq = &dentry->d_seq->sequence & ~1; see that ->d_name and ->d_parent match what we are looking for, then *inode = dentry->d_inode; type = READ_ONCE(dentry->d_flags); smp_rmb(); negative = (type & DCACHE_ENTRY_TYPE) == DCACHE_MISS_TYPE; smp_rmb(); if (dentry->d_seq->sequence != seq) sod off and observe *inode == NULL && !negative Erm... What's to order ->d_inode and ->d_flags fetches there? David? Looks like the barrier in d_is_negative() is on the wrong side of the fetch. Confused... -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index c6d7d3d..86f81e3 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -323,6 +323,7 @@ static struct dentry *autofs4_mountpoint_changed(struct path *path) struct dentry *new = d_lookup(parent, &dentry->d_name); if (!new) return NULL; + WARN_ON(d_is_negative(new)); ino = autofs4_dentry_ino(new); ino->last_used = jiffies; dput(path->dentry); diff --git a/fs/namei.c b/fs/namei.c index f624d13..daa6b25 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1209,6 +1209,7 @@ static int follow_managed(struct path *path, struct nameidata *nd) /* Handle an automount point */ if (managed & DCACHE_NEED_AUTOMOUNT) { ret = follow_automount(path, nd, &need_mntput); + WARN_ON(d_is_negative(path->dentry)); if (ret < 0) break; continue; @@ -1260,6 +1261,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, { for (;;) { struct mount *mounted; + void *p; /* * Don't forget we might have a non-mountpoint managed dentry * that wants to block transit. @@ -1289,7 +1291,9 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, * dentry sequence number here after this d_inode read, * because a mount-point is always pinned. 
*/ - *inode = path->dentry->d_inode; + p = *inode = path->dentry->d_inode; + if (unlikely(!p)) + WARN_ON(!read_seqretry(&mount_lock, nd->m_seq)); } return !read_seqretry(&mount_lock, nd->m_seq) && !(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT); @@ -1580,10 +1584,12 @@ static int lookup_fast(struct nameidata *nd, */ if (negative) return -ENOENT; + WARN_ON(!*inode); // ->d_seq was fucked somehow path->mnt = mnt; path->dentry = dentry; - if (likely(__follow_mount_rcu(nd, path, inode, seqp))) + if (likely(__follow_mount_rcu(nd, path, inode, seqp))) { return 0; + } unlazy: if (unlazy_walk(nd, dentry, seq)) return -ECHILD; @@ -1613,8 +1619,10 @@ unlazy: path->mnt = mnt; path->dentry = dentry; err = follow_managed(path, nd); - if (likely(!err)) + if (likely(!err)) { *inode = d_backing_inode(path->dentry); + WARN_ON(!*inode); + } return err; need_lookup: @@ -1712,6 +1720,12 @@ static inline int should_follow_link(struct nameidata *nd, struct path *link, return 0; if (!follow) return 0; + /* make sure that d_is_symlink above matches inode */ + if (nd->flags & LOOKUP_RCU) { + if (read_seqcount_retry(&link->dentry->d_seq, seq)) + return -ECHILD; + } + WARN_ON(!inode); // now, _that_ should not happen. 
return pick_link(nd, link, inode, seq); } @@ -1743,11 +1757,11 @@ static int walk_component(struct nameidata *nd, int flags) if (err < 0) return err; - inode = d_backing_inode(path.dentry); seq = 0; /* we are already out of RCU mode */ err = -ENOENT; if (d_is_negative(path.dentry)) goto out_path_put; + inode = d_backing_inode(path.dentry); } if (flags & WALK_PUT) @@ -3106,8 +3120,10 @@ static int do_last(struct nameidata *nd, nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; /* we _can_ be in RCU mode here */ error = lookup_fast(nd, &path, &inode, &seq); - if (likely(!error)) + if (likely(!error)) { + WARN_ON(!inode); goto finish_lookup; + } if (error < 0) return error; @@ -3192,12 +3208,13 @@ retry_lookup: return error; BUG_ON(nd->flags & LOOKUP_RCU); - inode = d_backing_inode(path.dentry); seq = 0; /* out of RCU mode, so the value doesn't matter */ if (unlikely(d_is_negative(path.dentry))) { path_to_nameidata(&path, nd); return -ENOENT; } + inode = d_backing_inode(path.dentry); + WARN_ON(!inode); finish_lookup: if (nd->depth) put_link(nd); @@ -3206,11 +3223,6 @@ finish_lookup: if (unlikely(error)) return error; - if (unlikely(d_is_symlink(path.dentry)) && !(open_flag & O_PATH)) { - path_to_nameidata(&path, nd); - return -ELOOP; - } - if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path.mnt) { path_to_nameidata(&path, nd); } else { @@ -3229,6 +3241,10 @@ finish_open: return error; } audit_inode(nd->name, nd->path.dentry, 0); + if (unlikely(d_is_symlink(nd->path.dentry)) && !(open_flag & O_PATH)) { + error = -ELOOP; + goto out; + } error = -EISDIR; if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry)) goto out; @@ -3273,6 +3289,10 @@ opened: goto exit_fput; } out: + if (unlikely(error > 0)) { + WARN_ON(1); + error = -EINVAL; + } if (got_write) mnt_drop_write(nd->path.mnt); path_put(&save_parent); diff --git a/fs/namespace.c b/fs/namespace.c index 4fb1691..4128a5c 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1060,6 +1060,8 @@ static void cleanup_mnt(struct 
mount *mnt) * so mnt_get_writers() below is safe. */ WARN_ON(mnt_get_writers(mnt)); + WARN_ON(!mnt->mnt.mnt_root->d_inode); // some joker has managed to + // make mnt_root negative on us if (unlikely(mnt->mnt_pins.first)) mnt_pin_kill(mnt); fsnotify_vfsmount_delete(&mnt->mnt);