Message ID | 20250414-rennt-wimmeln-f186c3a780f1@brauner (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | fs: add kern_path_locked_negative() | expand |
On 4/14/25 22:13, Christian Brauner wrote: > The audit code relies on the fact that kern_path_locked() returned a > path even for a negative dentry. If it doesn't find a valid dentry it > immediately calls: > > audit_find_parent(d_backing_inode(parent_path.dentry)); > > which assumes that parent_path.dentry is still valid. But it isn't since > kern_path_locked() has been changed to path_put() also for a negative > dentry. > > Fix this by adding a helper that implements the required audit semantics > and allows us to fix the immediate bleeding. We can find a unified > solution for this afterwards. I've been seeing these warnings on every boot since rc1: https://paste.opensuse.org/pastes/5501e8bfc22f One time there was also an oops later on: https://paste.opensuse.org/pastes/48406b5d1c96 At least the warnings are gone with this patch. Thanks! Reported-and-tested-by: Vlastimil Babka <vbabka@suse.cz> > Fixes: 1c3cb50b58c3 ("VFS: change kern_path_locked() and user_path_locked_at() to never return negative dentry") > Signed-off-by: Christian Brauner <brauner@kernel.org> > --- > fs/namei.c | 65 ++++++++++++++++++++++++++++++++----------- > include/linux/namei.h | 1 + > kernel/audit_watch.c | 16 +++++++---- > 3 files changed, 60 insertions(+), 22 deletions(-) > > diff --git a/fs/namei.c b/fs/namei.c > index 360a86ca1f02..2c5ca9ef6811 100644 > --- a/fs/namei.c > +++ b/fs/namei.c > @@ -1665,27 +1665,20 @@ static struct dentry *lookup_dcache(const struct qstr *name, > return dentry; > } > > -/* > - * Parent directory has inode locked exclusive. This is one > - * and only case when ->lookup() gets called on non in-lookup > - * dentries - as the matter of fact, this only gets called > - * when directory is guaranteed to have no in-lookup children > - * at all. > - * Will return -ENOENT if name isn't found and LOOKUP_CREATE wasn't passed. > - * Will return -EEXIST if name is found and LOOKUP_EXCL was passed. > - */ > -struct dentry *lookup_one_qstr_excl(const struct qstr *name, > - struct dentry *base, > - unsigned int flags) > +static struct dentry *lookup_one_qstr_excl_raw(const struct qstr *name, > + struct dentry *base, > + unsigned int flags) > { > - struct dentry *dentry = lookup_dcache(name, base, flags); > + struct dentry *dentry; > struct dentry *old; > - struct inode *dir = base->d_inode; > + struct inode *dir; > > + dentry = lookup_dcache(name, base, flags); > if (dentry) > - goto found; > + return dentry; > > /* Don't create child dentry for a dead directory. */ > + dir = base->d_inode; > if (unlikely(IS_DEADDIR(dir))) > return ERR_PTR(-ENOENT); > > @@ -1698,7 +1691,24 @@ struct dentry *lookup_one_qstr_excl(const struct qstr *name, > dput(dentry); > dentry = old; > } > -found: > + return dentry; > +} > + > +/* > + * Parent directory has inode locked exclusive. This is one > + * and only case when ->lookup() gets called on non in-lookup > + * dentries - as the matter of fact, this only gets called > + * when directory is guaranteed to have no in-lookup children > + * at all. > + * Will return -ENOENT if name isn't found and LOOKUP_CREATE wasn't passed. > + * Will return -EEXIST if name is found and LOOKUP_EXCL was passed. > + */ > +struct dentry *lookup_one_qstr_excl(const struct qstr *name, > + struct dentry *base, unsigned int flags) > +{ > + struct dentry *dentry; > + > + dentry = lookup_one_qstr_excl_raw(name, base, flags); > if (IS_ERR(dentry)) > return dentry; > if (d_is_negative(dentry) && !(flags & LOOKUP_CREATE)) { > @@ -2762,6 +2772,29 @@ static struct dentry *__kern_path_locked(int dfd, struct filename *name, struct > return d; > } > > +struct dentry *kern_path_locked_negative(const char *name, struct path *path) > +{ > + struct filename *filename __free(putname) = getname_kernel(name); > + struct dentry *d; > + struct qstr last; > + int type, error; > + > + error = filename_parentat(AT_FDCWD, filename, 0, path, &last, &type); > + if (error) > + return ERR_PTR(error); > + if (unlikely(type != LAST_NORM)) { > + path_put(path); > + return ERR_PTR(-EINVAL); > + } > + inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT); > + d = lookup_one_qstr_excl_raw(&last, path->dentry, 0); > + if (IS_ERR(d)) { > + inode_unlock(path->dentry->d_inode); > + path_put(path); > + } > + return d; > +} > + > struct dentry *kern_path_locked(const char *name, struct path *path) > { > struct filename *filename = getname_kernel(name); > diff --git a/include/linux/namei.h b/include/linux/namei.h > index e3042176cdf4..bbaf55fb3101 100644 > --- a/include/linux/namei.h > +++ b/include/linux/namei.h > @@ -62,6 +62,7 @@ extern struct dentry *kern_path_create(int, const char *, struct path *, unsigne > extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int); > extern void done_path_create(struct path *, struct dentry *); > extern struct dentry *kern_path_locked(const char *, struct path *); > +extern struct dentry *kern_path_locked_negative(const char *, struct path *); > extern struct dentry *user_path_locked_at(int , const char __user *, struct path *); > int vfs_path_parent_lookup(struct filename *filename, unsigned int flags, > struct path *parent, struct qstr *last, int *type, > diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c > index 367eaf2c78b7..0ebbbe37a60f 100644 > --- a/kernel/audit_watch.c > +++ b/kernel/audit_watch.c > @@ -347,12 +347,17 @@ static void audit_remove_parent_watches(struct audit_parent *parent) > /* Get path information necessary for adding watches. */ > static int audit_get_nd(struct audit_watch *watch, struct path *parent) > { > - struct dentry *d = kern_path_locked(watch->path, parent); > + struct dentry *d; > + > + d = kern_path_locked_negative(watch->path, parent); > if (IS_ERR(d)) > return PTR_ERR(d); > - /* update watch filter fields */ > - watch->dev = d->d_sb->s_dev; > - watch->ino = d_backing_inode(d)->i_ino; > + > + if (d_is_positive(d)) { > + /* update watch filter fields */ > + watch->dev = d->d_sb->s_dev; > + watch->ino = d_backing_inode(d)->i_ino; > + } > > inode_unlock(d_backing_inode(parent->dentry)); > dput(d); > @@ -418,11 +423,10 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list) > /* caller expects mutex locked */ > mutex_lock(&audit_filter_mutex); > > - if (ret && ret != -ENOENT) { > + if (ret) { > audit_put_watch(watch); > return ret; > } > - ret = 0; > > /* either find an old parent or attach a new one */ > parent = audit_find_parent(d_backing_inode(parent_path.dentry));
diff --git a/fs/namei.c b/fs/namei.c index 360a86ca1f02..2c5ca9ef6811 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1665,27 +1665,20 @@ static struct dentry *lookup_dcache(const struct qstr *name, return dentry; } -/* - * Parent directory has inode locked exclusive. This is one - * and only case when ->lookup() gets called on non in-lookup - * dentries - as the matter of fact, this only gets called - * when directory is guaranteed to have no in-lookup children - * at all. - * Will return -ENOENT if name isn't found and LOOKUP_CREATE wasn't passed. - * Will return -EEXIST if name is found and LOOKUP_EXCL was passed. - */ -struct dentry *lookup_one_qstr_excl(const struct qstr *name, - struct dentry *base, - unsigned int flags) +static struct dentry *lookup_one_qstr_excl_raw(const struct qstr *name, + struct dentry *base, + unsigned int flags) { - struct dentry *dentry = lookup_dcache(name, base, flags); + struct dentry *dentry; struct dentry *old; - struct inode *dir = base->d_inode; + struct inode *dir; + dentry = lookup_dcache(name, base, flags); if (dentry) - goto found; + return dentry; /* Don't create child dentry for a dead directory. */ + dir = base->d_inode; if (unlikely(IS_DEADDIR(dir))) return ERR_PTR(-ENOENT); @@ -1698,7 +1691,24 @@ struct dentry *lookup_one_qstr_excl(const struct qstr *name, dput(dentry); dentry = old; } -found: + return dentry; +} + +/* + * Parent directory has inode locked exclusive. This is one + * and only case when ->lookup() gets called on non in-lookup + * dentries - as the matter of fact, this only gets called + * when directory is guaranteed to have no in-lookup children + * at all. + * Will return -ENOENT if name isn't found and LOOKUP_CREATE wasn't passed. + * Will return -EEXIST if name is found and LOOKUP_EXCL was passed. + */ +struct dentry *lookup_one_qstr_excl(const struct qstr *name, + struct dentry *base, unsigned int flags) +{ + struct dentry *dentry; + + dentry = lookup_one_qstr_excl_raw(name, base, flags); if (IS_ERR(dentry)) return dentry; if (d_is_negative(dentry) && !(flags & LOOKUP_CREATE)) { @@ -2762,6 +2772,29 @@ static struct dentry *__kern_path_locked(int dfd, struct filename *name, struct return d; } +struct dentry *kern_path_locked_negative(const char *name, struct path *path) +{ + struct filename *filename __free(putname) = getname_kernel(name); + struct dentry *d; + struct qstr last; + int type, error; + + error = filename_parentat(AT_FDCWD, filename, 0, path, &last, &type); + if (error) + return ERR_PTR(error); + if (unlikely(type != LAST_NORM)) { + path_put(path); + return ERR_PTR(-EINVAL); + } + inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT); + d = lookup_one_qstr_excl_raw(&last, path->dentry, 0); + if (IS_ERR(d)) { + inode_unlock(path->dentry->d_inode); + path_put(path); + } + return d; +} + struct dentry *kern_path_locked(const char *name, struct path *path) { struct filename *filename = getname_kernel(name); diff --git a/include/linux/namei.h b/include/linux/namei.h index e3042176cdf4..bbaf55fb3101 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -62,6 +62,7 @@ extern struct dentry *kern_path_create(int, const char *, struct path *, unsigne extern struct dentry *user_path_create(int, const char __user *, struct path *, unsigned int); extern void done_path_create(struct path *, struct dentry *); extern struct dentry *kern_path_locked(const char *, struct path *); +extern struct dentry *kern_path_locked_negative(const char *, struct path *); extern struct dentry *user_path_locked_at(int , const char __user *, struct path *); int vfs_path_parent_lookup(struct filename *filename, unsigned int flags, struct path *parent, struct qstr *last, int *type, diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 367eaf2c78b7..0ebbbe37a60f 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -347,12 +347,17 @@ static void audit_remove_parent_watches(struct audit_parent *parent) /* Get path information necessary for adding watches. */ static int audit_get_nd(struct audit_watch *watch, struct path *parent) { - struct dentry *d = kern_path_locked(watch->path, parent); + struct dentry *d; + + d = kern_path_locked_negative(watch->path, parent); if (IS_ERR(d)) return PTR_ERR(d); - /* update watch filter fields */ - watch->dev = d->d_sb->s_dev; - watch->ino = d_backing_inode(d)->i_ino; + + if (d_is_positive(d)) { + /* update watch filter fields */ + watch->dev = d->d_sb->s_dev; + watch->ino = d_backing_inode(d)->i_ino; + } inode_unlock(d_backing_inode(parent->dentry)); dput(d); @@ -418,11 +423,10 @@ int audit_add_watch(struct audit_krule *krule, struct list_head **list) /* caller expects mutex locked */ mutex_lock(&audit_filter_mutex); - if (ret && ret != -ENOENT) { + if (ret) { audit_put_watch(watch); return ret; } - ret = 0; /* either find an old parent or attach a new one */ parent = audit_find_parent(d_backing_inode(parent_path.dentry));
The audit code relies on the fact that kern_path_locked() returned a path even for a negative dentry. If it doesn't find a valid dentry it immediately calls: audit_find_parent(d_backing_inode(parent_path.dentry)); which assumes that parent_path.dentry is still valid. But it isn't since kern_path_locked() has been changed to path_put() also for a negative dentry. Fix this by adding a helper that implements the required audit semantics and allows us to fix the immediate bleeding. We can find a unified solution for this afterwards. Fixes: 1c3cb50b58c3 ("VFS: change kern_path_locked() and user_path_locked_at() to never return negative dentry") Signed-off-by: Christian Brauner <brauner@kernel.org> --- fs/namei.c | 65 ++++++++++++++++++++++++++++++++----------- include/linux/namei.h | 1 + kernel/audit_watch.c | 16 +++++++---- 3 files changed, 60 insertions(+), 22 deletions(-)