diff mbox series

[1/3] fs: use fake_file container for internal files with fake f_path

Message ID 20230609073239.957184-2-amir73il@gmail.com (mailing list archive)
State Under Review
Headers show
Series Reduce impact of overlayfs fake path files | expand

Commit Message

Amir Goldstein June 9, 2023, 7:32 a.m. UTC
Overlayfs and cachefiles use open_with_fake_path() to allocate internal
files, where overlayfs also puts a "fake" path in f_path - a path which
is not on the same fs as f_inode.

Allocate a container struct file_fake for those internal files, that
will be used to hold the fake path qlong with the real path.

This commit does not populate the extra fake_path field and leaves the
overlayfs internal file's f_path fake.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---
 fs/cachefiles/namei.c |  2 +-
 fs/file_table.c       | 85 +++++++++++++++++++++++++++++++++++--------
 fs/internal.h         |  5 ++-
 fs/namei.c            |  2 +-
 fs/open.c             | 11 +++---
 fs/overlayfs/file.c   |  2 +-
 include/linux/fs.h    | 13 ++++---
 7 files changed, 90 insertions(+), 30 deletions(-)

Comments

Christian Brauner June 9, 2023, 11:32 a.m. UTC | #1
On Fri, Jun 09, 2023 at 10:32:37AM +0300, Amir Goldstein wrote:
> Overlayfs and cachefiles use open_with_fake_path() to allocate internal
> files, where overlayfs also puts a "fake" path in f_path - a path which
> is not on the same fs as f_inode.
> 
> Allocate a container struct file_fake for those internal files, that
> will be used to hold the fake path qlong with the real path.
> 
> This commit does not populate the extra fake_path field and leaves the
> overlayfs internal file's f_path fake.
> 
> Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> ---
>  fs/cachefiles/namei.c |  2 +-
>  fs/file_table.c       | 85 +++++++++++++++++++++++++++++++++++--------
>  fs/internal.h         |  5 ++-
>  fs/namei.c            |  2 +-
>  fs/open.c             | 11 +++---
>  fs/overlayfs/file.c   |  2 +-
>  include/linux/fs.h    | 13 ++++---
>  7 files changed, 90 insertions(+), 30 deletions(-)
> 
> diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
> index 82219a8f6084..a71bdf2d03ba 100644
> --- a/fs/cachefiles/namei.c
> +++ b/fs/cachefiles/namei.c
> @@ -561,7 +561,7 @@ static bool cachefiles_open_file(struct cachefiles_object *object,
>  	path.mnt = cache->mnt;
>  	path.dentry = dentry;
>  	file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
> -				   d_backing_inode(dentry), cache->cache_cred);
> +				   &path, cache->cache_cred);
>  	if (IS_ERR(file)) {
>  		trace_cachefiles_vfs_error(object, d_backing_inode(dentry),
>  					   PTR_ERR(file),
> diff --git a/fs/file_table.c b/fs/file_table.c
> index 372653b92617..adc2a92faa52 100644
> --- a/fs/file_table.c
> +++ b/fs/file_table.c
> @@ -44,18 +44,48 @@ static struct kmem_cache *filp_cachep __read_mostly;
>  
>  static struct percpu_counter nr_files __cacheline_aligned_in_smp;
>  
> +/* Container for file with optional fake path to display in /proc files */
> +struct file_fake {
> +	struct file file;
> +	struct path fake_path;
> +};
> +
> +static inline struct file_fake *file_fake(struct file *f)
> +{
> +	return container_of(f, struct file_fake, file);
> +}
> +
> +/* Returns fake_path if one exists, f_path otherwise */
> +const struct path *file_fake_path(struct file *f)
> +{
> +	struct file_fake *ff = file_fake(f);
> +
> +	if (!(f->f_mode & FMODE_FAKE_PATH) || !ff->fake_path.dentry)
> +		return &f->f_path;
> +
> +	return &ff->fake_path;
> +}
> +EXPORT_SYMBOL(file_fake_path);
> +
>  static void file_free_rcu(struct rcu_head *head)
>  {
>  	struct file *f = container_of(head, struct file, f_rcuhead);
>  
>  	put_cred(f->f_cred);
> -	kmem_cache_free(filp_cachep, f);
> +	if (f->f_mode & FMODE_FAKE_PATH)
> +		kfree(file_fake(f));
> +	else
> +		kmem_cache_free(filp_cachep, f);
>  }
>  
>  static inline void file_free(struct file *f)
>  {
> +	struct file_fake *ff = file_fake(f);
> +
>  	security_file_free(f);
> -	if (!(f->f_mode & FMODE_NOACCOUNT))
> +	if (f->f_mode & FMODE_FAKE_PATH)
> +		path_put(&ff->fake_path);
> +	else
>  		percpu_counter_dec(&nr_files);
>  	call_rcu(&f->f_rcuhead, file_free_rcu);
>  }
> @@ -131,20 +161,15 @@ static int __init init_fs_stat_sysctls(void)
>  fs_initcall(init_fs_stat_sysctls);
>  #endif
>  
> -static struct file *__alloc_file(int flags, const struct cred *cred)
> +static int init_file(struct file *f, int flags, const struct cred *cred)
>  {
> -	struct file *f;
>  	int error;
>  
> -	f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
> -	if (unlikely(!f))
> -		return ERR_PTR(-ENOMEM);
> -
>  	f->f_cred = get_cred(cred);
>  	error = security_file_alloc(f);
>  	if (unlikely(error)) {
>  		file_free_rcu(&f->f_rcuhead);
> -		return ERR_PTR(error);
> +		return error;
>  	}
>  
>  	atomic_long_set(&f->f_count, 1);
> @@ -155,6 +180,22 @@ static struct file *__alloc_file(int flags, const struct cred *cred)
>  	f->f_mode = OPEN_FMODE(flags);
>  	/* f->f_version: 0 */
>  
> +	return 0;
> +}
> +
> +static struct file *__alloc_file(int flags, const struct cred *cred)
> +{
> +	struct file *f;
> +	int error;
> +
> +	f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
> +	if (unlikely(!f))
> +		return ERR_PTR(-ENOMEM);
> +
> +	error = init_file(f, flags, cred);
> +	if (unlikely(error))
> +		return ERR_PTR(error);
> +
>  	return f;
>  }
>  
> @@ -201,18 +242,32 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
>  }
>  
>  /*
> - * Variant of alloc_empty_file() that doesn't check and modify nr_files.
> + * Variant of alloc_empty_file() that allocates a file_fake container
> + * and doesn't check and modify nr_files.
>   *
>   * Should not be used unless there's a very good reason to do so.
>   */
> -struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
> +struct file *alloc_empty_file_fake(const struct path *fake_path, int flags,
> +				   const struct cred *cred)
>  {
> -	struct file *f = __alloc_file(flags, cred);
> +	struct file_fake *ff;
> +	int error;
>  
> -	if (!IS_ERR(f))
> -		f->f_mode |= FMODE_NOACCOUNT;
> +	ff = kzalloc(sizeof(struct file_fake), GFP_KERNEL);
> +	if (unlikely(!ff))
> +		return ERR_PTR(-ENOMEM);
>  
> -	return f;
> +	error = init_file(&ff->file, flags, cred);
> +	if (unlikely(error))
> +		return ERR_PTR(error);
> +
> +	ff->file.f_mode |= FMODE_FAKE_PATH;
> +	if (fake_path) {
> +		path_get(fake_path);
> +		ff->fake_path = *fake_path;
> +	}

Hm, I see that this check is mostly done for vfs_tmpfile_open() which
only fills in file->f_path in vfs_tmpfile() but leaves ff->fake_path
NULL.

So really I think having FMODE_FAKE_PATH set but ff->fake_path be NULL
is an invitation for NULL derefs sooner or later. I would simply
document that it's required to set ff->fake_path. For callers such as
vfs_tmpfile_open() it can just be path itself. IOW, vfs_tmpfile_open()
should set ff->fake_path to file->f_path.
Amir Goldstein June 9, 2023, 11:57 a.m. UTC | #2
On Fri, Jun 9, 2023 at 2:32 PM Christian Brauner <brauner@kernel.org> wrote:
>
> On Fri, Jun 09, 2023 at 10:32:37AM +0300, Amir Goldstein wrote:
> > Overlayfs and cachefiles use open_with_fake_path() to allocate internal
> > files, where overlayfs also puts a "fake" path in f_path - a path which
> > is not on the same fs as f_inode.
> >
> > Allocate a container struct file_fake for those internal files, that
> > will be used to hold the fake path qlong with the real path.
> >
> > This commit does not populate the extra fake_path field and leaves the
> > overlayfs internal file's f_path fake.
> >
> > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> > ---
> >  fs/cachefiles/namei.c |  2 +-
> >  fs/file_table.c       | 85 +++++++++++++++++++++++++++++++++++--------
> >  fs/internal.h         |  5 ++-
> >  fs/namei.c            |  2 +-
> >  fs/open.c             | 11 +++---
> >  fs/overlayfs/file.c   |  2 +-
> >  include/linux/fs.h    | 13 ++++---
> >  7 files changed, 90 insertions(+), 30 deletions(-)
> >
> > diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
> > index 82219a8f6084..a71bdf2d03ba 100644
> > --- a/fs/cachefiles/namei.c
> > +++ b/fs/cachefiles/namei.c
> > @@ -561,7 +561,7 @@ static bool cachefiles_open_file(struct cachefiles_object *object,
> >       path.mnt = cache->mnt;
> >       path.dentry = dentry;
> >       file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
> > -                                d_backing_inode(dentry), cache->cache_cred);
> > +                                &path, cache->cache_cred);
> >       if (IS_ERR(file)) {
> >               trace_cachefiles_vfs_error(object, d_backing_inode(dentry),
> >                                          PTR_ERR(file),
> > diff --git a/fs/file_table.c b/fs/file_table.c
> > index 372653b92617..adc2a92faa52 100644
> > --- a/fs/file_table.c
> > +++ b/fs/file_table.c
> > @@ -44,18 +44,48 @@ static struct kmem_cache *filp_cachep __read_mostly;
> >
> >  static struct percpu_counter nr_files __cacheline_aligned_in_smp;
> >
> > +/* Container for file with optional fake path to display in /proc files */
> > +struct file_fake {
> > +     struct file file;
> > +     struct path fake_path;
> > +};
> > +
> > +static inline struct file_fake *file_fake(struct file *f)
> > +{
> > +     return container_of(f, struct file_fake, file);
> > +}
> > +
> > +/* Returns fake_path if one exists, f_path otherwise */
> > +const struct path *file_fake_path(struct file *f)
> > +{
> > +     struct file_fake *ff = file_fake(f);
> > +
> > +     if (!(f->f_mode & FMODE_FAKE_PATH) || !ff->fake_path.dentry)
> > +             return &f->f_path;
> > +
> > +     return &ff->fake_path;
> > +}
> > +EXPORT_SYMBOL(file_fake_path);
> > +
> >  static void file_free_rcu(struct rcu_head *head)
> >  {
> >       struct file *f = container_of(head, struct file, f_rcuhead);
> >
> >       put_cred(f->f_cred);
> > -     kmem_cache_free(filp_cachep, f);
> > +     if (f->f_mode & FMODE_FAKE_PATH)
> > +             kfree(file_fake(f));
> > +     else
> > +             kmem_cache_free(filp_cachep, f);
> >  }
> >
> >  static inline void file_free(struct file *f)
> >  {
> > +     struct file_fake *ff = file_fake(f);
> > +
> >       security_file_free(f);
> > -     if (!(f->f_mode & FMODE_NOACCOUNT))
> > +     if (f->f_mode & FMODE_FAKE_PATH)
> > +             path_put(&ff->fake_path);
> > +     else
> >               percpu_counter_dec(&nr_files);
> >       call_rcu(&f->f_rcuhead, file_free_rcu);
> >  }
> > @@ -131,20 +161,15 @@ static int __init init_fs_stat_sysctls(void)
> >  fs_initcall(init_fs_stat_sysctls);
> >  #endif
> >
> > -static struct file *__alloc_file(int flags, const struct cred *cred)
> > +static int init_file(struct file *f, int flags, const struct cred *cred)
> >  {
> > -     struct file *f;
> >       int error;
> >
> > -     f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
> > -     if (unlikely(!f))
> > -             return ERR_PTR(-ENOMEM);
> > -
> >       f->f_cred = get_cred(cred);
> >       error = security_file_alloc(f);
> >       if (unlikely(error)) {
> >               file_free_rcu(&f->f_rcuhead);
> > -             return ERR_PTR(error);
> > +             return error;
> >       }
> >
> >       atomic_long_set(&f->f_count, 1);
> > @@ -155,6 +180,22 @@ static struct file *__alloc_file(int flags, const struct cred *cred)
> >       f->f_mode = OPEN_FMODE(flags);
> >       /* f->f_version: 0 */
> >
> > +     return 0;
> > +}
> > +
> > +static struct file *__alloc_file(int flags, const struct cred *cred)
> > +{
> > +     struct file *f;
> > +     int error;
> > +
> > +     f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
> > +     if (unlikely(!f))
> > +             return ERR_PTR(-ENOMEM);
> > +
> > +     error = init_file(f, flags, cred);
> > +     if (unlikely(error))
> > +             return ERR_PTR(error);
> > +
> >       return f;
> >  }
> >
> > @@ -201,18 +242,32 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
> >  }
> >
> >  /*
> > - * Variant of alloc_empty_file() that doesn't check and modify nr_files.
> > + * Variant of alloc_empty_file() that allocates a file_fake container
> > + * and doesn't check and modify nr_files.
> >   *
> >   * Should not be used unless there's a very good reason to do so.
> >   */
> > -struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
> > +struct file *alloc_empty_file_fake(const struct path *fake_path, int flags,
> > +                                const struct cred *cred)
> >  {
> > -     struct file *f = __alloc_file(flags, cred);
> > +     struct file_fake *ff;
> > +     int error;
> >
> > -     if (!IS_ERR(f))
> > -             f->f_mode |= FMODE_NOACCOUNT;
> > +     ff = kzalloc(sizeof(struct file_fake), GFP_KERNEL);
> > +     if (unlikely(!ff))
> > +             return ERR_PTR(-ENOMEM);
> >
> > -     return f;
> > +     error = init_file(&ff->file, flags, cred);
> > +     if (unlikely(error))
> > +             return ERR_PTR(error);
> > +
> > +     ff->file.f_mode |= FMODE_FAKE_PATH;
> > +     if (fake_path) {
> > +             path_get(fake_path);
> > +             ff->fake_path = *fake_path;
> > +     }
>
> Hm, I see that this check is mostly done for vfs_tmpfile_open() which
> only fills in file->f_path in vfs_tmpfile() but leaves ff->fake_path
> NULL.
>
> So really I think having FMODE_FAKE_PATH set but ff->fake_path be NULL
> is an invitation for NULL derefs sooner or later. I would simply
> document that it's required to set ff->fake_path. For callers such as
> vfs_tmpfile_open() it can just be path itself. IOW, vfs_tmpfile_open()
> should set ff->fake_path to file->f_path.

Makes sense.
I also took the liberty to re-arrange vfs_tmpfile_open() without the
unneeded if (!error) { nesting depth.

Thanks,
Amir.
Christian Brauner June 9, 2023, 12:12 p.m. UTC | #3
On Fri, Jun 09, 2023 at 02:57:20PM +0300, Amir Goldstein wrote:
> On Fri, Jun 9, 2023 at 2:32 PM Christian Brauner <brauner@kernel.org> wrote:
> >
> > On Fri, Jun 09, 2023 at 10:32:37AM +0300, Amir Goldstein wrote:
> > > Overlayfs and cachefiles use open_with_fake_path() to allocate internal
> > > files, where overlayfs also puts a "fake" path in f_path - a path which
> > > is not on the same fs as f_inode.
> > >
> > > Allocate a container struct file_fake for those internal files, that
> > > will be used to hold the fake path qlong with the real path.
> > >
> > > This commit does not populate the extra fake_path field and leaves the
> > > overlayfs internal file's f_path fake.
> > >
> > > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> > > ---
> > >  fs/cachefiles/namei.c |  2 +-
> > >  fs/file_table.c       | 85 +++++++++++++++++++++++++++++++++++--------
> > >  fs/internal.h         |  5 ++-
> > >  fs/namei.c            |  2 +-
> > >  fs/open.c             | 11 +++---
> > >  fs/overlayfs/file.c   |  2 +-
> > >  include/linux/fs.h    | 13 ++++---
> > >  7 files changed, 90 insertions(+), 30 deletions(-)
> > >
> > > diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
> > > index 82219a8f6084..a71bdf2d03ba 100644
> > > --- a/fs/cachefiles/namei.c
> > > +++ b/fs/cachefiles/namei.c
> > > @@ -561,7 +561,7 @@ static bool cachefiles_open_file(struct cachefiles_object *object,
> > >       path.mnt = cache->mnt;
> > >       path.dentry = dentry;
> > >       file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
> > > -                                d_backing_inode(dentry), cache->cache_cred);
> > > +                                &path, cache->cache_cred);
> > >       if (IS_ERR(file)) {
> > >               trace_cachefiles_vfs_error(object, d_backing_inode(dentry),
> > >                                          PTR_ERR(file),
> > > diff --git a/fs/file_table.c b/fs/file_table.c
> > > index 372653b92617..adc2a92faa52 100644
> > > --- a/fs/file_table.c
> > > +++ b/fs/file_table.c
> > > @@ -44,18 +44,48 @@ static struct kmem_cache *filp_cachep __read_mostly;
> > >
> > >  static struct percpu_counter nr_files __cacheline_aligned_in_smp;
> > >
> > > +/* Container for file with optional fake path to display in /proc files */
> > > +struct file_fake {
> > > +     struct file file;
> > > +     struct path fake_path;
> > > +};
> > > +
> > > +static inline struct file_fake *file_fake(struct file *f)
> > > +{
> > > +     return container_of(f, struct file_fake, file);
> > > +}
> > > +
> > > +/* Returns fake_path if one exists, f_path otherwise */
> > > +const struct path *file_fake_path(struct file *f)
> > > +{
> > > +     struct file_fake *ff = file_fake(f);
> > > +
> > > +     if (!(f->f_mode & FMODE_FAKE_PATH) || !ff->fake_path.dentry)
> > > +             return &f->f_path;
> > > +
> > > +     return &ff->fake_path;
> > > +}
> > > +EXPORT_SYMBOL(file_fake_path);
> > > +
> > >  static void file_free_rcu(struct rcu_head *head)
> > >  {
> > >       struct file *f = container_of(head, struct file, f_rcuhead);
> > >
> > >       put_cred(f->f_cred);
> > > -     kmem_cache_free(filp_cachep, f);
> > > +     if (f->f_mode & FMODE_FAKE_PATH)
> > > +             kfree(file_fake(f));
> > > +     else
> > > +             kmem_cache_free(filp_cachep, f);
> > >  }
> > >
> > >  static inline void file_free(struct file *f)
> > >  {
> > > +     struct file_fake *ff = file_fake(f);
> > > +
> > >       security_file_free(f);
> > > -     if (!(f->f_mode & FMODE_NOACCOUNT))
> > > +     if (f->f_mode & FMODE_FAKE_PATH)
> > > +             path_put(&ff->fake_path);
> > > +     else
> > >               percpu_counter_dec(&nr_files);
> > >       call_rcu(&f->f_rcuhead, file_free_rcu);
> > >  }
> > > @@ -131,20 +161,15 @@ static int __init init_fs_stat_sysctls(void)
> > >  fs_initcall(init_fs_stat_sysctls);
> > >  #endif
> > >
> > > -static struct file *__alloc_file(int flags, const struct cred *cred)
> > > +static int init_file(struct file *f, int flags, const struct cred *cred)
> > >  {
> > > -     struct file *f;
> > >       int error;
> > >
> > > -     f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
> > > -     if (unlikely(!f))
> > > -             return ERR_PTR(-ENOMEM);
> > > -
> > >       f->f_cred = get_cred(cred);
> > >       error = security_file_alloc(f);
> > >       if (unlikely(error)) {
> > >               file_free_rcu(&f->f_rcuhead);
> > > -             return ERR_PTR(error);
> > > +             return error;
> > >       }
> > >
> > >       atomic_long_set(&f->f_count, 1);
> > > @@ -155,6 +180,22 @@ static struct file *__alloc_file(int flags, const struct cred *cred)
> > >       f->f_mode = OPEN_FMODE(flags);
> > >       /* f->f_version: 0 */
> > >
> > > +     return 0;
> > > +}
> > > +
> > > +static struct file *__alloc_file(int flags, const struct cred *cred)
> > > +{
> > > +     struct file *f;
> > > +     int error;
> > > +
> > > +     f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
> > > +     if (unlikely(!f))
> > > +             return ERR_PTR(-ENOMEM);
> > > +
> > > +     error = init_file(f, flags, cred);
> > > +     if (unlikely(error))
> > > +             return ERR_PTR(error);
> > > +
> > >       return f;
> > >  }
> > >
> > > @@ -201,18 +242,32 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
> > >  }
> > >
> > >  /*
> > > - * Variant of alloc_empty_file() that doesn't check and modify nr_files.
> > > + * Variant of alloc_empty_file() that allocates a file_fake container
> > > + * and doesn't check and modify nr_files.
> > >   *
> > >   * Should not be used unless there's a very good reason to do so.
> > >   */
> > > -struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
> > > +struct file *alloc_empty_file_fake(const struct path *fake_path, int flags,
> > > +                                const struct cred *cred)
> > >  {
> > > -     struct file *f = __alloc_file(flags, cred);
> > > +     struct file_fake *ff;
> > > +     int error;
> > >
> > > -     if (!IS_ERR(f))
> > > -             f->f_mode |= FMODE_NOACCOUNT;
> > > +     ff = kzalloc(sizeof(struct file_fake), GFP_KERNEL);
> > > +     if (unlikely(!ff))
> > > +             return ERR_PTR(-ENOMEM);
> > >
> > > -     return f;
> > > +     error = init_file(&ff->file, flags, cred);
> > > +     if (unlikely(error))
> > > +             return ERR_PTR(error);
> > > +
> > > +     ff->file.f_mode |= FMODE_FAKE_PATH;
> > > +     if (fake_path) {
> > > +             path_get(fake_path);
> > > +             ff->fake_path = *fake_path;
> > > +     }
> >
> > Hm, I see that this check is mostly done for vfs_tmpfile_open() which
> > only fills in file->f_path in vfs_tmpfile() but leaves ff->fake_path
> > NULL.
> >
> > So really I think having FMODE_FAKE_PATH set but ff->fake_path be NULL
> > is an invitation for NULL derefs sooner or later. I would simply
> > document that it's required to set ff->fake_path. For callers such as
> > vfs_tmpfile_open() it can just be path itself. IOW, vfs_tmpfile_open()
> > should set ff->fake_path to file->f_path.
> 
> Makes sense.
> I also took the liberty to re-arrange vfs_tmpfile_open() without the
> unneeded if (!error) { nesting depth.

Yes, please. I had a rough sketch just for my own amusement...

fs/namei.c
  struct file *vfs_tmpfile_open(struct mnt_idmap *idmap,
                                const struct path *parentpath, umode_t mode,
                                int open_flag, const struct cred *cred)
  {
          struct file *file;
          int error;

          file = alloc_empty_file_fake(open_flag, cred);
          if (IS_ERR(file))
                  return file;

          error = vfs_tmpfile(idmap, parentpath, file, mode);
          if (error) {
                  fput(file);
                  return ERR_PTR(error);
          }

          return file_set_fake_path(file, &file->f_path);
  }
  EXPORT_SYMBOL(vfs_tmpfile_open);

fs/internal.h
  struct file *file_set_fake_path(struct file *file, const struct path *fake_path);

fs/open.c
  struct file *open_with_fake_path(const struct path *fake_path, int flags,
                                   const struct path *path,
                                   const struct cred *cred)
  {
          int error;
          struct file *file;

          file = alloc_empty_file_fake(flags, cred);
          if (IS_ERR(file))
                  return file;

          file->f_path = *path;
          error = do_dentry_open(file, d_inode(path->dentry), NULL);
          if (error) {
                  fput(file);
                  return ERR_PTR(error);
          }

          return file_set_fake_path(file, fake_path);
  }

fs/file_table.c
  struct file *alloc_empty_file_fake(int flags, const struct cred *cred)
  {
          struct file_fake *ff;
          int error;

          ff = kzalloc(sizeof(struct file_fake), GFP_KERNEL);
          if (unlikely(!ff))
                  return ERR_PTR(-ENOMEM);

          error = init_file(&ff->file, flags, cred);
          if (unlikely(error))
                  return ERR_PTR(error);

          ff->file.f_mode |= FMODE_FAKE_PATH;
          return &ff->file;
  }

  struct file *file_set_fake_path(struct file *file, const struct path *fake_path)
  {
          if (file->f_mode & FMODE_FAKE_PATH) {
                  struct file_fake *ff = file_fake(file);
                  ff->fake_path = *fake_path;
          }

          return file;
  }
Amir Goldstein June 9, 2023, 12:20 p.m. UTC | #4
On Fri, Jun 9, 2023 at 3:12 PM Christian Brauner <brauner@kernel.org> wrote:
>
> On Fri, Jun 09, 2023 at 02:57:20PM +0300, Amir Goldstein wrote:
> > On Fri, Jun 9, 2023 at 2:32 PM Christian Brauner <brauner@kernel.org> wrote:
> > >
> > > On Fri, Jun 09, 2023 at 10:32:37AM +0300, Amir Goldstein wrote:
> > > > Overlayfs and cachefiles use open_with_fake_path() to allocate internal
> > > > files, where overlayfs also puts a "fake" path in f_path - a path which
> > > > is not on the same fs as f_inode.
> > > >
> > > > Allocate a container struct file_fake for those internal files, that
> > > > will be used to hold the fake path qlong with the real path.
> > > >
> > > > This commit does not populate the extra fake_path field and leaves the
> > > > overlayfs internal file's f_path fake.
> > > >
> > > > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> > > > ---
> > > >  fs/cachefiles/namei.c |  2 +-
> > > >  fs/file_table.c       | 85 +++++++++++++++++++++++++++++++++++--------
> > > >  fs/internal.h         |  5 ++-
> > > >  fs/namei.c            |  2 +-
> > > >  fs/open.c             | 11 +++---
> > > >  fs/overlayfs/file.c   |  2 +-
> > > >  include/linux/fs.h    | 13 ++++---
> > > >  7 files changed, 90 insertions(+), 30 deletions(-)
> > > >
> > > > diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
> > > > index 82219a8f6084..a71bdf2d03ba 100644
> > > > --- a/fs/cachefiles/namei.c
> > > > +++ b/fs/cachefiles/namei.c
> > > > @@ -561,7 +561,7 @@ static bool cachefiles_open_file(struct cachefiles_object *object,
> > > >       path.mnt = cache->mnt;
> > > >       path.dentry = dentry;
> > > >       file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
> > > > -                                d_backing_inode(dentry), cache->cache_cred);
> > > > +                                &path, cache->cache_cred);
> > > >       if (IS_ERR(file)) {
> > > >               trace_cachefiles_vfs_error(object, d_backing_inode(dentry),
> > > >                                          PTR_ERR(file),
> > > > diff --git a/fs/file_table.c b/fs/file_table.c
> > > > index 372653b92617..adc2a92faa52 100644
> > > > --- a/fs/file_table.c
> > > > +++ b/fs/file_table.c
> > > > @@ -44,18 +44,48 @@ static struct kmem_cache *filp_cachep __read_mostly;
> > > >
> > > >  static struct percpu_counter nr_files __cacheline_aligned_in_smp;
> > > >
> > > > +/* Container for file with optional fake path to display in /proc files */
> > > > +struct file_fake {
> > > > +     struct file file;
> > > > +     struct path fake_path;
> > > > +};
> > > > +
> > > > +static inline struct file_fake *file_fake(struct file *f)
> > > > +{
> > > > +     return container_of(f, struct file_fake, file);
> > > > +}
> > > > +
> > > > +/* Returns fake_path if one exists, f_path otherwise */
> > > > +const struct path *file_fake_path(struct file *f)
> > > > +{
> > > > +     struct file_fake *ff = file_fake(f);
> > > > +
> > > > +     if (!(f->f_mode & FMODE_FAKE_PATH) || !ff->fake_path.dentry)
> > > > +             return &f->f_path;
> > > > +
> > > > +     return &ff->fake_path;
> > > > +}
> > > > +EXPORT_SYMBOL(file_fake_path);
> > > > +
> > > >  static void file_free_rcu(struct rcu_head *head)
> > > >  {
> > > >       struct file *f = container_of(head, struct file, f_rcuhead);
> > > >
> > > >       put_cred(f->f_cred);
> > > > -     kmem_cache_free(filp_cachep, f);
> > > > +     if (f->f_mode & FMODE_FAKE_PATH)
> > > > +             kfree(file_fake(f));
> > > > +     else
> > > > +             kmem_cache_free(filp_cachep, f);
> > > >  }
> > > >
> > > >  static inline void file_free(struct file *f)
> > > >  {
> > > > +     struct file_fake *ff = file_fake(f);
> > > > +
> > > >       security_file_free(f);
> > > > -     if (!(f->f_mode & FMODE_NOACCOUNT))
> > > > +     if (f->f_mode & FMODE_FAKE_PATH)
> > > > +             path_put(&ff->fake_path);
> > > > +     else
> > > >               percpu_counter_dec(&nr_files);
> > > >       call_rcu(&f->f_rcuhead, file_free_rcu);
> > > >  }
> > > > @@ -131,20 +161,15 @@ static int __init init_fs_stat_sysctls(void)
> > > >  fs_initcall(init_fs_stat_sysctls);
> > > >  #endif
> > > >
> > > > -static struct file *__alloc_file(int flags, const struct cred *cred)
> > > > +static int init_file(struct file *f, int flags, const struct cred *cred)
> > > >  {
> > > > -     struct file *f;
> > > >       int error;
> > > >
> > > > -     f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
> > > > -     if (unlikely(!f))
> > > > -             return ERR_PTR(-ENOMEM);
> > > > -
> > > >       f->f_cred = get_cred(cred);
> > > >       error = security_file_alloc(f);
> > > >       if (unlikely(error)) {
> > > >               file_free_rcu(&f->f_rcuhead);
> > > > -             return ERR_PTR(error);
> > > > +             return error;
> > > >       }
> > > >
> > > >       atomic_long_set(&f->f_count, 1);
> > > > @@ -155,6 +180,22 @@ static struct file *__alloc_file(int flags, const struct cred *cred)
> > > >       f->f_mode = OPEN_FMODE(flags);
> > > >       /* f->f_version: 0 */
> > > >
> > > > +     return 0;
> > > > +}
> > > > +
> > > > +static struct file *__alloc_file(int flags, const struct cred *cred)
> > > > +{
> > > > +     struct file *f;
> > > > +     int error;
> > > > +
> > > > +     f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
> > > > +     if (unlikely(!f))
> > > > +             return ERR_PTR(-ENOMEM);
> > > > +
> > > > +     error = init_file(f, flags, cred);
> > > > +     if (unlikely(error))
> > > > +             return ERR_PTR(error);
> > > > +
> > > >       return f;
> > > >  }
> > > >
> > > > @@ -201,18 +242,32 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
> > > >  }
> > > >
> > > >  /*
> > > > - * Variant of alloc_empty_file() that doesn't check and modify nr_files.
> > > > + * Variant of alloc_empty_file() that allocates a file_fake container
> > > > + * and doesn't check and modify nr_files.
> > > >   *
> > > >   * Should not be used unless there's a very good reason to do so.
> > > >   */
> > > > -struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
> > > > +struct file *alloc_empty_file_fake(const struct path *fake_path, int flags,
> > > > +                                const struct cred *cred)
> > > >  {
> > > > -     struct file *f = __alloc_file(flags, cred);
> > > > +     struct file_fake *ff;
> > > > +     int error;
> > > >
> > > > -     if (!IS_ERR(f))
> > > > -             f->f_mode |= FMODE_NOACCOUNT;
> > > > +     ff = kzalloc(sizeof(struct file_fake), GFP_KERNEL);
> > > > +     if (unlikely(!ff))
> > > > +             return ERR_PTR(-ENOMEM);
> > > >
> > > > -     return f;
> > > > +     error = init_file(&ff->file, flags, cred);
> > > > +     if (unlikely(error))
> > > > +             return ERR_PTR(error);
> > > > +
> > > > +     ff->file.f_mode |= FMODE_FAKE_PATH;
> > > > +     if (fake_path) {
> > > > +             path_get(fake_path);
> > > > +             ff->fake_path = *fake_path;
> > > > +     }
> > >
> > > Hm, I see that this check is mostly done for vfs_tmpfile_open() which
> > > only fills in file->f_path in vfs_tmpfile() but leaves ff->fake_path
> > > NULL.
> > >
> > > So really I think having FMODE_FAKE_PATH set but ff->fake_path be NULL
> > > is an invitation for NULL derefs sooner or later. I would simply
> > > document that it's required to set ff->fake_path. For callers such as
> > > vfs_tmpfile_open() it can just be path itself. IOW, vfs_tmpfile_open()
> > > should set ff->fake_path to file->f_path.
> >
> > Makes sense.
> > I also took the liberty to re-arrange vfs_tmpfile_open() without the
> > unneeded if (!error) { nesting depth.
>
> Yes, please. I had a rough sketch just for my own amusement...

Happy to make your Friday more amusing :-D

>
> fs/namei.c
>   struct file *vfs_tmpfile_open(struct mnt_idmap *idmap,
>                                 const struct path *parentpath, umode_t mode,
>                                 int open_flag, const struct cred *cred)
>   {
>           struct file *file;
>           int error;
>
>           file = alloc_empty_file_fake(open_flag, cred);
>           if (IS_ERR(file))
>                   return file;
>
>           error = vfs_tmpfile(idmap, parentpath, file, mode);
>           if (error) {
>                   fput(file);
>                   return ERR_PTR(error);
>           }
>
>           return file_set_fake_path(file, &file->f_path);
>   }
>   EXPORT_SYMBOL(vfs_tmpfile_open);
>
> fs/internal.h
>   struct file *file_set_fake_path(struct file *file, const struct path *fake_path);
>
> fs/open.c
>   struct file *open_with_fake_path(const struct path *fake_path, int flags,
>                                    const struct path *path,
>                                    const struct cred *cred)
>   {
>           int error;
>           struct file *file;
>
>           file = alloc_empty_file_fake(flags, cred);
>           if (IS_ERR(file))
>                   return file;
>
>           file->f_path = *path;
>           error = do_dentry_open(file, d_inode(path->dentry), NULL);
>           if (error) {
>                   fput(file);
>                   return ERR_PTR(error);
>           }
>
>           return file_set_fake_path(file, fake_path);
>   }
>
> fs/file_table.c
>   struct file *alloc_empty_file_fake(int flags, const struct cred *cred)
>   {
>           struct file_fake *ff;
>           int error;
>
>           ff = kzalloc(sizeof(struct file_fake), GFP_KERNEL);
>           if (unlikely(!ff))
>                   return ERR_PTR(-ENOMEM);
>
>           error = init_file(&ff->file, flags, cred);
>           if (unlikely(error))
>                   return ERR_PTR(error);
>
>           ff->file.f_mode |= FMODE_FAKE_PATH;
>           return &ff->file;
>   }
>
>   struct file *file_set_fake_path(struct file *file, const struct path *fake_path)
>   {
>           if (file->f_mode & FMODE_FAKE_PATH) {
>                   struct file_fake *ff = file_fake(file);
>                   ff->fake_path = *fake_path;
>           }
>
>           return file;
>   }
>

Heh, I also started with file_set_fake_path() but I decided that it's not
worth it, because no code should be messing with this and I just changed
file_fake_path() to be non-const and used *file_fake_path(file) = fake_path
in these two helpers.

I will add file_set_fake_path *only* if you insist.

Thanks,
Amir.
Christian Brauner June 9, 2023, 12:54 p.m. UTC | #5
On Fri, Jun 09, 2023 at 03:20:14PM +0300, Amir Goldstein wrote:
> On Fri, Jun 9, 2023 at 3:12 PM Christian Brauner <brauner@kernel.org> wrote:
> >
> > On Fri, Jun 09, 2023 at 02:57:20PM +0300, Amir Goldstein wrote:
> > > On Fri, Jun 9, 2023 at 2:32 PM Christian Brauner <brauner@kernel.org> wrote:
> > > >
> > > > On Fri, Jun 09, 2023 at 10:32:37AM +0300, Amir Goldstein wrote:
> > > > > Overlayfs and cachefiles use open_with_fake_path() to allocate internal
> > > > > files, where overlayfs also puts a "fake" path in f_path - a path which
> > > > > is not on the same fs as f_inode.
> > > > >
> > > > > Allocate a container struct file_fake for those internal files, that
> > > > > will be used to hold the fake path qlong with the real path.
> > > > >
> > > > > This commit does not populate the extra fake_path field and leaves the
> > > > > overlayfs internal file's f_path fake.
> > > > >
> > > > > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> > > > > ---
> > > > >  fs/cachefiles/namei.c |  2 +-
> > > > >  fs/file_table.c       | 85 +++++++++++++++++++++++++++++++++++--------
> > > > >  fs/internal.h         |  5 ++-
> > > > >  fs/namei.c            |  2 +-
> > > > >  fs/open.c             | 11 +++---
> > > > >  fs/overlayfs/file.c   |  2 +-
> > > > >  include/linux/fs.h    | 13 ++++---
> > > > >  7 files changed, 90 insertions(+), 30 deletions(-)
> > > > >
> > > > > diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
> > > > > index 82219a8f6084..a71bdf2d03ba 100644
> > > > > --- a/fs/cachefiles/namei.c
> > > > > +++ b/fs/cachefiles/namei.c
> > > > > @@ -561,7 +561,7 @@ static bool cachefiles_open_file(struct cachefiles_object *object,
> > > > >       path.mnt = cache->mnt;
> > > > >       path.dentry = dentry;
> > > > >       file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
> > > > > -                                d_backing_inode(dentry), cache->cache_cred);
> > > > > +                                &path, cache->cache_cred);
> > > > >       if (IS_ERR(file)) {
> > > > >               trace_cachefiles_vfs_error(object, d_backing_inode(dentry),
> > > > >                                          PTR_ERR(file),
> > > > > diff --git a/fs/file_table.c b/fs/file_table.c
> > > > > index 372653b92617..adc2a92faa52 100644
> > > > > --- a/fs/file_table.c
> > > > > +++ b/fs/file_table.c
> > > > > @@ -44,18 +44,48 @@ static struct kmem_cache *filp_cachep __read_mostly;
> > > > >
> > > > >  static struct percpu_counter nr_files __cacheline_aligned_in_smp;
> > > > >
> > > > > +/* Container for file with optional fake path to display in /proc files */
> > > > > +struct file_fake {
> > > > > +     struct file file;
> > > > > +     struct path fake_path;
> > > > > +};
> > > > > +
> > > > > +static inline struct file_fake *file_fake(struct file *f)
> > > > > +{
> > > > > +     return container_of(f, struct file_fake, file);
> > > > > +}
> > > > > +
> > > > > +/* Returns fake_path if one exists, f_path otherwise */
> > > > > +const struct path *file_fake_path(struct file *f)
> > > > > +{
> > > > > +     struct file_fake *ff = file_fake(f);
> > > > > +
> > > > > +     if (!(f->f_mode & FMODE_FAKE_PATH) || !ff->fake_path.dentry)
> > > > > +             return &f->f_path;
> > > > > +
> > > > > +     return &ff->fake_path;
> > > > > +}
> > > > > +EXPORT_SYMBOL(file_fake_path);
> > > > > +
> > > > >  static void file_free_rcu(struct rcu_head *head)
> > > > >  {
> > > > >       struct file *f = container_of(head, struct file, f_rcuhead);
> > > > >
> > > > >       put_cred(f->f_cred);
> > > > > -     kmem_cache_free(filp_cachep, f);
> > > > > +     if (f->f_mode & FMODE_FAKE_PATH)
> > > > > +             kfree(file_fake(f));
> > > > > +     else
> > > > > +             kmem_cache_free(filp_cachep, f);
> > > > >  }
> > > > >
> > > > >  static inline void file_free(struct file *f)
> > > > >  {
> > > > > +     struct file_fake *ff = file_fake(f);
> > > > > +
> > > > >       security_file_free(f);
> > > > > -     if (!(f->f_mode & FMODE_NOACCOUNT))
> > > > > +     if (f->f_mode & FMODE_FAKE_PATH)
> > > > > +             path_put(&ff->fake_path);
> > > > > +     else
> > > > >               percpu_counter_dec(&nr_files);
> > > > >       call_rcu(&f->f_rcuhead, file_free_rcu);
> > > > >  }
> > > > > @@ -131,20 +161,15 @@ static int __init init_fs_stat_sysctls(void)
> > > > >  fs_initcall(init_fs_stat_sysctls);
> > > > >  #endif
> > > > >
> > > > > -static struct file *__alloc_file(int flags, const struct cred *cred)
> > > > > +static int init_file(struct file *f, int flags, const struct cred *cred)
> > > > >  {
> > > > > -     struct file *f;
> > > > >       int error;
> > > > >
> > > > > -     f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
> > > > > -     if (unlikely(!f))
> > > > > -             return ERR_PTR(-ENOMEM);
> > > > > -
> > > > >       f->f_cred = get_cred(cred);
> > > > >       error = security_file_alloc(f);
> > > > >       if (unlikely(error)) {
> > > > >               file_free_rcu(&f->f_rcuhead);
> > > > > -             return ERR_PTR(error);
> > > > > +             return error;
> > > > >       }
> > > > >
> > > > >       atomic_long_set(&f->f_count, 1);
> > > > > @@ -155,6 +180,22 @@ static struct file *__alloc_file(int flags, const struct cred *cred)
> > > > >       f->f_mode = OPEN_FMODE(flags);
> > > > >       /* f->f_version: 0 */
> > > > >
> > > > > +     return 0;
> > > > > +}
> > > > > +
> > > > > +static struct file *__alloc_file(int flags, const struct cred *cred)
> > > > > +{
> > > > > +     struct file *f;
> > > > > +     int error;
> > > > > +
> > > > > +     f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
> > > > > +     if (unlikely(!f))
> > > > > +             return ERR_PTR(-ENOMEM);
> > > > > +
> > > > > +     error = init_file(f, flags, cred);
> > > > > +     if (unlikely(error))
> > > > > +             return ERR_PTR(error);
> > > > > +
> > > > >       return f;
> > > > >  }
> > > > >
> > > > > @@ -201,18 +242,32 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
> > > > >  }
> > > > >
> > > > >  /*
> > > > > - * Variant of alloc_empty_file() that doesn't check and modify nr_files.
> > > > > + * Variant of alloc_empty_file() that allocates a file_fake container
> > > > > + * and doesn't check and modify nr_files.
> > > > >   *
> > > > >   * Should not be used unless there's a very good reason to do so.
> > > > >   */
> > > > > -struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
> > > > > +struct file *alloc_empty_file_fake(const struct path *fake_path, int flags,
> > > > > +                                const struct cred *cred)
> > > > >  {
> > > > > -     struct file *f = __alloc_file(flags, cred);
> > > > > +     struct file_fake *ff;
> > > > > +     int error;
> > > > >
> > > > > -     if (!IS_ERR(f))
> > > > > -             f->f_mode |= FMODE_NOACCOUNT;
> > > > > +     ff = kzalloc(sizeof(struct file_fake), GFP_KERNEL);
> > > > > +     if (unlikely(!ff))
> > > > > +             return ERR_PTR(-ENOMEM);
> > > > >
> > > > > -     return f;
> > > > > +     error = init_file(&ff->file, flags, cred);
> > > > > +     if (unlikely(error))
> > > > > +             return ERR_PTR(error);
> > > > > +
> > > > > +     ff->file.f_mode |= FMODE_FAKE_PATH;
> > > > > +     if (fake_path) {
> > > > > +             path_get(fake_path);
> > > > > +             ff->fake_path = *fake_path;
> > > > > +     }
> > > >
> > > > Hm, I see that this check is mostly done for vfs_tmpfile_open() which
> > > > only fills in file->f_path in vfs_tmpfile() but leaves ff->fake_path
> > > > NULL.
> > > >
> > > > So really I think having FMODE_FAKE_PATH set but ff->fake_path be NULL
> > > > is an invitation for NULL derefs sooner or later. I would simply
> > > > document that it's required to set ff->fake_path. For callers such as
> > > > vfs_tmpfile_open() it can just be path itself. IOW, vfs_tmpfile_open()
> > > > should set ff->fake_path to file->f_path.
> > >
> > > Makes sense.
> > > I also took the liberty to re-arrange vfs_tmpfile_open() without the
> > > unneeded if (!error) { nesting depth.
> >
> > Yes, please. I had a rough sketch just for my own amusement...
> 
> Happy to make your Friday more amusing :-D
> 
> >
> > fs/namei.c
> >   struct file *vfs_tmpfile_open(struct mnt_idmap *idmap,
> >                                 const struct path *parentpath, umode_t mode,
> >                                 int open_flag, const struct cred *cred)
> >   {
> >           struct file *file;
> >           int error;
> >
> >           file = alloc_empty_file_fake(open_flag, cred);
> >           if (IS_ERR(file))
> >                   return file;
> >
> >           error = vfs_tmpfile(idmap, parentpath, file, mode);
> >           if (error) {
> >                   fput(file);
> >                   return ERR_PTR(error);
> >           }
> >
> >           return file_set_fake_path(file, &file->f_path);
> >   }
> >   EXPORT_SYMBOL(vfs_tmpfile_open);
> >
> > fs/internal.h
> >   struct file *file_set_fake_path(struct file *file, const struct path *fake_path);
> >
> > fs/open.c
> >   struct file *open_with_fake_path(const struct path *fake_path, int flags,
> >                                    const struct path *path,
> >                                    const struct cred *cred)
> >   {
> >           int error;
> >           struct file *file;
> >
> >           file = alloc_empty_file_fake(flags, cred);
> >           if (IS_ERR(file))
> >                   return file;
> >
> >           file->f_path = *path;
> >           error = do_dentry_open(file, d_inode(path->dentry), NULL);
> >           if (error) {
> >                   fput(file);
> >                   return ERR_PTR(error);
> >           }
> >
> >           return file_set_fake_path(file, fake_path);
> >   }
> >
> > fs/file_table.c
> >   struct file *alloc_empty_file_fake(int flags, const struct cred *cred)
> >   {
> >           struct file_fake *ff;
> >           int error;
> >
> >           ff = kzalloc(sizeof(struct file_fake), GFP_KERNEL);
> >           if (unlikely(!ff))
> >                   return ERR_PTR(-ENOMEM);
> >
> >           error = init_file(&ff->file, flags, cred);
> >           if (unlikely(error))
> >                   return ERR_PTR(error);
> >
> >           ff->file.f_mode |= FMODE_FAKE_PATH;
> >           return &ff->file;
> >   }
> >
> >   struct file *file_set_fake_path(struct file *file, const struct path *fake_path)
> >   {
> >           if (file->f_mode & FMODE_FAKE_PATH) {
> >                   struct file_fake *ff = file_fake(file);
> >                   ff->fake_path = *fake_path;
> >           }
> >
> >           return file;
> >   }
> >
> 
> Heh, I also started with file_set_fake_path() but I decided that it's not
> worth it, because no code should be messing with this and I just changed
> file_fake_path() to be non-const and used *file_fake_path(file) = fake_path
> in these two helpers.

Hm, I don't understand. This is non-exported and only visible in thing
that can use internal.h so only core vfs coe.

The only places that use this are vfs_tmpfile_open() and
open_with_fake_path(). It allows us to remove the additional argument
from alloc_empty_file_fake() and that's what I really like because now
we don't have this pass NULL in vfs_tmpfile_open() and fill in later vs
doing it in one step.

Hm, one thing I realized is that this moves vfs_tmpfile_open() out of
filp_cachep which isn't great, no? That's a pretty heavily used codepath
so it feels that it should probably continue to use the cache?

What about keeping FMODE_NOACCOUNT and adding FMODE_FAKE_PATH. I think
that might be better. Christoph has just removed 3 FMODE_* flags with
his decoupling of block specific flags from file generic flags. So as
far as I'm concerned this wouldn't be a problem.
Christian Brauner June 9, 2023, 1 p.m. UTC | #6
On Fri, Jun 09, 2023 at 02:54:32PM +0200, Christian Brauner wrote:
> On Fri, Jun 09, 2023 at 03:20:14PM +0300, Amir Goldstein wrote:
> > On Fri, Jun 9, 2023 at 3:12 PM Christian Brauner <brauner@kernel.org> wrote:
> > >
> > > On Fri, Jun 09, 2023 at 02:57:20PM +0300, Amir Goldstein wrote:
> > > > On Fri, Jun 9, 2023 at 2:32 PM Christian Brauner <brauner@kernel.org> wrote:
> > > > >
> > > > > On Fri, Jun 09, 2023 at 10:32:37AM +0300, Amir Goldstein wrote:
> > > > > > Overlayfs and cachefiles use open_with_fake_path() to allocate internal
> > > > > > files, where overlayfs also puts a "fake" path in f_path - a path which
> > > > > > is not on the same fs as f_inode.
> > > > > >
> > > > > > Allocate a container struct file_fake for those internal files, that
> > > > > > will be used to hold the fake path qlong with the real path.
> > > > > >
> > > > > > This commit does not populate the extra fake_path field and leaves the
> > > > > > overlayfs internal file's f_path fake.
> > > > > >
> > > > > > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> > > > > > ---
> > > > > >  fs/cachefiles/namei.c |  2 +-
> > > > > >  fs/file_table.c       | 85 +++++++++++++++++++++++++++++++++++--------
> > > > > >  fs/internal.h         |  5 ++-
> > > > > >  fs/namei.c            |  2 +-
> > > > > >  fs/open.c             | 11 +++---
> > > > > >  fs/overlayfs/file.c   |  2 +-
> > > > > >  include/linux/fs.h    | 13 ++++---
> > > > > >  7 files changed, 90 insertions(+), 30 deletions(-)
> > > > > >
> > > > > > diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
> > > > > > index 82219a8f6084..a71bdf2d03ba 100644
> > > > > > --- a/fs/cachefiles/namei.c
> > > > > > +++ b/fs/cachefiles/namei.c
> > > > > > @@ -561,7 +561,7 @@ static bool cachefiles_open_file(struct cachefiles_object *object,
> > > > > >       path.mnt = cache->mnt;
> > > > > >       path.dentry = dentry;
> > > > > >       file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
> > > > > > -                                d_backing_inode(dentry), cache->cache_cred);
> > > > > > +                                &path, cache->cache_cred);
> > > > > >       if (IS_ERR(file)) {
> > > > > >               trace_cachefiles_vfs_error(object, d_backing_inode(dentry),
> > > > > >                                          PTR_ERR(file),
> > > > > > diff --git a/fs/file_table.c b/fs/file_table.c
> > > > > > index 372653b92617..adc2a92faa52 100644
> > > > > > --- a/fs/file_table.c
> > > > > > +++ b/fs/file_table.c
> > > > > > @@ -44,18 +44,48 @@ static struct kmem_cache *filp_cachep __read_mostly;
> > > > > >
> > > > > >  static struct percpu_counter nr_files __cacheline_aligned_in_smp;
> > > > > >
> > > > > > +/* Container for file with optional fake path to display in /proc files */
> > > > > > +struct file_fake {
> > > > > > +     struct file file;
> > > > > > +     struct path fake_path;
> > > > > > +};
> > > > > > +
> > > > > > +static inline struct file_fake *file_fake(struct file *f)
> > > > > > +{
> > > > > > +     return container_of(f, struct file_fake, file);
> > > > > > +}
> > > > > > +
> > > > > > +/* Returns fake_path if one exists, f_path otherwise */
> > > > > > +const struct path *file_fake_path(struct file *f)
> > > > > > +{
> > > > > > +     struct file_fake *ff = file_fake(f);
> > > > > > +
> > > > > > +     if (!(f->f_mode & FMODE_FAKE_PATH) || !ff->fake_path.dentry)
> > > > > > +             return &f->f_path;
> > > > > > +
> > > > > > +     return &ff->fake_path;
> > > > > > +}
> > > > > > +EXPORT_SYMBOL(file_fake_path);
> > > > > > +
> > > > > >  static void file_free_rcu(struct rcu_head *head)
> > > > > >  {
> > > > > >       struct file *f = container_of(head, struct file, f_rcuhead);
> > > > > >
> > > > > >       put_cred(f->f_cred);
> > > > > > -     kmem_cache_free(filp_cachep, f);
> > > > > > +     if (f->f_mode & FMODE_FAKE_PATH)
> > > > > > +             kfree(file_fake(f));
> > > > > > +     else
> > > > > > +             kmem_cache_free(filp_cachep, f);
> > > > > >  }
> > > > > >
> > > > > >  static inline void file_free(struct file *f)
> > > > > >  {
> > > > > > +     struct file_fake *ff = file_fake(f);
> > > > > > +
> > > > > >       security_file_free(f);
> > > > > > -     if (!(f->f_mode & FMODE_NOACCOUNT))
> > > > > > +     if (f->f_mode & FMODE_FAKE_PATH)
> > > > > > +             path_put(&ff->fake_path);
> > > > > > +     else
> > > > > >               percpu_counter_dec(&nr_files);
> > > > > >       call_rcu(&f->f_rcuhead, file_free_rcu);
> > > > > >  }
> > > > > > @@ -131,20 +161,15 @@ static int __init init_fs_stat_sysctls(void)
> > > > > >  fs_initcall(init_fs_stat_sysctls);
> > > > > >  #endif
> > > > > >
> > > > > > -static struct file *__alloc_file(int flags, const struct cred *cred)
> > > > > > +static int init_file(struct file *f, int flags, const struct cred *cred)
> > > > > >  {
> > > > > > -     struct file *f;
> > > > > >       int error;
> > > > > >
> > > > > > -     f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
> > > > > > -     if (unlikely(!f))
> > > > > > -             return ERR_PTR(-ENOMEM);
> > > > > > -
> > > > > >       f->f_cred = get_cred(cred);
> > > > > >       error = security_file_alloc(f);
> > > > > >       if (unlikely(error)) {
> > > > > >               file_free_rcu(&f->f_rcuhead);
> > > > > > -             return ERR_PTR(error);
> > > > > > +             return error;
> > > > > >       }
> > > > > >
> > > > > >       atomic_long_set(&f->f_count, 1);
> > > > > > @@ -155,6 +180,22 @@ static struct file *__alloc_file(int flags, const struct cred *cred)
> > > > > >       f->f_mode = OPEN_FMODE(flags);
> > > > > >       /* f->f_version: 0 */
> > > > > >
> > > > > > +     return 0;
> > > > > > +}
> > > > > > +
> > > > > > +static struct file *__alloc_file(int flags, const struct cred *cred)
> > > > > > +{
> > > > > > +     struct file *f;
> > > > > > +     int error;
> > > > > > +
> > > > > > +     f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
> > > > > > +     if (unlikely(!f))
> > > > > > +             return ERR_PTR(-ENOMEM);
> > > > > > +
> > > > > > +     error = init_file(f, flags, cred);
> > > > > > +     if (unlikely(error))
> > > > > > +             return ERR_PTR(error);
> > > > > > +
> > > > > >       return f;
> > > > > >  }
> > > > > >
> > > > > > @@ -201,18 +242,32 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
> > > > > >  }
> > > > > >
> > > > > >  /*
> > > > > > - * Variant of alloc_empty_file() that doesn't check and modify nr_files.
> > > > > > + * Variant of alloc_empty_file() that allocates a file_fake container
> > > > > > + * and doesn't check and modify nr_files.
> > > > > >   *
> > > > > >   * Should not be used unless there's a very good reason to do so.
> > > > > >   */
> > > > > > -struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
> > > > > > +struct file *alloc_empty_file_fake(const struct path *fake_path, int flags,
> > > > > > +                                const struct cred *cred)
> > > > > >  {
> > > > > > -     struct file *f = __alloc_file(flags, cred);
> > > > > > +     struct file_fake *ff;
> > > > > > +     int error;
> > > > > >
> > > > > > -     if (!IS_ERR(f))
> > > > > > -             f->f_mode |= FMODE_NOACCOUNT;
> > > > > > +     ff = kzalloc(sizeof(struct file_fake), GFP_KERNEL);
> > > > > > +     if (unlikely(!ff))
> > > > > > +             return ERR_PTR(-ENOMEM);
> > > > > >
> > > > > > -     return f;
> > > > > > +     error = init_file(&ff->file, flags, cred);
> > > > > > +     if (unlikely(error))
> > > > > > +             return ERR_PTR(error);
> > > > > > +
> > > > > > +     ff->file.f_mode |= FMODE_FAKE_PATH;
> > > > > > +     if (fake_path) {
> > > > > > +             path_get(fake_path);
> > > > > > +             ff->fake_path = *fake_path;
> > > > > > +     }
> > > > >
> > > > > Hm, I see that this check is mostly done for vfs_tmpfile_open() which
> > > > > only fills in file->f_path in vfs_tmpfile() but leaves ff->fake_path
> > > > > NULL.
> > > > >
> > > > > So really I think having FMODE_FAKE_PATH set but ff->fake_path be NULL
> > > > > is an invitation for NULL derefs sooner or later. I would simply
> > > > > document that it's required to set ff->fake_path. For callers such as
> > > > > vfs_tmpfile_open() it can just be path itself. IOW, vfs_tmpfile_open()
> > > > > should set ff->fake_path to file->f_path.
> > > >
> > > > Makes sense.
> > > > I also took the liberty to re-arrange vfs_tmpfile_open() without the
> > > > unneeded if (!error) { nesting depth.
> > >
> > > Yes, please. I had a rough sketch just for my own amusement...
> > 
> > Happy to make your Friday more amusing :-D
> > 
> > >
> > > fs/namei.c
> > >   struct file *vfs_tmpfile_open(struct mnt_idmap *idmap,
> > >                                 const struct path *parentpath, umode_t mode,
> > >                                 int open_flag, const struct cred *cred)
> > >   {
> > >           struct file *file;
> > >           int error;
> > >
> > >           file = alloc_empty_file_fake(open_flag, cred);
> > >           if (IS_ERR(file))
> > >                   return file;
> > >
> > >           error = vfs_tmpfile(idmap, parentpath, file, mode);
> > >           if (error) {
> > >                   fput(file);
> > >                   return ERR_PTR(error);
> > >           }
> > >
> > >           return file_set_fake_path(file, &file->f_path);
> > >   }
> > >   EXPORT_SYMBOL(vfs_tmpfile_open);
> > >
> > > fs/internal.h
> > >   struct file *file_set_fake_path(struct file *file, const struct path *fake_path);
> > >
> > > fs/open.c
> > >   struct file *open_with_fake_path(const struct path *fake_path, int flags,
> > >                                    const struct path *path,
> > >                                    const struct cred *cred)
> > >   {
> > >           int error;
> > >           struct file *file;
> > >
> > >           file = alloc_empty_file_fake(flags, cred);
> > >           if (IS_ERR(file))
> > >                   return file;
> > >
> > >           file->f_path = *path;
> > >           error = do_dentry_open(file, d_inode(path->dentry), NULL);
> > >           if (error) {
> > >                   fput(file);
> > >                   return ERR_PTR(error);
> > >           }
> > >
> > >           return file_set_fake_path(file, fake_path);
> > >   }
> > >
> > > fs/file_table.c
> > >   struct file *alloc_empty_file_fake(int flags, const struct cred *cred)
> > >   {
> > >           struct file_fake *ff;
> > >           int error;
> > >
> > >           ff = kzalloc(sizeof(struct file_fake), GFP_KERNEL);
> > >           if (unlikely(!ff))
> > >                   return ERR_PTR(-ENOMEM);
> > >
> > >           error = init_file(&ff->file, flags, cred);
> > >           if (unlikely(error))
> > >                   return ERR_PTR(error);
> > >
> > >           ff->file.f_mode |= FMODE_FAKE_PATH;
> > >           return &ff->file;
> > >   }
> > >
> > >   struct file *file_set_fake_path(struct file *file, const struct path *fake_path)
> > >   {
> > >           if (file->f_mode & FMODE_FAKE_PATH) {
> > >                   struct file_fake *ff = file_fake(file);
> > >                   ff->fake_path = *fake_path;
> > >           }
> > >
> > >           return file;
> > >   }
> > >
> > 
> > Heh, I also started with file_set_fake_path() but I decided that it's not
> > worth it, because no code should be messing with this and I just changed
> > file_fake_path() to be non-const and used *file_fake_path(file) = fake_path
> > in these two helpers.
> 
> Hm, I don't understand. This is non-exported and only visible in thing
> that can use internal.h so only core vfs coe.
> 
> The only places that use this are vfs_tmpfile_open() and
> open_with_fake_path(). It allows us to remove the additional argument
> from alloc_empty_file_fake() and that's what I really like because now
> we don't have this pass NULL in vfs_tmpfile_open() and fill in later vs
> doing it in one step.
> 
> Hm, one thing I realized is that this moves vfs_tmpfile_open() out of
> filp_cachep which isn't great, no? That's a pretty heavily used codepath
> so it feels that it should probably continue to use the cache?

Uh, I misread as that's only used in cachefiles and in overlayfs so it's
probably fine. I thought this was the generic version. Though it might
still be preferable to keep FMODE_NOACCOUNT and FMODE_FAKE_PATH distinct
since there's really no reason why tmpfiles should partake in the fake
path stuff...
Amir Goldstein June 9, 2023, 1:09 p.m. UTC | #7
On Fri, Jun 9, 2023 at 4:00 PM Christian Brauner <brauner@kernel.org> wrote:
>
> On Fri, Jun 09, 2023 at 02:54:32PM +0200, Christian Brauner wrote:
[...]
>
> Uh, I misread as that's only used in cachefiles and in overlayfs so it's
> probably fine. I thought this was the generic version. Though it might
> still be preferable to keep FMODE_NOACCOUNT and FMODE_FAKE_PATH distinct
> since there's really no reason why tmpfiles should partake in the fake
> path stuff...

The reason is (wait for it) no more available bits in f_flags.
Yeh, there is one place left in 0x4000000, but I didn't want to
waste it given that FMODE_NOACCOUNT and FMODE_FAKE_PATH
use cases are pretty close.

BTW, you reminded me that I forgot to CC dhowells (add now).

Thanks,
Amir.
Amir Goldstein June 11, 2023, 7:11 p.m. UTC | #8
On Fri, Jun 9, 2023 at 3:12 PM Christian Brauner <brauner@kernel.org> wrote:
>
> On Fri, Jun 09, 2023 at 02:57:20PM +0300, Amir Goldstein wrote:
> > On Fri, Jun 9, 2023 at 2:32 PM Christian Brauner <brauner@kernel.org> wrote:
> > >
> > > On Fri, Jun 09, 2023 at 10:32:37AM +0300, Amir Goldstein wrote:
> > > > Overlayfs and cachefiles use open_with_fake_path() to allocate internal
> > > > files, where overlayfs also puts a "fake" path in f_path - a path which
> > > > is not on the same fs as f_inode.
> > > >
> > > > Allocate a container struct file_fake for those internal files, that
> > > > will be used to hold the fake path qlong with the real path.
> > > >
> > > > This commit does not populate the extra fake_path field and leaves the
> > > > overlayfs internal file's f_path fake.
> > > >
> > > > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> > > > ---
> > > >  fs/cachefiles/namei.c |  2 +-
> > > >  fs/file_table.c       | 85 +++++++++++++++++++++++++++++++++++--------
> > > >  fs/internal.h         |  5 ++-
> > > >  fs/namei.c            |  2 +-
> > > >  fs/open.c             | 11 +++---
> > > >  fs/overlayfs/file.c   |  2 +-
> > > >  include/linux/fs.h    | 13 ++++---
> > > >  7 files changed, 90 insertions(+), 30 deletions(-)
> > > >
> > > > diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
> > > > index 82219a8f6084..a71bdf2d03ba 100644
> > > > --- a/fs/cachefiles/namei.c
> > > > +++ b/fs/cachefiles/namei.c
> > > > @@ -561,7 +561,7 @@ static bool cachefiles_open_file(struct cachefiles_object *object,
> > > >       path.mnt = cache->mnt;
> > > >       path.dentry = dentry;
> > > >       file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
> > > > -                                d_backing_inode(dentry), cache->cache_cred);
> > > > +                                &path, cache->cache_cred);
> > > >       if (IS_ERR(file)) {
> > > >               trace_cachefiles_vfs_error(object, d_backing_inode(dentry),
> > > >                                          PTR_ERR(file),
> > > > diff --git a/fs/file_table.c b/fs/file_table.c
> > > > index 372653b92617..adc2a92faa52 100644
> > > > --- a/fs/file_table.c
> > > > +++ b/fs/file_table.c
> > > > @@ -44,18 +44,48 @@ static struct kmem_cache *filp_cachep __read_mostly;
> > > >
> > > >  static struct percpu_counter nr_files __cacheline_aligned_in_smp;
> > > >
> > > > +/* Container for file with optional fake path to display in /proc files */
> > > > +struct file_fake {
> > > > +     struct file file;
> > > > +     struct path fake_path;
> > > > +};
> > > > +
> > > > +static inline struct file_fake *file_fake(struct file *f)
> > > > +{
> > > > +     return container_of(f, struct file_fake, file);
> > > > +}
> > > > +
> > > > +/* Returns fake_path if one exists, f_path otherwise */
> > > > +const struct path *file_fake_path(struct file *f)
> > > > +{
> > > > +     struct file_fake *ff = file_fake(f);
> > > > +
> > > > +     if (!(f->f_mode & FMODE_FAKE_PATH) || !ff->fake_path.dentry)
> > > > +             return &f->f_path;
> > > > +
> > > > +     return &ff->fake_path;
> > > > +}
> > > > +EXPORT_SYMBOL(file_fake_path);
> > > > +
> > > >  static void file_free_rcu(struct rcu_head *head)
> > > >  {
> > > >       struct file *f = container_of(head, struct file, f_rcuhead);
> > > >
> > > >       put_cred(f->f_cred);
> > > > -     kmem_cache_free(filp_cachep, f);
> > > > +     if (f->f_mode & FMODE_FAKE_PATH)
> > > > +             kfree(file_fake(f));
> > > > +     else
> > > > +             kmem_cache_free(filp_cachep, f);
> > > >  }
> > > >
> > > >  static inline void file_free(struct file *f)
> > > >  {
> > > > +     struct file_fake *ff = file_fake(f);
> > > > +
> > > >       security_file_free(f);
> > > > -     if (!(f->f_mode & FMODE_NOACCOUNT))
> > > > +     if (f->f_mode & FMODE_FAKE_PATH)
> > > > +             path_put(&ff->fake_path);
> > > > +     else
> > > >               percpu_counter_dec(&nr_files);
> > > >       call_rcu(&f->f_rcuhead, file_free_rcu);
> > > >  }
> > > > @@ -131,20 +161,15 @@ static int __init init_fs_stat_sysctls(void)
> > > >  fs_initcall(init_fs_stat_sysctls);
> > > >  #endif
> > > >
> > > > -static struct file *__alloc_file(int flags, const struct cred *cred)
> > > > +static int init_file(struct file *f, int flags, const struct cred *cred)
> > > >  {
> > > > -     struct file *f;
> > > >       int error;
> > > >
> > > > -     f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
> > > > -     if (unlikely(!f))
> > > > -             return ERR_PTR(-ENOMEM);
> > > > -
> > > >       f->f_cred = get_cred(cred);
> > > >       error = security_file_alloc(f);
> > > >       if (unlikely(error)) {
> > > >               file_free_rcu(&f->f_rcuhead);
> > > > -             return ERR_PTR(error);
> > > > +             return error;
> > > >       }
> > > >
> > > >       atomic_long_set(&f->f_count, 1);
> > > > @@ -155,6 +180,22 @@ static struct file *__alloc_file(int flags, const struct cred *cred)
> > > >       f->f_mode = OPEN_FMODE(flags);
> > > >       /* f->f_version: 0 */
> > > >
> > > > +     return 0;
> > > > +}
> > > > +
> > > > +static struct file *__alloc_file(int flags, const struct cred *cred)
> > > > +{
> > > > +     struct file *f;
> > > > +     int error;
> > > > +
> > > > +     f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
> > > > +     if (unlikely(!f))
> > > > +             return ERR_PTR(-ENOMEM);
> > > > +
> > > > +     error = init_file(f, flags, cred);
> > > > +     if (unlikely(error))
> > > > +             return ERR_PTR(error);
> > > > +
> > > >       return f;
> > > >  }
> > > >
> > > > @@ -201,18 +242,32 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
> > > >  }
> > > >
> > > >  /*
> > > > - * Variant of alloc_empty_file() that doesn't check and modify nr_files.
> > > > + * Variant of alloc_empty_file() that allocates a file_fake container
> > > > + * and doesn't check and modify nr_files.
> > > >   *
> > > >   * Should not be used unless there's a very good reason to do so.
> > > >   */
> > > > -struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
> > > > +struct file *alloc_empty_file_fake(const struct path *fake_path, int flags,
> > > > +                                const struct cred *cred)
> > > >  {
> > > > -     struct file *f = __alloc_file(flags, cred);
> > > > +     struct file_fake *ff;
> > > > +     int error;
> > > >
> > > > -     if (!IS_ERR(f))
> > > > -             f->f_mode |= FMODE_NOACCOUNT;
> > > > +     ff = kzalloc(sizeof(struct file_fake), GFP_KERNEL);
> > > > +     if (unlikely(!ff))
> > > > +             return ERR_PTR(-ENOMEM);
> > > >
> > > > -     return f;
> > > > +     error = init_file(&ff->file, flags, cred);
> > > > +     if (unlikely(error))
> > > > +             return ERR_PTR(error);
> > > > +
> > > > +     ff->file.f_mode |= FMODE_FAKE_PATH;
> > > > +     if (fake_path) {
> > > > +             path_get(fake_path);
> > > > +             ff->fake_path = *fake_path;
> > > > +     }
> > >
> > > Hm, I see that this check is mostly done for vfs_tmpfile_open() which
> > > only fills in file->f_path in vfs_tmpfile() but leaves ff->fake_path
> > > NULL.
> > >
> > > So really I think having FMODE_FAKE_PATH set but ff->fake_path be NULL
> > > is an invitation for NULL derefs sooner or later. I would simply
> > > document that it's required to set ff->fake_path. For callers such as
> > > vfs_tmpfile_open() it can just be path itself. IOW, vfs_tmpfile_open()
> > > should set ff->fake_path to file->f_path.
> >
> > Makes sense.
> > I also took the liberty to re-arrange vfs_tmpfile_open() without the
> > unneeded if (!error) { nesting depth.
>
> Yes, please. I had a rough sketch just for my own amusement...
>
> fs/namei.c
>   struct file *vfs_tmpfile_open(struct mnt_idmap *idmap,
>                                 const struct path *parentpath, umode_t mode,
>                                 int open_flag, const struct cred *cred)
>   {
>           struct file *file;
>           int error;
>
>           file = alloc_empty_file_fake(open_flag, cred);
>           if (IS_ERR(file))
>                   return file;
>
>           error = vfs_tmpfile(idmap, parentpath, file, mode);
>           if (error) {
>                   fput(file);
>                   return ERR_PTR(error);
>           }
>
>           return file_set_fake_path(file, &file->f_path);

FYI, this is not enough to guarantee that the fake_path cannot
be empty, for example in fput() above.
So I did keep the real_path empty in this case in v3 and
I have an accessor that verifies that real_path is not empty
before returning it.

Thanks,
Amir.
Christian Brauner June 12, 2023, 7:55 a.m. UTC | #9
On Sun, Jun 11, 2023 at 10:11:29PM +0300, Amir Goldstein wrote:
> On Fri, Jun 9, 2023 at 3:12 PM Christian Brauner <brauner@kernel.org> wrote:
> >
> > On Fri, Jun 09, 2023 at 02:57:20PM +0300, Amir Goldstein wrote:
> > > On Fri, Jun 9, 2023 at 2:32 PM Christian Brauner <brauner@kernel.org> wrote:
> > > >
> > > > On Fri, Jun 09, 2023 at 10:32:37AM +0300, Amir Goldstein wrote:
> > > > > Overlayfs and cachefiles use open_with_fake_path() to allocate internal
> > > > > files, where overlayfs also puts a "fake" path in f_path - a path which
> > > > > is not on the same fs as f_inode.
> > > > >
> > > > > Allocate a container struct file_fake for those internal files, that
> > > > > will be used to hold the fake path qlong with the real path.
> > > > >
> > > > > This commit does not populate the extra fake_path field and leaves the
> > > > > overlayfs internal file's f_path fake.
> > > > >
> > > > > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> > > > > ---
> > > > >  fs/cachefiles/namei.c |  2 +-
> > > > >  fs/file_table.c       | 85 +++++++++++++++++++++++++++++++++++--------
> > > > >  fs/internal.h         |  5 ++-
> > > > >  fs/namei.c            |  2 +-
> > > > >  fs/open.c             | 11 +++---
> > > > >  fs/overlayfs/file.c   |  2 +-
> > > > >  include/linux/fs.h    | 13 ++++---
> > > > >  7 files changed, 90 insertions(+), 30 deletions(-)
> > > > >
> > > > > diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
> > > > > index 82219a8f6084..a71bdf2d03ba 100644
> > > > > --- a/fs/cachefiles/namei.c
> > > > > +++ b/fs/cachefiles/namei.c
> > > > > @@ -561,7 +561,7 @@ static bool cachefiles_open_file(struct cachefiles_object *object,
> > > > >       path.mnt = cache->mnt;
> > > > >       path.dentry = dentry;
> > > > >       file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
> > > > > -                                d_backing_inode(dentry), cache->cache_cred);
> > > > > +                                &path, cache->cache_cred);
> > > > >       if (IS_ERR(file)) {
> > > > >               trace_cachefiles_vfs_error(object, d_backing_inode(dentry),
> > > > >                                          PTR_ERR(file),
> > > > > diff --git a/fs/file_table.c b/fs/file_table.c
> > > > > index 372653b92617..adc2a92faa52 100644
> > > > > --- a/fs/file_table.c
> > > > > +++ b/fs/file_table.c
> > > > > @@ -44,18 +44,48 @@ static struct kmem_cache *filp_cachep __read_mostly;
> > > > >
> > > > >  static struct percpu_counter nr_files __cacheline_aligned_in_smp;
> > > > >
> > > > > +/* Container for file with optional fake path to display in /proc files */
> > > > > +struct file_fake {
> > > > > +     struct file file;
> > > > > +     struct path fake_path;
> > > > > +};
> > > > > +
> > > > > +static inline struct file_fake *file_fake(struct file *f)
> > > > > +{
> > > > > +     return container_of(f, struct file_fake, file);
> > > > > +}
> > > > > +
> > > > > +/* Returns fake_path if one exists, f_path otherwise */
> > > > > +const struct path *file_fake_path(struct file *f)
> > > > > +{
> > > > > +     struct file_fake *ff = file_fake(f);
> > > > > +
> > > > > +     if (!(f->f_mode & FMODE_FAKE_PATH) || !ff->fake_path.dentry)
> > > > > +             return &f->f_path;
> > > > > +
> > > > > +     return &ff->fake_path;
> > > > > +}
> > > > > +EXPORT_SYMBOL(file_fake_path);
> > > > > +
> > > > >  static void file_free_rcu(struct rcu_head *head)
> > > > >  {
> > > > >       struct file *f = container_of(head, struct file, f_rcuhead);
> > > > >
> > > > >       put_cred(f->f_cred);
> > > > > -     kmem_cache_free(filp_cachep, f);
> > > > > +     if (f->f_mode & FMODE_FAKE_PATH)
> > > > > +             kfree(file_fake(f));
> > > > > +     else
> > > > > +             kmem_cache_free(filp_cachep, f);
> > > > >  }
> > > > >
> > > > >  static inline void file_free(struct file *f)
> > > > >  {
> > > > > +     struct file_fake *ff = file_fake(f);
> > > > > +
> > > > >       security_file_free(f);
> > > > > -     if (!(f->f_mode & FMODE_NOACCOUNT))
> > > > > +     if (f->f_mode & FMODE_FAKE_PATH)
> > > > > +             path_put(&ff->fake_path);
> > > > > +     else
> > > > >               percpu_counter_dec(&nr_files);
> > > > >       call_rcu(&f->f_rcuhead, file_free_rcu);
> > > > >  }
> > > > > @@ -131,20 +161,15 @@ static int __init init_fs_stat_sysctls(void)
> > > > >  fs_initcall(init_fs_stat_sysctls);
> > > > >  #endif
> > > > >
> > > > > -static struct file *__alloc_file(int flags, const struct cred *cred)
> > > > > +static int init_file(struct file *f, int flags, const struct cred *cred)
> > > > >  {
> > > > > -     struct file *f;
> > > > >       int error;
> > > > >
> > > > > -     f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
> > > > > -     if (unlikely(!f))
> > > > > -             return ERR_PTR(-ENOMEM);
> > > > > -
> > > > >       f->f_cred = get_cred(cred);
> > > > >       error = security_file_alloc(f);
> > > > >       if (unlikely(error)) {
> > > > >               file_free_rcu(&f->f_rcuhead);
> > > > > -             return ERR_PTR(error);
> > > > > +             return error;
> > > > >       }
> > > > >
> > > > >       atomic_long_set(&f->f_count, 1);
> > > > > @@ -155,6 +180,22 @@ static struct file *__alloc_file(int flags, const struct cred *cred)
> > > > >       f->f_mode = OPEN_FMODE(flags);
> > > > >       /* f->f_version: 0 */
> > > > >
> > > > > +     return 0;
> > > > > +}
> > > > > +
> > > > > +static struct file *__alloc_file(int flags, const struct cred *cred)
> > > > > +{
> > > > > +     struct file *f;
> > > > > +     int error;
> > > > > +
> > > > > +     f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
> > > > > +     if (unlikely(!f))
> > > > > +             return ERR_PTR(-ENOMEM);
> > > > > +
> > > > > +     error = init_file(f, flags, cred);
> > > > > +     if (unlikely(error))
> > > > > +             return ERR_PTR(error);
> > > > > +
> > > > >       return f;
> > > > >  }
> > > > >
> > > > > @@ -201,18 +242,32 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
> > > > >  }
> > > > >
> > > > >  /*
> > > > > - * Variant of alloc_empty_file() that doesn't check and modify nr_files.
> > > > > + * Variant of alloc_empty_file() that allocates a file_fake container
> > > > > + * and doesn't check and modify nr_files.
> > > > >   *
> > > > >   * Should not be used unless there's a very good reason to do so.
> > > > >   */
> > > > > -struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
> > > > > +struct file *alloc_empty_file_fake(const struct path *fake_path, int flags,
> > > > > +                                const struct cred *cred)
> > > > >  {
> > > > > -     struct file *f = __alloc_file(flags, cred);
> > > > > +     struct file_fake *ff;
> > > > > +     int error;
> > > > >
> > > > > -     if (!IS_ERR(f))
> > > > > -             f->f_mode |= FMODE_NOACCOUNT;
> > > > > +     ff = kzalloc(sizeof(struct file_fake), GFP_KERNEL);
> > > > > +     if (unlikely(!ff))
> > > > > +             return ERR_PTR(-ENOMEM);
> > > > >
> > > > > -     return f;
> > > > > +     error = init_file(&ff->file, flags, cred);
> > > > > +     if (unlikely(error))
> > > > > +             return ERR_PTR(error);
> > > > > +
> > > > > +     ff->file.f_mode |= FMODE_FAKE_PATH;
> > > > > +     if (fake_path) {
> > > > > +             path_get(fake_path);
> > > > > +             ff->fake_path = *fake_path;
> > > > > +     }
> > > >
> > > > Hm, I see that this check is mostly done for vfs_tmpfile_open() which
> > > > only fills in file->f_path in vfs_tmpfile() but leaves ff->fake_path
> > > > NULL.
> > > >
> > > > So really I think having FMODE_FAKE_PATH set but ff->fake_path be NULL
> > > > is an invitation for NULL derefs sooner or later. I would simply
> > > > document that it's required to set ff->fake_path. For callers such as
> > > > vfs_tmpfile_open() it can just be path itself. IOW, vfs_tmpfile_open()
> > > > should set ff->fake_path to file->f_path.
> > >
> > > Makes sense.
> > > I also took the liberty to re-arrange vfs_tmpfile_open() without the
> > > unneeded if (!error) { nesting depth.
> >
> > Yes, please. I had a rough sketch just for my own amusement...
> >
> > fs/namei.c
> >   struct file *vfs_tmpfile_open(struct mnt_idmap *idmap,
> >                                 const struct path *parentpath, umode_t mode,
> >                                 int open_flag, const struct cred *cred)
> >   {
> >           struct file *file;
> >           int error;
> >
> >           file = alloc_empty_file_fake(open_flag, cred);
> >           if (IS_ERR(file))
> >                   return file;
> >
> >           error = vfs_tmpfile(idmap, parentpath, file, mode);
> >           if (error) {
> >                   fput(file);
> >                   return ERR_PTR(error);
> >           }
> >
> >           return file_set_fake_path(file, &file->f_path);
> 
> FYI, this is not enough to guarantee that the fake_path cannot
> be empty, for example in fput() above.
> So I did keep the real_path empty in this case in v3 and
> I have an accessor that verifies that real_path is not empty
> before returning it.

Ok, I'm just making it to v3 now.
diff mbox series

Patch

diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 82219a8f6084..a71bdf2d03ba 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -561,7 +561,7 @@  static bool cachefiles_open_file(struct cachefiles_object *object,
 	path.mnt = cache->mnt;
 	path.dentry = dentry;
 	file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
-				   d_backing_inode(dentry), cache->cache_cred);
+				   &path, cache->cache_cred);
 	if (IS_ERR(file)) {
 		trace_cachefiles_vfs_error(object, d_backing_inode(dentry),
 					   PTR_ERR(file),
diff --git a/fs/file_table.c b/fs/file_table.c
index 372653b92617..adc2a92faa52 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -44,18 +44,48 @@  static struct kmem_cache *filp_cachep __read_mostly;
 
 static struct percpu_counter nr_files __cacheline_aligned_in_smp;
 
+/* Container for file with optional fake path to display in /proc files */
+struct file_fake {
+	struct file file;
+	struct path fake_path;
+};
+
+static inline struct file_fake *file_fake(struct file *f)
+{
+	return container_of(f, struct file_fake, file);
+}
+
+/* Returns fake_path if one exists, f_path otherwise */
+const struct path *file_fake_path(struct file *f)
+{
+	struct file_fake *ff = file_fake(f);
+
+	if (!(f->f_mode & FMODE_FAKE_PATH) || !ff->fake_path.dentry)
+		return &f->f_path;
+
+	return &ff->fake_path;
+}
+EXPORT_SYMBOL(file_fake_path);
+
 static void file_free_rcu(struct rcu_head *head)
 {
 	struct file *f = container_of(head, struct file, f_rcuhead);
 
 	put_cred(f->f_cred);
-	kmem_cache_free(filp_cachep, f);
+	if (f->f_mode & FMODE_FAKE_PATH)
+		kfree(file_fake(f));
+	else
+		kmem_cache_free(filp_cachep, f);
 }
 
 static inline void file_free(struct file *f)
 {
+	struct file_fake *ff = file_fake(f);
+
 	security_file_free(f);
-	if (!(f->f_mode & FMODE_NOACCOUNT))
+	if (f->f_mode & FMODE_FAKE_PATH)
+		path_put(&ff->fake_path);
+	else
 		percpu_counter_dec(&nr_files);
 	call_rcu(&f->f_rcuhead, file_free_rcu);
 }
@@ -131,20 +161,15 @@  static int __init init_fs_stat_sysctls(void)
 fs_initcall(init_fs_stat_sysctls);
 #endif
 
-static struct file *__alloc_file(int flags, const struct cred *cred)
+static int init_file(struct file *f, int flags, const struct cred *cred)
 {
-	struct file *f;
 	int error;
 
-	f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
-	if (unlikely(!f))
-		return ERR_PTR(-ENOMEM);
-
 	f->f_cred = get_cred(cred);
 	error = security_file_alloc(f);
 	if (unlikely(error)) {
 		file_free_rcu(&f->f_rcuhead);
-		return ERR_PTR(error);
+		return error;
 	}
 
 	atomic_long_set(&f->f_count, 1);
@@ -155,6 +180,22 @@  static struct file *__alloc_file(int flags, const struct cred *cred)
 	f->f_mode = OPEN_FMODE(flags);
 	/* f->f_version: 0 */
 
+	return 0;
+}
+
+static struct file *__alloc_file(int flags, const struct cred *cred)
+{
+	struct file *f;
+	int error;
+
+	f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
+	if (unlikely(!f))
+		return ERR_PTR(-ENOMEM);
+
+	error = init_file(f, flags, cred);
+	if (unlikely(error))
+		return ERR_PTR(error);
+
 	return f;
 }
 
@@ -201,18 +242,32 @@  struct file *alloc_empty_file(int flags, const struct cred *cred)
 }
 
 /*
- * Variant of alloc_empty_file() that doesn't check and modify nr_files.
+ * Variant of alloc_empty_file() that allocates a file_fake container
+ * and doesn't check and modify nr_files.
  *
  * Should not be used unless there's a very good reason to do so.
  */
-struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
+struct file *alloc_empty_file_fake(const struct path *fake_path, int flags,
+				   const struct cred *cred)
 {
-	struct file *f = __alloc_file(flags, cred);
+	struct file_fake *ff;
+	int error;
 
-	if (!IS_ERR(f))
-		f->f_mode |= FMODE_NOACCOUNT;
+	ff = kzalloc(sizeof(struct file_fake), GFP_KERNEL);
+	if (unlikely(!ff))
+		return ERR_PTR(-ENOMEM);
 
-	return f;
+	error = init_file(&ff->file, flags, cred);
+	if (unlikely(error))
+		return ERR_PTR(error);
+
+	ff->file.f_mode |= FMODE_FAKE_PATH;
+	if (fake_path) {
+		path_get(fake_path);
+		ff->fake_path = *fake_path;
+	}
+
+	return &ff->file;
 }
 
 /**
diff --git a/fs/internal.h b/fs/internal.h
index bd3b2810a36b..8890c694745b 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -97,8 +97,9 @@  extern void chroot_fs_refs(const struct path *, const struct path *);
 /*
  * file_table.c
  */
-extern struct file *alloc_empty_file(int, const struct cred *);
-extern struct file *alloc_empty_file_noaccount(int, const struct cred *);
+extern struct file *alloc_empty_file(int flags, const struct cred *cred);
+extern struct file *alloc_empty_file_fake(const struct path *fake_path,
+					  int flags, const struct cred *cred);
 
 static inline void put_file_access(struct file *file)
 {
diff --git a/fs/namei.c b/fs/namei.c
index e4fe0879ae55..5e6de1f29f4e 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3721,7 +3721,7 @@  struct file *vfs_tmpfile_open(struct mnt_idmap *idmap,
 	struct file *file;
 	int error;
 
-	file = alloc_empty_file_noaccount(open_flag, cred);
+	file = alloc_empty_file_fake(NULL, open_flag, cred);
 	if (!IS_ERR(file)) {
 		error = vfs_tmpfile(idmap, parentpath, file, mode);
 		if (error) {
diff --git a/fs/open.c b/fs/open.c
index 4478adcc4f3a..c9e2300a037d 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1116,15 +1116,16 @@  struct file *dentry_create(const struct path *path, int flags, umode_t mode,
 }
 EXPORT_SYMBOL(dentry_create);
 
-struct file *open_with_fake_path(const struct path *path, int flags,
-				struct inode *inode, const struct cred *cred)
+struct file *open_with_fake_path(const struct path *fake_path, int flags,
+				 const struct path *path,
+				 const struct cred *cred)
 {
-	struct file *f = alloc_empty_file_noaccount(flags, cred);
+	struct file *f = alloc_empty_file_fake(NULL, flags, cred);
 	if (!IS_ERR(f)) {
 		int error;
 
-		f->f_path = *path;
-		error = do_dentry_open(f, inode, NULL);
+		f->f_path = *fake_path;
+		error = do_dentry_open(f, d_inode(path->dentry), NULL);
 		if (error) {
 			fput(f);
 			f = ERR_PTR(error);
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 7c04f033aadd..f5987377e9eb 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -61,7 +61,7 @@  static struct file *ovl_open_realfile(const struct file *file,
 		if (!inode_owner_or_capable(real_idmap, realinode))
 			flags &= ~O_NOATIME;
 
-		realfile = open_with_fake_path(&file->f_path, flags, realinode,
+		realfile = open_with_fake_path(&file->f_path, flags, realpath,
 					       current_cred());
 	}
 	revert_creds(old_cred);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 21a981680856..b112a3c9b499 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -180,8 +180,8 @@  typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 /* File represents mount that needs unmounting */
 #define FMODE_NEED_UNMOUNT	((__force fmode_t)0x10000000)
 
-/* File does not contribute to nr_files count */
-#define FMODE_NOACCOUNT		((__force fmode_t)0x20000000)
+/* File is embedded in file_fake and doesn't contribute to nr_files */
+#define FMODE_FAKE_PATH		((__force fmode_t)0x20000000)
 
 /* File supports async buffered reads */
 #define FMODE_BUF_RASYNC	((__force fmode_t)0x40000000)
@@ -2349,11 +2349,14 @@  static inline struct file *file_open_root_mnt(struct vfsmount *mnt,
 	return file_open_root(&(struct path){.mnt = mnt, .dentry = mnt->mnt_root},
 			      name, flags, mode);
 }
-extern struct file * dentry_open(const struct path *, int, const struct cred *);
+extern struct file *dentry_open(const struct path *path, int flags,
+				const struct cred *creds);
 extern struct file *dentry_create(const struct path *path, int flags,
 				  umode_t mode, const struct cred *cred);
-extern struct file * open_with_fake_path(const struct path *, int,
-					 struct inode*, const struct cred *);
+extern struct file *open_with_fake_path(const struct path *fake_path, int flags,
+					const struct path *path,
+					const struct cred *cred);
+extern const struct path *file_fake_path(struct file *file);
 static inline struct file *file_clone_open(struct file *file)
 {
 	return dentry_open(&file->f_path, file->f_flags, file->f_cred);