diff mbox series

[v4,1/2] fs: use backing_file container for internal files with "fake" f_path

Message ID 20230614074907.1943007-2-amir73il@gmail.com (mailing list archive)
State New, archived
Headers show
Series Handle notifications on overlayfs fake path files | expand

Commit Message

Amir Goldstein June 14, 2023, 7:49 a.m. UTC
Overlayfs and cachefiles use open_with_fake_path() to allocate internal
files, where overlayfs also puts a "fake" path in f_path - a path which
is not on the same fs as f_inode.

Allocate a container struct backing_file for those internal files, that
is used to hold the "fake" ovl path along with the real path.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---
 fs/cachefiles/namei.c |  4 +--
 fs/file_table.c       | 74 +++++++++++++++++++++++++++++++++++++------
 fs/internal.h         |  5 +--
 fs/open.c             | 30 +++++++++++-------
 fs/overlayfs/file.c   |  4 +--
 include/linux/fs.h    | 24 +++++++++++---
 6 files changed, 109 insertions(+), 32 deletions(-)

Comments

Christian Brauner June 14, 2023, 1:26 p.m. UTC | #1
On Wed, Jun 14, 2023 at 10:49:06AM +0300, Amir Goldstein wrote:
> Overlayfs and cachefiles use open_with_fake_path() to allocate internal
> files, where overlayfs also puts a "fake" path in f_path - a path which
> is not on the same fs as f_inode.
> 
> Allocate a container struct backing_file for those internal files, that
> is used to hold the "fake" ovl path along with the real path.
> 
> Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> ---
>  fs/cachefiles/namei.c |  4 +--
>  fs/file_table.c       | 74 +++++++++++++++++++++++++++++++++++++------
>  fs/internal.h         |  5 +--
>  fs/open.c             | 30 +++++++++++-------
>  fs/overlayfs/file.c   |  4 +--
>  include/linux/fs.h    | 24 +++++++++++---
>  6 files changed, 109 insertions(+), 32 deletions(-)
> 
> diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
> index 82219a8f6084..283534c6bc8d 100644
> --- a/fs/cachefiles/namei.c
> +++ b/fs/cachefiles/namei.c
> @@ -560,8 +560,8 @@ static bool cachefiles_open_file(struct cachefiles_object *object,
>  	 */
>  	path.mnt = cache->mnt;
>  	path.dentry = dentry;
> -	file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
> -				   d_backing_inode(dentry), cache->cache_cred);
> +	file = open_backing_file(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
> +				 &path, cache->cache_cred);
>  	if (IS_ERR(file)) {
>  		trace_cachefiles_vfs_error(object, d_backing_inode(dentry),
>  					   PTR_ERR(file),
> diff --git a/fs/file_table.c b/fs/file_table.c
> index 372653b92617..138d5d405df7 100644
> --- a/fs/file_table.c
> +++ b/fs/file_table.c
> @@ -44,18 +44,40 @@ static struct kmem_cache *filp_cachep __read_mostly;
>  
>  static struct percpu_counter nr_files __cacheline_aligned_in_smp;
>  
> +/* Container for backing file with optional real path */
> +struct backing_file {
> +	struct file file;
> +	struct path real_path;
> +};
> +
> +static inline struct backing_file *backing_file(struct file *f)
> +{
> +	return container_of(f, struct backing_file, file);
> +}
> +
> +struct path *backing_file_real_path(struct file *f)
> +{
> +	return &backing_file(f)->real_path;
> +}
> +EXPORT_SYMBOL_GPL(backing_file_real_path);
> +
>  static void file_free_rcu(struct rcu_head *head)
>  {
>  	struct file *f = container_of(head, struct file, f_rcuhead);
>  
>  	put_cred(f->f_cred);
> -	kmem_cache_free(filp_cachep, f);
> +	if (unlikely(f->f_mode & FMODE_BACKING))
> +		kfree(backing_file(f));
> +	else
> +		kmem_cache_free(filp_cachep, f);
>  }
>  
>  static inline void file_free(struct file *f)
>  {
>  	security_file_free(f);
> -	if (!(f->f_mode & FMODE_NOACCOUNT))
> +	if (unlikely(f->f_mode & FMODE_BACKING))
> +		path_put(backing_file_real_path(f));
> +	else
>  		percpu_counter_dec(&nr_files);

I think this needs to be:

if (unlikely(f->f_mode & FMODE_BACKING))
        path_put(backing_file_real_path(f));

if (likely(!(f->f_mode & FMODE_NOACCOUNT)))
        percpu_counter_dec(&nr_files);

as we do have FMODE_NOACCOUNT without FMODE_BACKING.

No need to resend though.
Amir Goldstein June 14, 2023, 1:48 p.m. UTC | #2
On Wed, Jun 14, 2023 at 4:26 PM Christian Brauner <brauner@kernel.org> wrote:
>
> On Wed, Jun 14, 2023 at 10:49:06AM +0300, Amir Goldstein wrote:
> > Overlayfs and cachefiles use open_with_fake_path() to allocate internal
> > files, where overlayfs also puts a "fake" path in f_path - a path which
> > is not on the same fs as f_inode.
> >
> > Allocate a container struct backing_file for those internal files, that
> > is used to hold the "fake" ovl path along with the real path.
> >
> > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> > ---
> >  fs/cachefiles/namei.c |  4 +--
> >  fs/file_table.c       | 74 +++++++++++++++++++++++++++++++++++++------
> >  fs/internal.h         |  5 +--
> >  fs/open.c             | 30 +++++++++++-------
> >  fs/overlayfs/file.c   |  4 +--
> >  include/linux/fs.h    | 24 +++++++++++---
> >  6 files changed, 109 insertions(+), 32 deletions(-)
> >
> > diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
> > index 82219a8f6084..283534c6bc8d 100644
> > --- a/fs/cachefiles/namei.c
> > +++ b/fs/cachefiles/namei.c
> > @@ -560,8 +560,8 @@ static bool cachefiles_open_file(struct cachefiles_object *object,
> >        */
> >       path.mnt = cache->mnt;
> >       path.dentry = dentry;
> > -     file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
> > -                                d_backing_inode(dentry), cache->cache_cred);
> > +     file = open_backing_file(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
> > +                              &path, cache->cache_cred);
> >       if (IS_ERR(file)) {
> >               trace_cachefiles_vfs_error(object, d_backing_inode(dentry),
> >                                          PTR_ERR(file),
> > diff --git a/fs/file_table.c b/fs/file_table.c
> > index 372653b92617..138d5d405df7 100644
> > --- a/fs/file_table.c
> > +++ b/fs/file_table.c
> > @@ -44,18 +44,40 @@ static struct kmem_cache *filp_cachep __read_mostly;
> >
> >  static struct percpu_counter nr_files __cacheline_aligned_in_smp;
> >
> > +/* Container for backing file with optional real path */
> > +struct backing_file {
> > +     struct file file;
> > +     struct path real_path;
> > +};
> > +
> > +static inline struct backing_file *backing_file(struct file *f)
> > +{
> > +     return container_of(f, struct backing_file, file);
> > +}
> > +
> > +struct path *backing_file_real_path(struct file *f)
> > +{
> > +     return &backing_file(f)->real_path;
> > +}
> > +EXPORT_SYMBOL_GPL(backing_file_real_path);
> > +
> >  static void file_free_rcu(struct rcu_head *head)
> >  {
> >       struct file *f = container_of(head, struct file, f_rcuhead);
> >
> >       put_cred(f->f_cred);
> > -     kmem_cache_free(filp_cachep, f);
> > +     if (unlikely(f->f_mode & FMODE_BACKING))
> > +             kfree(backing_file(f));
> > +     else
> > +             kmem_cache_free(filp_cachep, f);
> >  }
> >
> >  static inline void file_free(struct file *f)
> >  {
> >       security_file_free(f);
> > -     if (!(f->f_mode & FMODE_NOACCOUNT))
> > +     if (unlikely(f->f_mode & FMODE_BACKING))
> > +             path_put(backing_file_real_path(f));
> > +     else
> >               percpu_counter_dec(&nr_files);
>
> I think this needs to be:
>
> if (unlikely(f->f_mode & FMODE_BACKING))
>         path_put(backing_file_real_path(f));
>
> if (likely(!(f->f_mode & FMODE_NOACCOUNT)))
>         percpu_counter_dec(&nr_files);
>
> as we do have FMODE_NOACCOUNT without FMODE_BACKING.
>

Ouch! good catch!

Thanks,
Amir.
Christoph Hellwig June 15, 2023, 4:24 a.m. UTC | #3
On Wed, Jun 14, 2023 at 10:49:06AM +0300, Amir Goldstein wrote:
> +static struct file *__alloc_file(int flags, const struct cred *cred)
> +{
> +	struct file *f;
> +	int error;
> +
> +	f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
> +	if (unlikely(!f))
> +		return ERR_PTR(-ENOMEM);
> +
> +	error = init_file(f, flags, cred);
> +	if (unlikely(error))
> +		return ERR_PTR(error);
> +
>  	return f;

Nit: is there much of a point in keeping this now very trivial helper
instead of open coding it in the two callers?

> +/*
> + * Variant of alloc_empty_file() that allocates a backing_file container
> + * and doesn't check and modify nr_files.
> + *
> + * Should not be used unless there's a very good reason to do so.

I'm not sure this comment is all that helpful..  I'd rather explain
when it should be used (i.e. only from open_backing_file) then when
it should not..

> -struct file *open_with_fake_path(const struct path *path, int flags,
> -				struct inode *inode, const struct cred *cred)
> +struct file *open_backing_file(const struct path *path, int flags,
> +			       const struct path *real_path,
> +			       const struct cred *cred)

Please write a big fat comment on where this function should and should
not be used and how it works.

> +struct path *backing_file_real_path(struct file *f);
> +static inline const struct path *f_real_path(struct file *f)
> +{
> +	if (unlikely(f->f_mode & FMODE_BACKING))
> +		return backing_file_real_path(f);
> +	else
> +		return &f->f_path;
> +}

Nit: no need for the else here.
Amir Goldstein June 15, 2023, 11:32 a.m. UTC | #4
On Wed, Jun 14, 2023 at 4:26 PM Christian Brauner <brauner@kernel.org> wrote:
>
> On Wed, Jun 14, 2023 at 10:49:06AM +0300, Amir Goldstein wrote:
> > Overlayfs and cachefiles use open_with_fake_path() to allocate internal
> > files, where overlayfs also puts a "fake" path in f_path - a path which
> > is not on the same fs as f_inode.
> >
> > Allocate a container struct backing_file for those internal files, that
> > is used to hold the "fake" ovl path along with the real path.
> >
> > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> > ---
> >  fs/cachefiles/namei.c |  4 +--
> >  fs/file_table.c       | 74 +++++++++++++++++++++++++++++++++++++------
> >  fs/internal.h         |  5 +--
> >  fs/open.c             | 30 +++++++++++-------
> >  fs/overlayfs/file.c   |  4 +--
> >  include/linux/fs.h    | 24 +++++++++++---
> >  6 files changed, 109 insertions(+), 32 deletions(-)
> >
> > diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
> > index 82219a8f6084..283534c6bc8d 100644
> > --- a/fs/cachefiles/namei.c
> > +++ b/fs/cachefiles/namei.c
> > @@ -560,8 +560,8 @@ static bool cachefiles_open_file(struct cachefiles_object *object,
> >        */
> >       path.mnt = cache->mnt;
> >       path.dentry = dentry;
> > -     file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
> > -                                d_backing_inode(dentry), cache->cache_cred);
> > +     file = open_backing_file(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
> > +                              &path, cache->cache_cred);
> >       if (IS_ERR(file)) {
> >               trace_cachefiles_vfs_error(object, d_backing_inode(dentry),
> >                                          PTR_ERR(file),
> > diff --git a/fs/file_table.c b/fs/file_table.c
> > index 372653b92617..138d5d405df7 100644
> > --- a/fs/file_table.c
> > +++ b/fs/file_table.c
> > @@ -44,18 +44,40 @@ static struct kmem_cache *filp_cachep __read_mostly;
> >
> >  static struct percpu_counter nr_files __cacheline_aligned_in_smp;
> >
> > +/* Container for backing file with optional real path */
> > +struct backing_file {
> > +     struct file file;
> > +     struct path real_path;
> > +};
> > +
> > +static inline struct backing_file *backing_file(struct file *f)
> > +{
> > +     return container_of(f, struct backing_file, file);
> > +}
> > +
> > +struct path *backing_file_real_path(struct file *f)
> > +{
> > +     return &backing_file(f)->real_path;
> > +}
> > +EXPORT_SYMBOL_GPL(backing_file_real_path);
> > +
> >  static void file_free_rcu(struct rcu_head *head)
> >  {
> >       struct file *f = container_of(head, struct file, f_rcuhead);
> >
> >       put_cred(f->f_cred);
> > -     kmem_cache_free(filp_cachep, f);
> > +     if (unlikely(f->f_mode & FMODE_BACKING))
> > +             kfree(backing_file(f));
> > +     else
> > +             kmem_cache_free(filp_cachep, f);
> >  }
> >
> >  static inline void file_free(struct file *f)
> >  {
> >       security_file_free(f);
> > -     if (!(f->f_mode & FMODE_NOACCOUNT))
> > +     if (unlikely(f->f_mode & FMODE_BACKING))
> > +             path_put(backing_file_real_path(f));
> > +     else
> >               percpu_counter_dec(&nr_files);
>
> I think this needs to be:
>
> if (unlikely(f->f_mode & FMODE_BACKING))
>         path_put(backing_file_real_path(f));
>
> if (likely(!(f->f_mode & FMODE_NOACCOUNT)))
>         percpu_counter_dec(&nr_files);
>
> as we do have FMODE_NOACCOUNT without FMODE_BACKING.
>
> No need to resend though.

Ay! forgot to include this in v5.
Please fix it for me again.

Thanks,
Amir.
diff mbox series

Patch

diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 82219a8f6084..283534c6bc8d 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -560,8 +560,8 @@  static bool cachefiles_open_file(struct cachefiles_object *object,
 	 */
 	path.mnt = cache->mnt;
 	path.dentry = dentry;
-	file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
-				   d_backing_inode(dentry), cache->cache_cred);
+	file = open_backing_file(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
+				 &path, cache->cache_cred);
 	if (IS_ERR(file)) {
 		trace_cachefiles_vfs_error(object, d_backing_inode(dentry),
 					   PTR_ERR(file),
diff --git a/fs/file_table.c b/fs/file_table.c
index 372653b92617..138d5d405df7 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -44,18 +44,40 @@  static struct kmem_cache *filp_cachep __read_mostly;
 
 static struct percpu_counter nr_files __cacheline_aligned_in_smp;
 
+/* Container for backing file with optional real path */
+struct backing_file {
+	struct file file;
+	struct path real_path;
+};
+
+static inline struct backing_file *backing_file(struct file *f)
+{
+	return container_of(f, struct backing_file, file);
+}
+
+struct path *backing_file_real_path(struct file *f)
+{
+	return &backing_file(f)->real_path;
+}
+EXPORT_SYMBOL_GPL(backing_file_real_path);
+
 static void file_free_rcu(struct rcu_head *head)
 {
 	struct file *f = container_of(head, struct file, f_rcuhead);
 
 	put_cred(f->f_cred);
-	kmem_cache_free(filp_cachep, f);
+	if (unlikely(f->f_mode & FMODE_BACKING))
+		kfree(backing_file(f));
+	else
+		kmem_cache_free(filp_cachep, f);
 }
 
 static inline void file_free(struct file *f)
 {
 	security_file_free(f);
-	if (!(f->f_mode & FMODE_NOACCOUNT))
+	if (unlikely(f->f_mode & FMODE_BACKING))
+		path_put(backing_file_real_path(f));
+	else
 		percpu_counter_dec(&nr_files);
 	call_rcu(&f->f_rcuhead, file_free_rcu);
 }
@@ -131,20 +153,15 @@  static int __init init_fs_stat_sysctls(void)
 fs_initcall(init_fs_stat_sysctls);
 #endif
 
-static struct file *__alloc_file(int flags, const struct cred *cred)
+static int init_file(struct file *f, int flags, const struct cred *cred)
 {
-	struct file *f;
 	int error;
 
-	f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
-	if (unlikely(!f))
-		return ERR_PTR(-ENOMEM);
-
 	f->f_cred = get_cred(cred);
 	error = security_file_alloc(f);
 	if (unlikely(error)) {
 		file_free_rcu(&f->f_rcuhead);
-		return ERR_PTR(error);
+		return error;
 	}
 
 	atomic_long_set(&f->f_count, 1);
@@ -155,6 +172,22 @@  static struct file *__alloc_file(int flags, const struct cred *cred)
 	f->f_mode = OPEN_FMODE(flags);
 	/* f->f_version: 0 */
 
+	return 0;
+}
+
+static struct file *__alloc_file(int flags, const struct cred *cred)
+{
+	struct file *f;
+	int error;
+
+	f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
+	if (unlikely(!f))
+		return ERR_PTR(-ENOMEM);
+
+	error = init_file(f, flags, cred);
+	if (unlikely(error))
+		return ERR_PTR(error);
+
 	return f;
 }
 
@@ -215,6 +248,29 @@  struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
 	return f;
 }
 
+/*
+ * Variant of alloc_empty_file() that allocates a backing_file container
+ * and doesn't check and modify nr_files.
+ *
+ * Should not be used unless there's a very good reason to do so.
+ */
+struct file *alloc_empty_backing_file(int flags, const struct cred *cred)
+{
+	struct backing_file *ff;
+	int error;
+
+	ff = kzalloc(sizeof(struct backing_file), GFP_KERNEL);
+	if (unlikely(!ff))
+		return ERR_PTR(-ENOMEM);
+
+	error = init_file(&ff->file, flags, cred);
+	if (unlikely(error))
+		return ERR_PTR(error);
+
+	ff->file.f_mode |= FMODE_BACKING | FMODE_NOACCOUNT;
+	return &ff->file;
+}
+
 /**
  * alloc_file - allocate and initialize a 'struct file'
  *
diff --git a/fs/internal.h b/fs/internal.h
index bd3b2810a36b..9c31078e0d16 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -97,8 +97,9 @@  extern void chroot_fs_refs(const struct path *, const struct path *);
 /*
  * file_table.c
  */
-extern struct file *alloc_empty_file(int, const struct cred *);
-extern struct file *alloc_empty_file_noaccount(int, const struct cred *);
+struct file *alloc_empty_file(int flags, const struct cred *cred);
+struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred);
+struct file *alloc_empty_backing_file(int flags, const struct cred *cred);
 
 static inline void put_file_access(struct file *file)
 {
diff --git a/fs/open.c b/fs/open.c
index 005ca91a173b..8f745e07a037 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1121,23 +1121,29 @@  struct file *dentry_create(const struct path *path, int flags, umode_t mode,
 }
 EXPORT_SYMBOL(dentry_create);
 
-struct file *open_with_fake_path(const struct path *path, int flags,
-				struct inode *inode, const struct cred *cred)
+struct file *open_backing_file(const struct path *path, int flags,
+			       const struct path *real_path,
+			       const struct cred *cred)
 {
-	struct file *f = alloc_empty_file_noaccount(flags, cred);
-	if (!IS_ERR(f)) {
-		int error;
+	struct file *f;
+	int error;
 
-		f->f_path = *path;
-		error = do_dentry_open(f, inode, NULL);
-		if (error) {
-			fput(f);
-			f = ERR_PTR(error);
-		}
+	f = alloc_empty_backing_file(flags, cred);
+	if (IS_ERR(f))
+		return f;
+
+	f->f_path = *path;
+	path_get(real_path);
+	*backing_file_real_path(f) = *real_path;
+	error = do_dentry_open(f, d_inode(real_path->dentry), NULL);
+	if (error) {
+		fput(f);
+		f = ERR_PTR(error);
 	}
+
 	return f;
 }
-EXPORT_SYMBOL(open_with_fake_path);
+EXPORT_SYMBOL(open_backing_file);
 
 #define WILL_CREATE(flags)	(flags & (O_CREAT | __O_TMPFILE))
 #define O_PATH_FLAGS		(O_DIRECTORY | O_NOFOLLOW | O_PATH | O_CLOEXEC)
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 39737c2aaa84..8cf099aa97de 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -61,8 +61,8 @@  static struct file *ovl_open_realfile(const struct file *file,
 		if (!inode_owner_or_capable(real_idmap, realinode))
 			flags &= ~O_NOATIME;
 
-		realfile = open_with_fake_path(&file->f_path, flags, realinode,
-					       current_cred());
+		realfile = open_backing_file(&file->f_path, flags, realpath,
+					     current_cred());
 	}
 	revert_creds(old_cred);
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 21a981680856..8393408b9885 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -171,6 +171,9 @@  typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 /* File supports non-exclusive O_DIRECT writes from multiple threads */
 #define FMODE_DIO_PARALLEL_WRITE	((__force fmode_t)0x1000000)
 
+/* File is embedded in backing_file object */
+#define FMODE_BACKING		((__force fmode_t)0x2000000)
+
 /* File was opened by fanotify and shouldn't generate fanotify events */
 #define FMODE_NONOTIFY		((__force fmode_t)0x4000000)
 
@@ -2349,11 +2352,22 @@  static inline struct file *file_open_root_mnt(struct vfsmount *mnt,
 	return file_open_root(&(struct path){.mnt = mnt, .dentry = mnt->mnt_root},
 			      name, flags, mode);
 }
-extern struct file * dentry_open(const struct path *, int, const struct cred *);
-extern struct file *dentry_create(const struct path *path, int flags,
-				  umode_t mode, const struct cred *cred);
-extern struct file * open_with_fake_path(const struct path *, int,
-					 struct inode*, const struct cred *);
+struct file *dentry_open(const struct path *path, int flags,
+			 const struct cred *creds);
+struct file *dentry_create(const struct path *path, int flags, umode_t mode,
+			   const struct cred *cred);
+struct file *open_backing_file(const struct path *path, int flags,
+			       const struct path *real_path,
+			       const struct cred *cred);
+struct path *backing_file_real_path(struct file *f);
+static inline const struct path *f_real_path(struct file *f)
+{
+	if (unlikely(f->f_mode & FMODE_BACKING))
+		return backing_file_real_path(f);
+	else
+		return &f->f_path;
+}
+
 static inline struct file *file_clone_open(struct file *file)
 {
 	return dentry_open(&file->f_path, file->f_flags, file->f_cred);