diff mbox

[RFC,09/35] ovl: stack file ops

Message ID 20180412150826.20988-10-mszeredi@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Miklos Szeredi April 12, 2018, 3:08 p.m. UTC
Implement file operations on a regular overlay file.  The underlying file
is opened separately and cached in ->private_data.

It might be worth making an exception for such files when accounting in
nr_file to conform to userspace expectations.  We are only adding a small
overhead (248 bytes for the struct file) since the real inode and dentry are
pinned by overlayfs anyway.

This patch doesn't have any effect, since the vfs will use d_real() to find
the real underlying file to open.  The patch at the end of the series will
actually enable this functionality.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/overlayfs/Makefile    |  4 +--
 fs/overlayfs/file.c      | 76 ++++++++++++++++++++++++++++++++++++++++++++++++
 fs/overlayfs/inode.c     |  1 +
 fs/overlayfs/overlayfs.h |  3 ++
 4 files changed, 82 insertions(+), 2 deletions(-)
 create mode 100644 fs/overlayfs/file.c

Comments

Vivek Goyal April 26, 2018, 2:13 p.m. UTC | #1
On Thu, Apr 12, 2018 at 05:08:00PM +0200, Miklos Szeredi wrote:

[..]
> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> new file mode 100644
> index 000000000000..a0b606885c41
> --- /dev/null
> +++ b/fs/overlayfs/file.c
> @@ -0,0 +1,76 @@
> +/*
> + * Copyright (C) 2017 Red Hat, Inc.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License version 2 as published by
> + * the Free Software Foundation.
> + */
> +
> +#include <linux/cred.h>
> +#include <linux/file.h>
> +#include <linux/xattr.h>
> +#include "overlayfs.h"
> +
> +static struct file *ovl_open_realfile(const struct file *file)
> +{
> +	struct inode *inode = file_inode(file);
> +	struct inode *upperinode = ovl_inode_upper(inode);
> +	struct inode *realinode = upperinode ?: ovl_inode_lower(inode);
> +	struct file *realfile;
> +	const struct cred *old_cred;
> +
> +	old_cred = ovl_override_creds(inode->i_sb);
> +	realfile = path_open(&file->f_path, file->f_flags | O_NOATIME,
> +			     realinode, current_cred(), false);
> +	revert_creds(old_cred);
> +
> +	pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
> +		 file, file, upperinode ? 'u' : 'l', file->f_flags,
> +		 realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
> +
> +	return realfile;
> +}
> +
> +static int ovl_open(struct inode *inode, struct file *file)
> +{
> +	struct dentry *dentry = file_dentry(file);

Hi Miklos,

There is one thing I can't wrap my head around, so I better ask.

file_dentry() will call ovl_d_real() and try to find the dentry based on
the inode installed in f->f_inode. If ovl_d_real() can't find a dentry
matching the passed-in inode, it warns.

Assume, I have a stacked overlay configuration. Let me call top level
overlay layer ovl1 and lower level overlay layer ovl2. Say I open a 
file foo.txt. Now ovl_open() in ovl1 decides that realinode is a lower
inode and installs that inode in f->f_inode of realfile. (This should be the
ovl2 layer inode; let me call it ovl2_inode.) Now ovl_open() of the ovl2 layer
will be called, and it will call file_dentry() to look for the dentry
corresponding to ovl2_inode. I am wondering: what if a copy up of foo.txt
was triggered in ovl1 in the meantime? By the time we call ovl_d_real(dentry,
ovl2_inode), it will start comparing against the inode of ovl1_upper and never
find ovl2_inode.

IOW, I am not able to figure out how we protect against copy up races
when ovl_open() calls file_dentry().

Thanks
Vivek
Miklos Szeredi April 26, 2018, 2:43 p.m. UTC | #2
On Thu, Apr 26, 2018 at 4:13 PM, Vivek Goyal <vgoyal@redhat.com> wrote:
> On Thu, Apr 12, 2018 at 05:08:00PM +0200, Miklos Szeredi wrote:
>
> [..]
>> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
>> new file mode 100644
>> index 000000000000..a0b606885c41
>> --- /dev/null
>> +++ b/fs/overlayfs/file.c
>> @@ -0,0 +1,76 @@
>> +/*
>> + * Copyright (C) 2017 Red Hat, Inc.
>> + *
>> + * This program is free software; you can redistribute it and/or modify it
>> + * under the terms of the GNU General Public License version 2 as published by
>> + * the Free Software Foundation.
>> + */
>> +
>> +#include <linux/cred.h>
>> +#include <linux/file.h>
>> +#include <linux/xattr.h>
>> +#include "overlayfs.h"
>> +
>> +static struct file *ovl_open_realfile(const struct file *file)
>> +{
>> +     struct inode *inode = file_inode(file);
>> +     struct inode *upperinode = ovl_inode_upper(inode);
>> +     struct inode *realinode = upperinode ?: ovl_inode_lower(inode);
>> +     struct file *realfile;
>> +     const struct cred *old_cred;
>> +
>> +     old_cred = ovl_override_creds(inode->i_sb);
>> +     realfile = path_open(&file->f_path, file->f_flags | O_NOATIME,
>> +                          realinode, current_cred(), false);
>> +     revert_creds(old_cred);
>> +
>> +     pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
>> +              file, file, upperinode ? 'u' : 'l', file->f_flags,
>> +              realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
>> +
>> +     return realfile;
>> +}
>> +
>> +static int ovl_open(struct inode *inode, struct file *file)
>> +{
>> +     struct dentry *dentry = file_dentry(file);
>
> Hi Miklos,
>
> There is one thing I can't wrap my head around, so I better ask.
>
> file_dentry() will call ovl_d_real() and try to find dentry based on
> inode installed in f->f_inode. If ovl_d_real() can't find inode dentry
> matching the passed in inode, it warns.
>
> Assume, I have a stacked overlay configuration. Let me call top level
> overlay layer ovl1 and lower level overlay layer ovl2. Say I open a
> file foo.txt. Now ovl_open() in ovl1 decides that realinode is a lower
> inode and installs that inode f->f_inode of realfile. (This should be
> ovl2 layer inode, let me call it ovl2_inode). Now ovl_open() of ovl2 layer
> will be called and it will call file_dentry() and will look for dentry
> corresponding to ovl2_inode. I am wondering what if a copy up of foo.txt
> was triggered in ovl1 and by the time we called ovl_d_real(dentry,
> ovl2_inode), it will start comparing with inode of ovl1_upper and never
> find ovl2_inode.

Okay, so we've modified ovl_d_real() to allow returning the overlay
dentry itself.  This is important: when we fail to match ovl1_upper
with ovl2_inode, we'll go on to get ovl2_dentry and call d_real()
recursively.  That recursive call should match the inode, return it to
outer ovl_d_real(), which again will match the inode and return
without warning.

> IOW, I am not able to figure out how do we protect agains copy up races
> when ovl_open() calls file_dentry().

Racing with a copy up cannot matter, since we'll continue looking for
the inode in the layers and stacks below, regardless of whether we
checked the upper dentry or not.

Does that make it clearer?

Thanks,
Miklos
Vivek Goyal April 26, 2018, 2:56 p.m. UTC | #3
On Thu, Apr 26, 2018 at 04:43:53PM +0200, Miklos Szeredi wrote:
> On Thu, Apr 26, 2018 at 4:13 PM, Vivek Goyal <vgoyal@redhat.com> wrote:
> > On Thu, Apr 12, 2018 at 05:08:00PM +0200, Miklos Szeredi wrote:
> >
> > [..]
> >> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> >> new file mode 100644
> >> index 000000000000..a0b606885c41
> >> --- /dev/null
> >> +++ b/fs/overlayfs/file.c
> >> @@ -0,0 +1,76 @@
> >> +/*
> >> + * Copyright (C) 2017 Red Hat, Inc.
> >> + *
> >> + * This program is free software; you can redistribute it and/or modify it
> >> + * under the terms of the GNU General Public License version 2 as published by
> >> + * the Free Software Foundation.
> >> + */
> >> +
> >> +#include <linux/cred.h>
> >> +#include <linux/file.h>
> >> +#include <linux/xattr.h>
> >> +#include "overlayfs.h"
> >> +
> >> +static struct file *ovl_open_realfile(const struct file *file)
> >> +{
> >> +     struct inode *inode = file_inode(file);
> >> +     struct inode *upperinode = ovl_inode_upper(inode);
> >> +     struct inode *realinode = upperinode ?: ovl_inode_lower(inode);
> >> +     struct file *realfile;
> >> +     const struct cred *old_cred;
> >> +
> >> +     old_cred = ovl_override_creds(inode->i_sb);
> >> +     realfile = path_open(&file->f_path, file->f_flags | O_NOATIME,
> >> +                          realinode, current_cred(), false);
> >> +     revert_creds(old_cred);
> >> +
> >> +     pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
> >> +              file, file, upperinode ? 'u' : 'l', file->f_flags,
> >> +              realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
> >> +
> >> +     return realfile;
> >> +}
> >> +
> >> +static int ovl_open(struct inode *inode, struct file *file)
> >> +{
> >> +     struct dentry *dentry = file_dentry(file);
> >
> > Hi Miklos,
> >
> > There is one thing I can't wrap my head around, so I better ask.
> >
> > file_dentry() will call ovl_d_real() and try to find dentry based on
> > inode installed in f->f_inode. If ovl_d_real() can't find inode dentry
> > matching the passed in inode, it warns.
> >
> > Assume, I have a stacked overlay configuration. Let me call top level
> > overlay layer ovl1 and lower level overlay layer ovl2. Say I open a
> > file foo.txt. Now ovl_open() in ovl1 decides that realinode is a lower
> > inode and installs that inode f->f_inode of realfile. (This should be
> > ovl2 layer inode, let me call it ovl2_inode). Now ovl_open() of ovl2 layer
> > will be called and it will call file_dentry() and will look for dentry
> > corresponding to ovl2_inode. I am wondering what if a copy up of foo.txt
> > was triggered in ovl1 and by the time we called ovl_d_real(dentry,
> > ovl2_inode), it will start comparing with inode of ovl1_upper and never
> > find ovl2_inode.
> 
> Okay, so we've modified ovl_d_real() to allow returning the overlay
> dentry itself.  This is important: when we fail to match ovl1_upper
> with ovl2_inode, well go on to get ovl2_dentry and call d_real()
> recursively.  That recursive call should match the inode, return it to
> outer ovl_d_real(), which again will match the inode and return
> without warning.

So the current code does the following.

ovl_d_real() {
	...
	...

        real = ovl_dentry_real(dentry);
        if (inode == d_inode(real))
                return real;

        /* Handle recursion */
        if (unlikely(real->d_flags & DCACHE_OP_REAL))
                return real->d_op->d_real(real, inode);
}

If the file got copied up in ovl1, then "real" will be the ovl1_upper dentry.
Since upper is a regular fs (only ovl1's lower is an overlay), it should not
have DCACHE_OP_REAL set. That means we will not recurse further, will not
find the ovl2 dentry matching ovl2_inode, and will print a warning and return
the ovl1 dentry.

What am I missing?

Vivek

> 
> > IOW, I am not able to figure out how do we protect agains copy up races
> > when ovl_open() calls file_dentry().
> 
> Racing with a copy up cannot matter, since we'll continue looking for
> the inode in the layers and stacks below, regardless of whether we
> checked the upper dentry or not.
> 
> Does that make it clearer?
> 
> Thanks,
> Miklos
Miklos Szeredi April 26, 2018, 3:01 p.m. UTC | #4
On Thu, Apr 26, 2018 at 4:56 PM, Vivek Goyal <vgoyal@redhat.com> wrote:
> On Thu, Apr 26, 2018 at 04:43:53PM +0200, Miklos Szeredi wrote:
>> On Thu, Apr 26, 2018 at 4:13 PM, Vivek Goyal <vgoyal@redhat.com> wrote:
>> > On Thu, Apr 12, 2018 at 05:08:00PM +0200, Miklos Szeredi wrote:
>> >
>> > [..]
>> >> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
>> >> new file mode 100644
>> >> index 000000000000..a0b606885c41
>> >> --- /dev/null
>> >> +++ b/fs/overlayfs/file.c
>> >> @@ -0,0 +1,76 @@
>> >> +/*
>> >> + * Copyright (C) 2017 Red Hat, Inc.
>> >> + *
>> >> + * This program is free software; you can redistribute it and/or modify it
>> >> + * under the terms of the GNU General Public License version 2 as published by
>> >> + * the Free Software Foundation.
>> >> + */
>> >> +
>> >> +#include <linux/cred.h>
>> >> +#include <linux/file.h>
>> >> +#include <linux/xattr.h>
>> >> +#include "overlayfs.h"
>> >> +
>> >> +static struct file *ovl_open_realfile(const struct file *file)
>> >> +{
>> >> +     struct inode *inode = file_inode(file);
>> >> +     struct inode *upperinode = ovl_inode_upper(inode);
>> >> +     struct inode *realinode = upperinode ?: ovl_inode_lower(inode);
>> >> +     struct file *realfile;
>> >> +     const struct cred *old_cred;
>> >> +
>> >> +     old_cred = ovl_override_creds(inode->i_sb);
>> >> +     realfile = path_open(&file->f_path, file->f_flags | O_NOATIME,
>> >> +                          realinode, current_cred(), false);
>> >> +     revert_creds(old_cred);
>> >> +
>> >> +     pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
>> >> +              file, file, upperinode ? 'u' : 'l', file->f_flags,
>> >> +              realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
>> >> +
>> >> +     return realfile;
>> >> +}
>> >> +
>> >> +static int ovl_open(struct inode *inode, struct file *file)
>> >> +{
>> >> +     struct dentry *dentry = file_dentry(file);
>> >
>> > Hi Miklos,
>> >
>> > There is one thing I can't wrap my head around, so I better ask.
>> >
>> > file_dentry() will call ovl_d_real() and try to find dentry based on
>> > inode installed in f->f_inode. If ovl_d_real() can't find inode dentry
>> > matching the passed in inode, it warns.
>> >
>> > Assume, I have a stacked overlay configuration. Let me call top level
>> > overlay layer ovl1 and lower level overlay layer ovl2. Say I open a
>> > file foo.txt. Now ovl_open() in ovl1 decides that realinode is a lower
>> > inode and installs that inode f->f_inode of realfile. (This should be
>> > ovl2 layer inode, let me call it ovl2_inode). Now ovl_open() of ovl2 layer
>> > will be called and it will call file_dentry() and will look for dentry
>> > corresponding to ovl2_inode. I am wondering what if a copy up of foo.txt
>> > was triggered in ovl1 and by the time we called ovl_d_real(dentry,
>> > ovl2_inode), it will start comparing with inode of ovl1_upper and never
>> > find ovl2_inode.
>>
>> Okay, so we've modified ovl_d_real() to allow returning the overlay
>> dentry itself.  This is important: when we fail to match ovl1_upper
>> with ovl2_inode, well go on to get ovl2_dentry and call d_real()
>> recursively.  That recursive call should match the inode, return it to
>> outer ovl_d_real(), which again will match the inode and return
>> without warning.
>
> So current code does following.
>
> ovl_d_real() {
>         ...
>         ...
>
>         real = ovl_dentry_real(dentry);
>         if (inode == d_inode(real))
>                 return real;
>
>         /* Handle recursion */
>         if (unlikely(real->d_flags & DCACHE_OP_REAL))
>                 return real->d_op->d_real(real, inode);
> }
>
> If file got copied up in ovl1, then "real" will be ovl1_upper dentry. And
> upper is regular fs (only ovl1 lower is overlay), then it should not have
> DCACHE_OP_REAL set and that means we will not recurse further and not
> find ovl2 dentry matching ovl2_inode and print warning and return
> ovl1 dentry.
>
> What am I missing.

Ah,  that's indeed buggy.  The bug is in "[RFC PATCH 34/35] vfs:
simplify d_op->d_real()".

I've already reverted that (due to d_real_inode() acquiring a new
user) and the old code should be good (AFAICS).

Thanks,
Miklos
Vivek Goyal April 26, 2018, 3:13 p.m. UTC | #5
On Thu, Apr 26, 2018 at 05:01:37PM +0200, Miklos Szeredi wrote:
> On Thu, Apr 26, 2018 at 4:56 PM, Vivek Goyal <vgoyal@redhat.com> wrote:
> > On Thu, Apr 26, 2018 at 04:43:53PM +0200, Miklos Szeredi wrote:
> >> On Thu, Apr 26, 2018 at 4:13 PM, Vivek Goyal <vgoyal@redhat.com> wrote:
> >> > On Thu, Apr 12, 2018 at 05:08:00PM +0200, Miklos Szeredi wrote:
> >> >
> >> > [..]
> >> >> diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
> >> >> new file mode 100644
> >> >> index 000000000000..a0b606885c41
> >> >> --- /dev/null
> >> >> +++ b/fs/overlayfs/file.c
> >> >> @@ -0,0 +1,76 @@
> >> >> +/*
> >> >> + * Copyright (C) 2017 Red Hat, Inc.
> >> >> + *
> >> >> + * This program is free software; you can redistribute it and/or modify it
> >> >> + * under the terms of the GNU General Public License version 2 as published by
> >> >> + * the Free Software Foundation.
> >> >> + */
> >> >> +
> >> >> +#include <linux/cred.h>
> >> >> +#include <linux/file.h>
> >> >> +#include <linux/xattr.h>
> >> >> +#include "overlayfs.h"
> >> >> +
> >> >> +static struct file *ovl_open_realfile(const struct file *file)
> >> >> +{
> >> >> +     struct inode *inode = file_inode(file);
> >> >> +     struct inode *upperinode = ovl_inode_upper(inode);
> >> >> +     struct inode *realinode = upperinode ?: ovl_inode_lower(inode);
> >> >> +     struct file *realfile;
> >> >> +     const struct cred *old_cred;
> >> >> +
> >> >> +     old_cred = ovl_override_creds(inode->i_sb);
> >> >> +     realfile = path_open(&file->f_path, file->f_flags | O_NOATIME,
> >> >> +                          realinode, current_cred(), false);
> >> >> +     revert_creds(old_cred);
> >> >> +
> >> >> +     pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
> >> >> +              file, file, upperinode ? 'u' : 'l', file->f_flags,
> >> >> +              realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
> >> >> +
> >> >> +     return realfile;
> >> >> +}
> >> >> +
> >> >> +static int ovl_open(struct inode *inode, struct file *file)
> >> >> +{
> >> >> +     struct dentry *dentry = file_dentry(file);
> >> >
> >> > Hi Miklos,
> >> >
> >> > There is one thing I can't wrap my head around, so I better ask.
> >> >
> >> > file_dentry() will call ovl_d_real() and try to find dentry based on
> >> > inode installed in f->f_inode. If ovl_d_real() can't find inode dentry
> >> > matching the passed in inode, it warns.
> >> >
> >> > Assume, I have a stacked overlay configuration. Let me call top level
> >> > overlay layer ovl1 and lower level overlay layer ovl2. Say I open a
> >> > file foo.txt. Now ovl_open() in ovl1 decides that realinode is a lower
> >> > inode and installs that inode f->f_inode of realfile. (This should be
> >> > ovl2 layer inode, let me call it ovl2_inode). Now ovl_open() of ovl2 layer
> >> > will be called and it will call file_dentry() and will look for dentry
> >> > corresponding to ovl2_inode. I am wondering what if a copy up of foo.txt
> >> > was triggered in ovl1 and by the time we called ovl_d_real(dentry,
> >> > ovl2_inode), it will start comparing with inode of ovl1_upper and never
> >> > find ovl2_inode.
> >>
> >> Okay, so we've modified ovl_d_real() to allow returning the overlay
> >> dentry itself.  This is important: when we fail to match ovl1_upper
> >> with ovl2_inode, well go on to get ovl2_dentry and call d_real()
> >> recursively.  That recursive call should match the inode, return it to
> >> outer ovl_d_real(), which again will match the inode and return
> >> without warning.
> >
> > So current code does following.
> >
> > ovl_d_real() {
> >         ...
> >         ...
> >
> >         real = ovl_dentry_real(dentry);
> >         if (inode == d_inode(real))
> >                 return real;
> >
> >         /* Handle recursion */
> >         if (unlikely(real->d_flags & DCACHE_OP_REAL))
> >                 return real->d_op->d_real(real, inode);
> > }
> >
> > If file got copied up in ovl1, then "real" will be ovl1_upper dentry. And
> > upper is regular fs (only ovl1 lower is overlay), then it should not have
> > DCACHE_OP_REAL set and that means we will not recurse further and not
> > find ovl2 dentry matching ovl2_inode and print warning and return
> > ovl1 dentry.
> >
> > What am I missing.
> 
> Ah,  that's indeed buggy.  The bug is in "[RFC PATCH 34/35] vfs:
> simplify d_op->d_real()".
> 
> I've already reverted that (due to d_real_inode() acquiring a new
> user) and the old code should be good (AFAICS).

Aha, cool. thanks. While I am at it, let me just ask one more stupid
question.

I am wondering why, when opening the underlying realfile, we pass in the
path/dentry of the ovl layer (and not that of the underlying real layer).

        realfile = path_open(&file->f_path, file->f_flags | O_NOATIME,
                             realinode, current_cred(), false);

This forces us to do file_dentry() in ovl_open() later to map top level
dentry to underlying dentry.

We know the realinode and should be able to figure out the real dentry. Can't
we construct a path from the underlying dentry and mount point and use that
to open the underlying real file?  I am sure there is some reason for doing
it this way; I am just trying to understand it.

Vivek
Miklos Szeredi April 26, 2018, 3:21 p.m. UTC | #6
On Thu, Apr 26, 2018 at 5:13 PM, Vivek Goyal <vgoyal@redhat.com> wrote:

> Aha, cool. thanks. While I am at it, let me just ask one more stupid
> question.
>
> I am wondering while opening the underlying realfile, why do we pass
> in the path/dentry of ovl layer (and not underlying real layer).
>
>         realfile = path_open(&file->f_path, file->f_flags | O_NOATIME,
>                              realinode, current_cred(), false);
>
> This forces us to do file_dentry() in ovl_open() later to map top level
> dentry to underlying dentry.
>
> We know the realinode and should be figure out real dentry. Can't we
> construct path from underlying dentry and mount point and use that
> to open underlying real file.  I am sure there is some reason for doing
> this way, just trying to understand it.

The logical thing would be to just use the real path (as returned by
ovl_path_real()).

The reason we don't do that is because mmap stores the real file in
vma->vm_file and vm_file->f_path is used in various places (e.g.
/proc/PID/maps).

We could have a separate realfile for mmap, but that would be
additional complexity and memory use, so I don't think it makes sense.

Thanks,
Miklos
diff mbox

Patch

diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile
index 30802347a020..46e1ff8ac056 100644
--- a/fs/overlayfs/Makefile
+++ b/fs/overlayfs/Makefile
@@ -4,5 +4,5 @@ 
 
 obj-$(CONFIG_OVERLAY_FS) += overlay.o
 
-overlay-objs := super.o namei.o util.o inode.o dir.o readdir.o copy_up.o \
-		export.o
+overlay-objs := super.o namei.o util.o inode.o file.o dir.o readdir.o \
+		copy_up.o export.o
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
new file mode 100644
index 000000000000..a0b606885c41
--- /dev/null
+++ b/fs/overlayfs/file.c
@@ -0,0 +1,76 @@ 
+/*
+ * Copyright (C) 2017 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/cred.h>
+#include <linux/file.h>
+#include <linux/xattr.h>
+#include "overlayfs.h"
+
+static struct file *ovl_open_realfile(const struct file *file)
+{
+	struct inode *inode = file_inode(file);
+	struct inode *upperinode = ovl_inode_upper(inode);
+	struct inode *realinode = upperinode ?: ovl_inode_lower(inode);
+	struct file *realfile;
+	const struct cred *old_cred;
+
+	old_cred = ovl_override_creds(inode->i_sb);
+	realfile = path_open(&file->f_path, file->f_flags | O_NOATIME,
+			     realinode, current_cred(), false);
+	revert_creds(old_cred);
+
+	pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
+		 file, file, upperinode ? 'u' : 'l', file->f_flags,
+		 realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
+
+	return realfile;
+}
+
+static int ovl_open(struct inode *inode, struct file *file)
+{
+	struct dentry *dentry = file_dentry(file);
+	struct file *realfile;
+	int err;
+
+	err = ovl_open_maybe_copy_up(dentry, file->f_flags);
+	if (err)
+		return err;
+
+	/* No longer need these flags, so don't pass them on to underlying fs */
+	file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
+
+	realfile = ovl_open_realfile(file);
+	if (IS_ERR(realfile))
+		return PTR_ERR(realfile);
+
+	file->private_data = realfile;
+
+	return 0;
+}
+
+static int ovl_release(struct inode *inode, struct file *file)
+{
+	fput(file->private_data);
+
+	return 0;
+}
+
+static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
+{
+	struct inode *realinode = ovl_inode_real(file_inode(file));
+
+	return generic_file_llseek_size(file, offset, whence,
+					realinode->i_sb->s_maxbytes,
+					i_size_read(realinode));
+}
+
+const struct file_operations ovl_file_operations = {
+	.open		= ovl_open,
+	.release	= ovl_release,
+	.llseek		= ovl_llseek,
+};
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index e213bb9823ec..3b996639e1ad 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -542,6 +542,7 @@  static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev,
 	switch (mode & S_IFMT) {
 	case S_IFREG:
 		inode->i_op = &ovl_file_inode_operations;
+		inode->i_fop = &ovl_file_operations;
 		break;
 
 	case S_IFDIR:
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 10f5f3bf9d96..b29c1688f372 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -381,6 +381,9 @@  int ovl_create_real(struct inode *dir, struct dentry *newdentry,
 		    struct dentry *hardlink, bool debug);
 int ovl_cleanup(struct inode *dir, struct dentry *dentry);
 
+/* file.c */
+extern const struct file_operations ovl_file_operations;
+
 /* copy_up.c */
 int ovl_copy_up(struct dentry *dentry);
 int ovl_copy_up_flags(struct dentry *dentry, int flags);