diff mbox

[v2,10/17] ovl: decode lower file handles of unlinked but open files

Message ID 1515086449-26563-11-git-send-email-amir73il@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Amir Goldstein Jan. 4, 2018, 5:20 p.m. UTC
Lookup overlay inode in cache by origin inode, so we can decode a file
handle of an open file even if the index has a whiteout index entry to
mark this overlay inode was unlinked.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---
 fs/overlayfs/export.c    | 22 ++++++++++++++++++++--
 fs/overlayfs/inode.c     | 16 ++++++++++++++++
 fs/overlayfs/overlayfs.h |  1 +
 3 files changed, 37 insertions(+), 2 deletions(-)

Comments

Miklos Szeredi Jan. 16, 2018, 9:16 a.m. UTC | #1
On Thu, Jan 4, 2018 at 6:20 PM, Amir Goldstein <amir73il@gmail.com> wrote:
> Lookup overlay inode in cache by origin inode, so we can decode a file
> handle of an open file even if the index has a whiteout index entry to
> mark this overlay inode was unlinked.
>
> Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> ---
>  fs/overlayfs/export.c    | 22 ++++++++++++++++++++--
>  fs/overlayfs/inode.c     | 16 ++++++++++++++++
>  fs/overlayfs/overlayfs.h |  1 +
>  3 files changed, 37 insertions(+), 2 deletions(-)
>
> diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
> index 602bada474ba..6ecb54d4b52c 100644
> --- a/fs/overlayfs/export.c
> +++ b/fs/overlayfs/export.c
> @@ -385,13 +385,21 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb,
>         struct ovl_path *stack = &origin;
>         struct dentry *dentry = NULL;
>         struct dentry *index = NULL;
> +       struct inode *inode = NULL;
> +       bool is_deleted = false;
>         int err;
>
>         /* First lookup indexed upper by fh */

Why not first look up origin, then look up ovl inode by origin?  It
seems a faster path than going through the index first.  Obviously if
icache lookup fails then we need to look up index, but the common case
will be the cached one, so that should be the fast one, no?

Thanks,
Miklos
Amir Goldstein Jan. 16, 2018, 9:37 a.m. UTC | #2
On Tue, Jan 16, 2018 at 11:16 AM, Miklos Szeredi <miklos@szeredi.hu> wrote:
> On Thu, Jan 4, 2018 at 6:20 PM, Amir Goldstein <amir73il@gmail.com> wrote:
>> Lookup overlay inode in cache by origin inode, so we can decode a file
>> handle of an open file even if the index has a whiteout index entry to
>> mark this overlay inode was unlinked.
>>
>> Signed-off-by: Amir Goldstein <amir73il@gmail.com>
>> ---
>>  fs/overlayfs/export.c    | 22 ++++++++++++++++++++--
>>  fs/overlayfs/inode.c     | 16 ++++++++++++++++
>>  fs/overlayfs/overlayfs.h |  1 +
>>  3 files changed, 37 insertions(+), 2 deletions(-)
>>
>> diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
>> index 602bada474ba..6ecb54d4b52c 100644
>> --- a/fs/overlayfs/export.c
>> +++ b/fs/overlayfs/export.c
>> @@ -385,13 +385,21 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb,
>>         struct ovl_path *stack = &origin;
>>         struct dentry *dentry = NULL;
>>         struct dentry *index = NULL;
>> +       struct inode *inode = NULL;
>> +       bool is_deleted = false;
>>         int err;
>>
>>         /* First lookup indexed upper by fh */
>
> Why not first look up origin, then look up ovl inode by origin?  It
> seems a faster path than going through the index first.  Obviously if
> icache lookup fails then we need to look up index, but the common case
> will the cached one, so that should be the fast one, no?
>

Not really, because we do not know if the file handle is dir or non-dir.
If file handle is dir then decode of file handle is expensive, and looking
up the index first can reduce worst case from two file handle decodes to
just one:

For lower dir:
- one index lookup fails
- one lower dir decode
- one icache lookup
- maybe one ovl_lookup_real(is_upper=false)

For copied up indexed dir:
- one index lookup success
- one upper dir decode
- one ovl_lookup_real(is_upper=true)

That method avoids the origin dir decode for upper indexed
dir at the cost of not looking for the decoded dir in icache.

How about this as an idea: hash overlay inodes for NFS export
by origin fh instead of by origin inode pointer.

We can also avoid the "lookup index first" for non-dir
if we set a flag OVL_FH_FLAG_CONNECTABLE on exported
dir file handle, but my thinking was trying to keep the first version
simple with as few special cases as possible.

Thanks,
Amir.
Miklos Szeredi Jan. 16, 2018, 10:10 a.m. UTC | #3
On Tue, Jan 16, 2018 at 10:37 AM, Amir Goldstein <amir73il@gmail.com> wrote:
> On Tue, Jan 16, 2018 at 11:16 AM, Miklos Szeredi <miklos@szeredi.hu> wrote:
>> On Thu, Jan 4, 2018 at 6:20 PM, Amir Goldstein <amir73il@gmail.com> wrote:
>>> Lookup overlay inode in cache by origin inode, so we can decode a file
>>> handle of an open file even if the index has a whiteout index entry to
>>> mark this overlay inode was unlinked.
>>>
>>> Signed-off-by: Amir Goldstein <amir73il@gmail.com>
>>> ---
>>>  fs/overlayfs/export.c    | 22 ++++++++++++++++++++--
>>>  fs/overlayfs/inode.c     | 16 ++++++++++++++++
>>>  fs/overlayfs/overlayfs.h |  1 +
>>>  3 files changed, 37 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
>>> index 602bada474ba..6ecb54d4b52c 100644
>>> --- a/fs/overlayfs/export.c
>>> +++ b/fs/overlayfs/export.c
>>> @@ -385,13 +385,21 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb,
>>>         struct ovl_path *stack = &origin;
>>>         struct dentry *dentry = NULL;
>>>         struct dentry *index = NULL;
>>> +       struct inode *inode = NULL;
>>> +       bool is_deleted = false;
>>>         int err;
>>>
>>>         /* First lookup indexed upper by fh */
>>
>> Why not first look up origin, then look up ovl inode by origin?  It
>> seems a faster path than going through the index first.  Obviously if
>> icache lookup fails then we need to look up index, but the common case
>> will the cached one, so that should be the fast one, no?
>>
>
> Not really, because we do not know if the file handle is dir or non-dir.
> If file handle is dir than decode of file handle is expensive and can
> reduce worst case from two file handle decodes to just one:
>
> For lower dir:
> - one index lookup fails
> - one lower dir decode
> - one icache lookup
> - maybe one ovl_lookup_real(is_upper=false)
>
> For copied up indexed dir:
> - one index lookup success
> - one upper dir decode
> - one ovl_lookup_real(is_upper=true)
>
> That method avoids the origin dir decode for upper indexed
> dir at the cost of not looking for the decoded dir in icache.
>
> How about this as in idea: hash overlay inodes for NFS export
> by origin fh instead of by origin inode pointer.

Good idea.  That way we can leave out the middleman (underlying fh
decode) in the cached case.

> We can also avoid the "lookup index first" for non-dir
> if we set a flag OVL_FH_FLAG_CONNECTABLE on exported
> dir file handle, but my thinking was trying to keep the first version
> simple with as fewer special cases as possible.

Not sure I understand.  If cached lookup fails, then we do always need
to try and lookup index first before falling back to decoding origin,
right?

Thanks,
Miklos




>
> Thanks,
> Amir.
Amir Goldstein Jan. 16, 2018, 10:40 a.m. UTC | #4
On Tue, Jan 16, 2018 at 12:10 PM, Miklos Szeredi <miklos@szeredi.hu> wrote:
> On Tue, Jan 16, 2018 at 10:37 AM, Amir Goldstein <amir73il@gmail.com> wrote:
>> On Tue, Jan 16, 2018 at 11:16 AM, Miklos Szeredi <miklos@szeredi.hu> wrote:
>>> On Thu, Jan 4, 2018 at 6:20 PM, Amir Goldstein <amir73il@gmail.com> wrote:
>>>> Lookup overlay inode in cache by origin inode, so we can decode a file
>>>> handle of an open file even if the index has a whiteout index entry to
>>>> mark this overlay inode was unlinked.
>>>>
>>>> Signed-off-by: Amir Goldstein <amir73il@gmail.com>
>>>> ---
>>>>  fs/overlayfs/export.c    | 22 ++++++++++++++++++++--
>>>>  fs/overlayfs/inode.c     | 16 ++++++++++++++++
>>>>  fs/overlayfs/overlayfs.h |  1 +
>>>>  3 files changed, 37 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
>>>> index 602bada474ba..6ecb54d4b52c 100644
>>>> --- a/fs/overlayfs/export.c
>>>> +++ b/fs/overlayfs/export.c
>>>> @@ -385,13 +385,21 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb,
>>>>         struct ovl_path *stack = &origin;
>>>>         struct dentry *dentry = NULL;
>>>>         struct dentry *index = NULL;
>>>> +       struct inode *inode = NULL;
>>>> +       bool is_deleted = false;
>>>>         int err;
>>>>
>>>>         /* First lookup indexed upper by fh */
>>>
>>> Why not first look up origin, then look up ovl inode by origin?  It
>>> seems a faster path than going through the index first.  Obviously if
>>> icache lookup fails then we need to look up index, but the common case
>>> will the cached one, so that should be the fast one, no?
>>>
>>
>> Not really, because we do not know if the file handle is dir or non-dir.
>> If file handle is dir than decode of file handle is expensive and can
>> reduce worst case from two file handle decodes to just one:
>>
>> For lower dir:
>> - one index lookup fails
>> - one lower dir decode
>> - one icache lookup
>> - maybe one ovl_lookup_real(is_upper=false)
>>
>> For copied up indexed dir:
>> - one index lookup success
>> - one upper dir decode
>> - one ovl_lookup_real(is_upper=true)
>>
>> That method avoids the origin dir decode for upper indexed
>> dir at the cost of not looking for the decoded dir in icache.
>>
>> How about this as in idea: hash overlay inodes for NFS export
>> by origin fh instead of by origin inode pointer.
>
> Good idea.  That way we can leave out the middleman (underlying fh
> decode) in the cached case.
>
>> We can also avoid the "lookup index first" for non-dir
>> if we set a flag OVL_FH_FLAG_CONNECTABLE on exported
>> dir file handle, but my thinking was trying to keep the first version
>> simple with as fewer special cases as possible.
>
> Not sure I understand.  If cached lookup fails, then we do always need
> to try and lookup index first before falling back to decoding origin,
> right?
>

If you are referring to cache lookup by origin fh, then yes.
If icache by origin fh lookup fails, we should lookup index to check
for whiteout, before we decode origin fh, because index lookup is
cheaper than reconnecting a connectable file handle decode.

If we had marked the file handle 'non-connectable', then for non-dir
non-connectable file handles, origin decode is actually slightly
faster than index lookup, but I don't think it is worth the special
casing and marking the file handle for the corner case, right?

Thanks,
Amir.
Miklos Szeredi Jan. 16, 2018, 11:07 a.m. UTC | #5
On Tue, Jan 16, 2018 at 11:40 AM, Amir Goldstein <amir73il@gmail.com> wrote:
> On Tue, Jan 16, 2018 at 12:10 PM, Miklos Szeredi <miklos@szeredi.hu> wrote:
>> On Tue, Jan 16, 2018 at 10:37 AM, Amir Goldstein <amir73il@gmail.com> wrote:
>>> On Tue, Jan 16, 2018 at 11:16 AM, Miklos Szeredi <miklos@szeredi.hu> wrote:
>>>> On Thu, Jan 4, 2018 at 6:20 PM, Amir Goldstein <amir73il@gmail.com> wrote:
>>>>> Lookup overlay inode in cache by origin inode, so we can decode a file
>>>>> handle of an open file even if the index has a whiteout index entry to
>>>>> mark this overlay inode was unlinked.
>>>>>
>>>>> Signed-off-by: Amir Goldstein <amir73il@gmail.com>
>>>>> ---
>>>>>  fs/overlayfs/export.c    | 22 ++++++++++++++++++++--
>>>>>  fs/overlayfs/inode.c     | 16 ++++++++++++++++
>>>>>  fs/overlayfs/overlayfs.h |  1 +
>>>>>  3 files changed, 37 insertions(+), 2 deletions(-)
>>>>>
>>>>> diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
>>>>> index 602bada474ba..6ecb54d4b52c 100644
>>>>> --- a/fs/overlayfs/export.c
>>>>> +++ b/fs/overlayfs/export.c
>>>>> @@ -385,13 +385,21 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb,
>>>>>         struct ovl_path *stack = &origin;
>>>>>         struct dentry *dentry = NULL;
>>>>>         struct dentry *index = NULL;
>>>>> +       struct inode *inode = NULL;
>>>>> +       bool is_deleted = false;
>>>>>         int err;
>>>>>
>>>>>         /* First lookup indexed upper by fh */
>>>>
>>>> Why not first look up origin, then look up ovl inode by origin?  It
>>>> seems a faster path than going through the index first.  Obviously if
>>>> icache lookup fails then we need to look up index, but the common case
>>>> will the cached one, so that should be the fast one, no?
>>>>
>>>
>>> Not really, because we do not know if the file handle is dir or non-dir.
>>> If file handle is dir than decode of file handle is expensive and can
>>> reduce worst case from two file handle decodes to just one:
>>>
>>> For lower dir:
>>> - one index lookup fails
>>> - one lower dir decode
>>> - one icache lookup
>>> - maybe one ovl_lookup_real(is_upper=false)
>>>
>>> For copied up indexed dir:
>>> - one index lookup success
>>> - one upper dir decode
>>> - one ovl_lookup_real(is_upper=true)
>>>
>>> That method avoids the origin dir decode for upper indexed
>>> dir at the cost of not looking for the decoded dir in icache.
>>>
>>> How about this as in idea: hash overlay inodes for NFS export
>>> by origin fh instead of by origin inode pointer.
>>
>> Good idea.  That way we can leave out the middleman (underlying fh
>> decode) in the cached case.
>>
>>> We can also avoid the "lookup index first" for non-dir
>>> if we set a flag OVL_FH_FLAG_CONNECTABLE on exported
>>> dir file handle, but my thinking was trying to keep the first version
>>> simple with as fewer special cases as possible.
>>
>> Not sure I understand.  If cached lookup fails, then we do always need
>> to try and lookup index first before falling back to decoding origin,
>> right?
>>
>
> If you are referring to cache lookup by origin fh, then yes.
> If icache by origin fh lookup fails, we should lookup index to check
> for whiteout, before we decode origin fh, because index lookup is
> cheaper than reconnecting a connectable file handle decode.
>
> If we had marked the file handle 'non-connectable', then for non-dir
> non-connectable file handles, origin decode is actually slightly
> faster than index lookup, but I don't think it is worth the special
> casing and marking the file handle for the corner case, right?

My point is: if icache lookup fails, then for origin handles we always
have to do an index lookup to find the current version overlay object.
So no point in doing the origin decode first, since that one may not
be needed (if index is a whiteout).

Thanks,
Miklos
Amir Goldstein Jan. 17, 2018, 9:05 p.m. UTC | #6
On Tue, Jan 16, 2018 at 12:10 PM, Miklos Szeredi <miklos@szeredi.hu> wrote:
> On Tue, Jan 16, 2018 at 10:37 AM, Amir Goldstein <amir73il@gmail.com> wrote:
>> On Tue, Jan 16, 2018 at 11:16 AM, Miklos Szeredi <miklos@szeredi.hu> wrote:
>>> On Thu, Jan 4, 2018 at 6:20 PM, Amir Goldstein <amir73il@gmail.com> wrote:
>>>> Lookup overlay inode in cache by origin inode, so we can decode a file
>>>> handle of an open file even if the index has a whiteout index entry to
>>>> mark this overlay inode was unlinked.
>>>>
>>>> Signed-off-by: Amir Goldstein <amir73il@gmail.com>
>>>> ---
>>>>  fs/overlayfs/export.c    | 22 ++++++++++++++++++++--
>>>>  fs/overlayfs/inode.c     | 16 ++++++++++++++++
>>>>  fs/overlayfs/overlayfs.h |  1 +
>>>>  3 files changed, 37 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
>>>> index 602bada474ba..6ecb54d4b52c 100644
>>>> --- a/fs/overlayfs/export.c
>>>> +++ b/fs/overlayfs/export.c
>>>> @@ -385,13 +385,21 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb,
>>>>         struct ovl_path *stack = &origin;
>>>>         struct dentry *dentry = NULL;
>>>>         struct dentry *index = NULL;
>>>> +       struct inode *inode = NULL;
>>>> +       bool is_deleted = false;
>>>>         int err;
>>>>
>>>>         /* First lookup indexed upper by fh */
>>>
>>> Why not first look up origin, then look up ovl inode by origin?  It
>>> seems a faster path than going through the index first.  Obviously if
>>> icache lookup fails then we need to look up index, but the common case
>>> will the cached one, so that should be the fast one, no?
>>>
>>
>> Not really, because we do not know if the file handle is dir or non-dir.
>> If file handle is dir than decode of file handle is expensive and can
>> reduce worst case from two file handle decodes to just one:
>>
>> For lower dir:
>> - one index lookup fails
>> - one lower dir decode
>> - one icache lookup
>> - maybe one ovl_lookup_real(is_upper=false)
>>
>> For copied up indexed dir:
>> - one index lookup success
>> - one upper dir decode
>> - one ovl_lookup_real(is_upper=true)
>>
>> That method avoids the origin dir decode for upper indexed
>> dir at the cost of not looking for the decoded dir in icache.
>>
>> How about this as in idea: hash overlay inodes for NFS export
>> by origin fh instead of by origin inode pointer.
>
> Good idea.  That way we can leave out the middleman (underlying fh
> decode) in the cached case.
>

If it's all right with you, I prefer to get the initial version out the door
first and handle this optimization later.

Shout if you disagree.

Thanks,
Amir.
Amir Goldstein Jan. 18, 2018, 2:18 p.m. UTC | #7
On Thu, Jan 4, 2018 at 7:20 PM, Amir Goldstein <amir73il@gmail.com> wrote:
> Lookup overlay inode in cache by origin inode, so we can decode a file
> handle of an open file even if the index has a whiteout index entry to
> mark this overlay inode was unlinked.
>
> Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> ---
>  fs/overlayfs/export.c    | 22 ++++++++++++++++++++--
>  fs/overlayfs/inode.c     | 16 ++++++++++++++++
>  fs/overlayfs/overlayfs.h |  1 +
>  3 files changed, 37 insertions(+), 2 deletions(-)
>
> diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
> index 602bada474ba..6ecb54d4b52c 100644
> --- a/fs/overlayfs/export.c
> +++ b/fs/overlayfs/export.c
> @@ -385,13 +385,21 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb,
>         struct ovl_path *stack = &origin;
>         struct dentry *dentry = NULL;
>         struct dentry *index = NULL;
> +       struct inode *inode = NULL;
> +       bool is_deleted = false;
>         int err;
>
>         /* First lookup indexed upper by fh */
>         index = ovl_get_index_fh(ofs, fh);
>         err = PTR_ERR(index);
> -       if (IS_ERR(index))
> -               return ERR_PTR(err);
> +       if (IS_ERR(index)) {
> +               if (err != -ESTALE)
> +                       return ERR_PTR(err);
> +
> +               /* Found a whiteout index - treat as deleted inode */
> +               is_deleted = true;
> +               index = NULL;

Ouch! it seems I was misleading you.
If we find a whiteout index for dir, we *do* decode+reconnect origin,
because we want to find out if this is an unlinked but open non-dir.
I guess there are 2 ways to avoid this unneeded decode:
1. mark a "directory index whiteout" differently than "non-dir index whiteout"
2. lookup icache by file handle

> +       }
>
>         /* Then lookup origin by fh */
>         err = ovl_check_origin_fh(fh, NULL, ofs->lower_layers, ofs->numlower,
> @@ -404,6 +412,15 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb,
>                 err = ovl_verify_origin(index, origin.dentry, false, false);
>                 if (err)
>                         goto out_err;
> +       } else if (is_deleted && origin.dentry && !d_is_dir(origin.dentry)) {
> +               /* Lookup deleted overlay inode by origin inode */
> +               inode = ovl_lookup_inode(sb, origin.dentry);
> +               err = -ESTALE;
> +               if (!inode || atomic_read(&inode->i_count) == 1)
> +                       goto out_err;
> +
> +               /* Deleted but still open? */
> +               index = dget(ovl_i_dentry_upper(inode));
>         }

And to top that up, we even try to lookup the origin dir path in overlay
instead of returning ESTALE right away.
Sheesh! that's embarrassing.

Thanks,
Amir.
Amir Goldstein Feb. 27, 2018, 11:35 a.m. UTC | #8
On Thu, Jan 18, 2018 at 4:18 PM, Amir Goldstein <amir73il@gmail.com> wrote:
> On Thu, Jan 4, 2018 at 7:20 PM, Amir Goldstein <amir73il@gmail.com> wrote:
>> Lookup overlay inode in cache by origin inode, so we can decode a file
>> handle of an open file even if the index has a whiteout index entry to
>> mark this overlay inode was unlinked.
>>
>> Signed-off-by: Amir Goldstein <amir73il@gmail.com>
>> ---
>>  fs/overlayfs/export.c    | 22 ++++++++++++++++++++--
>>  fs/overlayfs/inode.c     | 16 ++++++++++++++++
>>  fs/overlayfs/overlayfs.h |  1 +
>>  3 files changed, 37 insertions(+), 2 deletions(-)
>>
>> diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
>> index 602bada474ba..6ecb54d4b52c 100644
>> --- a/fs/overlayfs/export.c
>> +++ b/fs/overlayfs/export.c
>> @@ -385,13 +385,21 @@ static struct dentry *ovl_lower_fh_to_d(struct super_block *sb,
>>         struct ovl_path *stack = &origin;
>>         struct dentry *dentry = NULL;
>>         struct dentry *index = NULL;
>> +       struct inode *inode = NULL;
>> +       bool is_deleted = false;
>>         int err;
>>
>>         /* First lookup indexed upper by fh */
>>         index = ovl_get_index_fh(ofs, fh);
>>         err = PTR_ERR(index);
>> -       if (IS_ERR(index))
>> -               return ERR_PTR(err);
>> +       if (IS_ERR(index)) {
>> +               if (err != -ESTALE)
>> +                       return ERR_PTR(err);
>> +
>> +               /* Found a whiteout index - treat as deleted inode */
>> +               is_deleted = true;
>> +               index = NULL;
>
> Ouch! it seems I was misleading you.
> If we find a whiteout index for dir, we *do* decode+reconnect origin,
> because we want to find out if this is an unlinked but open non-dir.
> I guess there are 2 ways to avoid this unneeded decode:
> 1. mark a "directory index whiteout" differently than "non-dir index whiteout"
> 2. lookup icache by file handle
>

Getting back to this.
I have implemented lookup icache by file handle and realized that it incurs
higher CPU usage in the common case because the hash function is more
expensive. I do not have benchmark numbers to present.

However, I also realized there is a 3rd option, which seems like a
better option:

- Call the underlying fs fh_to_dentry() operation (and not exportfs_decode_fh())
  to get a possibly disconnected origin dentry
- Lookup overlay inode by origin inode
- IF overlay inode is not cached, lookup index by origin fh
- IF origin dentry is a disconnected directory AND overlay inode is not cached
  AND index is not found, only then call exportfs_decode_fh() of origin fh
  to reconnect the origin dir

I'll try to write this up.

Thanks,
Amir.
diff mbox

Patch

diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index 602bada474ba..6ecb54d4b52c 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -385,13 +385,21 @@  static struct dentry *ovl_lower_fh_to_d(struct super_block *sb,
 	struct ovl_path *stack = &origin;
 	struct dentry *dentry = NULL;
 	struct dentry *index = NULL;
+	struct inode *inode = NULL;
+	bool is_deleted = false;
 	int err;
 
 	/* First lookup indexed upper by fh */
 	index = ovl_get_index_fh(ofs, fh);
 	err = PTR_ERR(index);
-	if (IS_ERR(index))
-		return ERR_PTR(err);
+	if (IS_ERR(index)) {
+		if (err != -ESTALE)
+			return ERR_PTR(err);
+
+		/* Found a whiteout index - treat as deleted inode */
+		is_deleted = true;
+		index = NULL;
+	}
 
 	/* Then lookup origin by fh */
 	err = ovl_check_origin_fh(fh, NULL, ofs->lower_layers, ofs->numlower,
@@ -404,6 +412,15 @@  static struct dentry *ovl_lower_fh_to_d(struct super_block *sb,
 		err = ovl_verify_origin(index, origin.dentry, false, false);
 		if (err)
 			goto out_err;
+	} else if (is_deleted && origin.dentry && !d_is_dir(origin.dentry)) {
+		/* Lookup deleted overlay inode by origin inode */
+		inode = ovl_lookup_inode(sb, origin.dentry);
+		err = -ESTALE;
+		if (!inode || atomic_read(&inode->i_count) == 1)
+			goto out_err;
+
+		/* Deleted but still open? */
+		index = dget(ovl_i_dentry_upper(inode));
 	}
 
 	dentry = ovl_get_dentry(sb, NULL, &origin, index);
@@ -411,6 +428,7 @@  static struct dentry *ovl_lower_fh_to_d(struct super_block *sb,
 out:
 	dput(origin.dentry);
 	dput(index);
+	iput(inode);
 	return dentry;
 
 out_err:
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index bb742d195f12..a25908ba3512 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -637,6 +637,22 @@  static bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry,
 	return true;
 }
 
+/*
+ * Return the referenced overlay inode cached for @origin, or NULL if none or
+ * if verification fails; never ERR_PTR(), the caller only tests for NULL.
+ */
+struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *origin)
+{
+	struct inode *inode, *key = d_inode(origin);
+
+	inode = ilookup5(sb, (unsigned long) key, ovl_inode_test, key);
+	if (inode && !ovl_verify_inode(inode, origin, NULL)) {
+		iput(inode);
+		inode = NULL;
+	}
+	return inode;
+}
+
 struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry,
 			    struct dentry *lowerdentry, struct dentry *index,
 			    unsigned int numlower)
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 2ddd74043b5f..8fa8253af7cb 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -305,6 +305,7 @@  int ovl_update_time(struct inode *inode, struct timespec *ts, int flags);
 bool ovl_is_private_xattr(const char *name);
 
 struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev);
+struct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *origin);
 struct inode *ovl_get_inode(struct super_block *sb, struct dentry *upperdentry,
 			    struct dentry *lowerdentry, struct dentry *index,
 			    unsigned int numlower);