diff mbox

vfs: avoid recopying filename in getname_flags

Message ID 1424867468-17346-1-git-send-email-boqun.feng@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Boqun Feng Feb. 25, 2015, 12:31 p.m. UTC
In the current implementation of getname_flags, filename in the
user-space will be recopied if it takes more space that
EMBEDDED_NAME_MAX, however, at this moment, EMBEDDED_NAME_MAX bytes of
the filename are already copied into kernel space, the only reason why
the recopy is needed is that "kname" needs to be relocated.

And the recopy can be avoided if we change the memory layout of the
"names_cache" allocation. By putting the struct "filename" at the tail
of the allocation instead of the head, relocation of kname is avoided.

Once putting the struct at the tail, each byte in the user space will be
copied only one time, so the recopy is avoided and code is more clear.

Of course, other functions aware of the layout of the names_cache
allocation, i.e., getname_kernel and putname also need to be modified to
adapt to the new layout.

This patch is based on v4.0-rc1.

Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Paul Moore <pmoore@redhat.com>
Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
---
 fs/namei.c | 50 +++++++++++++++++++++++++++++++-------------------
 1 file changed, 31 insertions(+), 19 deletions(-)

Comments

Boqun Feng March 9, 2015, 8:24 a.m. UTC | #1
Ping.
Any opinion?

Thanks,
Boqun Feng

On Wed, Feb 25, 2015 at 8:31 PM, Boqun Feng <boqun.feng@gmail.com> wrote:
> In the current implementation of getname_flags, filename in the
> user-space will be recopied if it takes more space that
> EMBEDDED_NAME_MAX, however, at this moment, EMBEDDED_NAME_MAX bytes of
> the filename are already copied into kernel space, the only reason why
> the recopy is needed is that "kname" needs to be relocated.
>
> And the recopy can be avoided if we change the memory layout of the
> "names_cache" allocation. By putting the struct "filename" at the tail
> of the allocation instead of the head, relocation of kname is avoided.
>
> Once putting the struct at the tail, each byte in the user space will be
> copied only one time, so the recopy is avoided and code is more clear.
>
> Of course, other functions aware of the layout of the names_cache
> allocation, i.e., getname_kernel and putname also need to be modified to
> adapt to the new layout.
>
> This patch is based on v4.0-rc1.
>
> Cc: Al Viro <viro@zeniv.linux.org.uk>
> Cc: Paul Moore <pmoore@redhat.com>
> Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
> ---
>  fs/namei.c | 50 +++++++++++++++++++++++++++++++-------------------
>  1 file changed, 31 insertions(+), 19 deletions(-)
>
> diff --git a/fs/namei.c b/fs/namei.c
> index c83145a..3be372b 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -133,21 +133,20 @@ getname_flags(const char __user *filename, int flags, int *empty)
>         if (result)
>                 return result;
>
> -       result = __getname();
> -       if (unlikely(!result))
> +       kname = __getname();
> +       if (unlikely(!kname))
>                 return ERR_PTR(-ENOMEM);
> -       result->refcnt = 1;
>
>         /*
>          * First, try to embed the struct filename inside the names_cache
>          * allocation
>          */
> -       kname = (char *)result + sizeof(*result);
> +       result = (struct filename *)(kname + EMBEDDED_NAME_MAX);
>         result->name = kname;
>         result->separate = false;
> +       result->refcnt = 1;
>         max = EMBEDDED_NAME_MAX;
>
> -recopy:
>         len = strncpy_from_user(kname, filename, max);
>         if (unlikely(len < 0)) {
>                 err = ERR_PTR(len);
> @@ -157,23 +156,34 @@ recopy:
>         /*
>          * Uh-oh. We have a name that's approaching PATH_MAX. Allocate a
>          * separate struct filename so we can dedicate the entire
> -        * names_cache allocation for the pathname, and re-do the copy from
> +        * names_cache allocation for the pathname, and continue the copy from
>          * userland.
>          */
> -       if (len == EMBEDDED_NAME_MAX && max == EMBEDDED_NAME_MAX) {
> -               kname = (char *)result;
> -
> +       if (len == EMBEDDED_NAME_MAX) {
>                 result = kzalloc(sizeof(*result), GFP_KERNEL);
>                 if (!result) {
>                         err = ERR_PTR(-ENOMEM);
> -                       result = (struct filename *)kname;
> +                       result = (struct filename *)(kname + EMBEDDED_NAME_MAX);
>                         goto error;
>                 }
>                 result->name = kname;
>                 result->separate = true;
>                 result->refcnt = 1;
> -               max = PATH_MAX;
> -               goto recopy;
> +               max = PATH_MAX - EMBEDDED_NAME_MAX;
> +               /* we can't simply add the number of rest chars we copy to len,
> +                * since strncpy_from_user may return negative to indicate
> +                * something is wrong, so we do the addition later, after
> +                * strncpy_from_user succeeds, and we know we already copy
> +                * EMBEDDED_NAME_MAX chars.
> +                */
> +               len = strncpy_from_user(kname + EMBEDDED_NAME_MAX,
> +                               filename + EMBEDDED_NAME_MAX, max);
> +
> +               if (unlikely(len < 0)) {
> +                       err = ERR_PTR(len);
> +                       goto error;
> +               }
> +               len += EMBEDDED_NAME_MAX;
>         }
>
>         /* The empty path is special. */
> @@ -209,28 +219,30 @@ struct filename *
>  getname_kernel(const char * filename)
>  {
>         struct filename *result;
> +       char *kname;
>         int len = strlen(filename) + 1;
>
> -       result = __getname();
> -       if (unlikely(!result))
> +       kname = __getname();
> +       if (unlikely(!kname))
>                 return ERR_PTR(-ENOMEM);
>
>         if (len <= EMBEDDED_NAME_MAX) {
> -               result->name = (char *)(result) + sizeof(*result);
> +               result = (struct filename *)(kname + EMBEDDED_NAME_MAX);
> +               result->name = kname;
>                 result->separate = false;
>         } else if (len <= PATH_MAX) {
>                 struct filename *tmp;
>
>                 tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);
>                 if (unlikely(!tmp)) {
> -                       __putname(result);
> +                       __putname(kname);
>                         return ERR_PTR(-ENOMEM);
>                 }
> -               tmp->name = (char *)result;
> +               tmp->name = kname;
>                 tmp->separate = true;
>                 result = tmp;
>         } else {
> -               __putname(result);
> +               __putname(kname);
>                 return ERR_PTR(-ENAMETOOLONG);
>         }
>         memcpy((char *)result->name, filename, len);
> @@ -253,7 +265,7 @@ void putname(struct filename *name)
>                 __putname(name->name);
>                 kfree(name);
>         } else
> -               __putname(name);
> +               __putname(name->name);
>  }
>
>  static int check_acl(struct inode *inode, int mask)
> --
> 2.3.0
>
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paul Moore March 13, 2015, 1:45 p.m. UTC | #2
On Monday, March 09, 2015 04:24:32 PM Boqun Feng wrote:
> Ping.
> Any opinion?

You might want to look at some of the recent changes to Al's vfs.git#for-next 
branch; at the very least it looks like your patch should be rebased against 
those changes.

> On Wed, Feb 25, 2015 at 8:31 PM, Boqun Feng <boqun.feng@gmail.com> wrote:
> > In the current implementation of getname_flags, filename in the
> > user-space will be recopied if it takes more space that
> > EMBEDDED_NAME_MAX, however, at this moment, EMBEDDED_NAME_MAX bytes of
> > the filename are already copied into kernel space, the only reason why
> > the recopy is needed is that "kname" needs to be relocated.
> > 
> > And the recopy can be avoided if we change the memory layout of the
> > "names_cache" allocation. By putting the struct "filename" at the tail
> > of the allocation instead of the head, relocation of kname is avoided.
> > 
> > Once putting the struct at the tail, each byte in the user space will be
> > copied only one time, so the recopy is avoided and code is more clear.
> > 
> > Of course, other functions aware of the layout of the names_cache
> > allocation, i.e., getname_kernel and putname also need to be modified to
> > adapt to the new layout.
> > 
> > This patch is based on v4.0-rc1.
> > 
> > Cc: Al Viro <viro@zeniv.linux.org.uk>
> > Cc: Paul Moore <pmoore@redhat.com>
> > Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
> > ---
> > 
> >  fs/namei.c | 50 +++++++++++++++++++++++++++++++-------------------
> >  1 file changed, 31 insertions(+), 19 deletions(-)
> > 
> > diff --git a/fs/namei.c b/fs/namei.c
> > index c83145a..3be372b 100644
> > --- a/fs/namei.c
> > +++ b/fs/namei.c
> > @@ -133,21 +133,20 @@ getname_flags(const char __user *filename, int
> > flags, int *empty)> 
> >         if (result)
> >         
> >                 return result;
> > 
> > -       result = __getname();
> > -       if (unlikely(!result))
> > +       kname = __getname();
> > +       if (unlikely(!kname))
> > 
> >                 return ERR_PTR(-ENOMEM);
> > 
> > -       result->refcnt = 1;
> > 
> >         /*
> >         
> >          * First, try to embed the struct filename inside the names_cache
> >          * allocation
> >          */
> > 
> > -       kname = (char *)result + sizeof(*result);
> > +       result = (struct filename *)(kname + EMBEDDED_NAME_MAX);
> > 
> >         result->name = kname;
> >         result->separate = false;
> > 
> > +       result->refcnt = 1;
> > 
> >         max = EMBEDDED_NAME_MAX;
> > 
> > -recopy:
> >         len = strncpy_from_user(kname, filename, max);
> >         if (unlikely(len < 0)) {
> >         
> >                 err = ERR_PTR(len);
> > 
> > @@ -157,23 +156,34 @@ recopy:
> >         /*
> >         
> >          * Uh-oh. We have a name that's approaching PATH_MAX. Allocate a
> >          * separate struct filename so we can dedicate the entire
> > 
> > -        * names_cache allocation for the pathname, and re-do the copy
> > from
> > +        * names_cache allocation for the pathname, and continue the copy
> > from> 
> >          * userland.
> >          */
> > 
> > -       if (len == EMBEDDED_NAME_MAX && max == EMBEDDED_NAME_MAX) {
> > -               kname = (char *)result;
> > -
> > +       if (len == EMBEDDED_NAME_MAX) {
> > 
> >                 result = kzalloc(sizeof(*result), GFP_KERNEL);
> >                 if (!result) {
> >                 
> >                         err = ERR_PTR(-ENOMEM);
> > 
> > -                       result = (struct filename *)kname;
> > +                       result = (struct filename *)(kname +
> > EMBEDDED_NAME_MAX);> 
> >                         goto error;
> >                 
> >                 }
> >                 result->name = kname;
> >                 result->separate = true;
> >                 result->refcnt = 1;
> > 
> > -               max = PATH_MAX;
> > -               goto recopy;
> > +               max = PATH_MAX - EMBEDDED_NAME_MAX;
> > +               /* we can't simply add the number of rest chars we copy to
> > len, +                * since strncpy_from_user may return negative to
> > indicate +                * something is wrong, so we do the addition
> > later, after +                * strncpy_from_user succeeds, and we know
> > we already copy +                * EMBEDDED_NAME_MAX chars.
> > +                */
> > +               len = strncpy_from_user(kname + EMBEDDED_NAME_MAX,
> > +                               filename + EMBEDDED_NAME_MAX, max);
> > +
> > +               if (unlikely(len < 0)) {
> > +                       err = ERR_PTR(len);
> > +                       goto error;
> > +               }
> > +               len += EMBEDDED_NAME_MAX;
> > 
> >         }
> >         
> >         /* The empty path is special. */
> > 
> > @@ -209,28 +219,30 @@ struct filename *
> > 
> >  getname_kernel(const char * filename)
> >  {
> >  
> >         struct filename *result;
> > 
> > +       char *kname;
> > 
> >         int len = strlen(filename) + 1;
> > 
> > -       result = __getname();
> > -       if (unlikely(!result))
> > +       kname = __getname();
> > +       if (unlikely(!kname))
> > 
> >                 return ERR_PTR(-ENOMEM);
> >         
> >         if (len <= EMBEDDED_NAME_MAX) {
> > 
> > -               result->name = (char *)(result) + sizeof(*result);
> > +               result = (struct filename *)(kname + EMBEDDED_NAME_MAX);
> > +               result->name = kname;
> > 
> >                 result->separate = false;
> >         
> >         } else if (len <= PATH_MAX) {
> >         
> >                 struct filename *tmp;
> >                 
> >                 tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);
> >                 if (unlikely(!tmp)) {
> > 
> > -                       __putname(result);
> > +                       __putname(kname);
> > 
> >                         return ERR_PTR(-ENOMEM);
> >                 
> >                 }
> > 
> > -               tmp->name = (char *)result;
> > +               tmp->name = kname;
> > 
> >                 tmp->separate = true;
> >                 result = tmp;
> >         
> >         } else {
> > 
> > -               __putname(result);
> > +               __putname(kname);
> > 
> >                 return ERR_PTR(-ENAMETOOLONG);
> >         
> >         }
> >         memcpy((char *)result->name, filename, len);
> > 
> > @@ -253,7 +265,7 @@ void putname(struct filename *name)
> > 
> >                 __putname(name->name);
> >                 kfree(name);
> >         
> >         } else
> > 
> > -               __putname(name);
> > +               __putname(name->name);
> > 
> >  }
> >  
> >  static int check_acl(struct inode *inode, int mask)
> > 
> > --
> > 2.3.0
Boqun Feng March 18, 2015, 5:27 a.m. UTC | #3
On Fri, Mar 13, 2015 at 09:45:59AM -0400, Paul Moore wrote:
> On Monday, March 09, 2015 04:24:32 PM Boqun Feng wrote:
> > Ping.
> > Any opinion?
> 
> You might want to look at some of the recent changes to Al's vfs.git#for-next 
> branch; at the very least it looks like your patch should be rebased against 
> those changes.

Thank you for your reminder ;-)

After learning several changes on that branch, I get a question for
commit ca160d0 "kill struct filename.separate".
   
I think the two following situations explains how that commit works.
(`iname` and `name` are fields in struct filename)

Not separate:
   |---PATH_MAX bytes by names_cachep ------|
   |--struct filename--|---space for name---|
                       ^
                       iname/name

name->iname == name->name is true
 
Separate:
   |--by kzalloc ------|      |---PATH_MAX bytes by names_cachep---|
   |--struct filename--|......|---space for name-------------------|
                       ^      ^
                       iname  name

name->iname == name->name is false

However, I think of a third situation, which we were unlucky, that
the bytes allocated by kzalloc and the bytes allocated by names_cachep
somehow become continous, like the following situation:

Separate:
   |--by kzalloc ------|---PATH_MAX bytes by names_cachep ------|
   |--struct filename--|---space for name-----------------------|
                       ^      
                       iname/name 

In this situation, the struct and the name are separate but 
name->iname == name->name is true

Since struct filename is small, so kzalloc will call kmem_cache_alloc
actually. As I don't know much about kmem_cache allocators, my question
is "Can the 'unlucky' situation happen now?" If the answer is no, can it
happen in the future considering there may be new kmem_cache allocating
algorithms?
   
Thanks and Best Regards,
Boqun Feng

> 
> > On Wed, Feb 25, 2015 at 8:31 PM, Boqun Feng <boqun.feng@gmail.com> wrote:
> > > In the current implementation of getname_flags, filename in the
> > > user-space will be recopied if it takes more space that
> > > EMBEDDED_NAME_MAX, however, at this moment, EMBEDDED_NAME_MAX bytes of
> > > the filename are already copied into kernel space, the only reason why
> > > the recopy is needed is that "kname" needs to be relocated.
> > > 
> > > And the recopy can be avoided if we change the memory layout of the
> > > "names_cache" allocation. By putting the struct "filename" at the tail
> > > of the allocation instead of the head, relocation of kname is avoided.
> > > 
> > > Once putting the struct at the tail, each byte in the user space will be
> > > copied only one time, so the recopy is avoided and code is more clear.
> > > 
> > > Of course, other functions aware of the layout of the names_cache
> > > allocation, i.e., getname_kernel and putname also need to be modified to
> > > adapt to the new layout.
> > > 
> > > This patch is based on v4.0-rc1.
> > > 
> > > Cc: Al Viro <viro@zeniv.linux.org.uk>
> > > Cc: Paul Moore <pmoore@redhat.com>
> > > Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
> > > ---
> > > 
> > >  fs/namei.c | 50 +++++++++++++++++++++++++++++++-------------------
> > >  1 file changed, 31 insertions(+), 19 deletions(-)
> > > 
> > > diff --git a/fs/namei.c b/fs/namei.c
> > > index c83145a..3be372b 100644
> > > --- a/fs/namei.c
> > > +++ b/fs/namei.c
> > > @@ -133,21 +133,20 @@ getname_flags(const char __user *filename, int
> > > flags, int *empty)> 
> > >         if (result)
> > >         
> > >                 return result;
> > > 
> > > -       result = __getname();
> > > -       if (unlikely(!result))
> > > +       kname = __getname();
> > > +       if (unlikely(!kname))
> > > 
> > >                 return ERR_PTR(-ENOMEM);
> > > 
> > > -       result->refcnt = 1;
> > > 
> > >         /*
> > >         
> > >          * First, try to embed the struct filename inside the names_cache
> > >          * allocation
> > >          */
> > > 
> > > -       kname = (char *)result + sizeof(*result);
> > > +       result = (struct filename *)(kname + EMBEDDED_NAME_MAX);
> > > 
> > >         result->name = kname;
> > >         result->separate = false;
> > > 
> > > +       result->refcnt = 1;
> > > 
> > >         max = EMBEDDED_NAME_MAX;
> > > 
> > > -recopy:
> > >         len = strncpy_from_user(kname, filename, max);
> > >         if (unlikely(len < 0)) {
> > >         
> > >                 err = ERR_PTR(len);
> > > 
> > > @@ -157,23 +156,34 @@ recopy:
> > >         /*
> > >         
> > >          * Uh-oh. We have a name that's approaching PATH_MAX. Allocate a
> > >          * separate struct filename so we can dedicate the entire
> > > 
> > > -        * names_cache allocation for the pathname, and re-do the copy
> > > from
> > > +        * names_cache allocation for the pathname, and continue the copy
> > > from> 
> > >          * userland.
> > >          */
> > > 
> > > -       if (len == EMBEDDED_NAME_MAX && max == EMBEDDED_NAME_MAX) {
> > > -               kname = (char *)result;
> > > -
> > > +       if (len == EMBEDDED_NAME_MAX) {
> > > 
> > >                 result = kzalloc(sizeof(*result), GFP_KERNEL);
> > >                 if (!result) {
> > >                 
> > >                         err = ERR_PTR(-ENOMEM);
> > > 
> > > -                       result = (struct filename *)kname;
> > > +                       result = (struct filename *)(kname +
> > > EMBEDDED_NAME_MAX);> 
> > >                         goto error;
> > >                 
> > >                 }
> > >                 result->name = kname;
> > >                 result->separate = true;
> > >                 result->refcnt = 1;
> > > 
> > > -               max = PATH_MAX;
> > > -               goto recopy;
> > > +               max = PATH_MAX - EMBEDDED_NAME_MAX;
> > > +               /* we can't simply add the number of rest chars we copy to
> > > len, +                * since strncpy_from_user may return negative to
> > > indicate +                * something is wrong, so we do the addition
> > > later, after +                * strncpy_from_user succeeds, and we know
> > > we already copy +                * EMBEDDED_NAME_MAX chars.
> > > +                */
> > > +               len = strncpy_from_user(kname + EMBEDDED_NAME_MAX,
> > > +                               filename + EMBEDDED_NAME_MAX, max);
> > > +
> > > +               if (unlikely(len < 0)) {
> > > +                       err = ERR_PTR(len);
> > > +                       goto error;
> > > +               }
> > > +               len += EMBEDDED_NAME_MAX;
> > > 
> > >         }
> > >         
> > >         /* The empty path is special. */
> > > 
> > > @@ -209,28 +219,30 @@ struct filename *
> > > 
> > >  getname_kernel(const char * filename)
> > >  {
> > >  
> > >         struct filename *result;
> > > 
> > > +       char *kname;
> > > 
> > >         int len = strlen(filename) + 1;
> > > 
> > > -       result = __getname();
> > > -       if (unlikely(!result))
> > > +       kname = __getname();
> > > +       if (unlikely(!kname))
> > > 
> > >                 return ERR_PTR(-ENOMEM);
> > >         
> > >         if (len <= EMBEDDED_NAME_MAX) {
> > > 
> > > -               result->name = (char *)(result) + sizeof(*result);
> > > +               result = (struct filename *)(kname + EMBEDDED_NAME_MAX);
> > > +               result->name = kname;
> > > 
> > >                 result->separate = false;
> > >         
> > >         } else if (len <= PATH_MAX) {
> > >         
> > >                 struct filename *tmp;
> > >                 
> > >                 tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);
> > >                 if (unlikely(!tmp)) {
> > > 
> > > -                       __putname(result);
> > > +                       __putname(kname);
> > > 
> > >                         return ERR_PTR(-ENOMEM);
> > >                 
> > >                 }
> > > 
> > > -               tmp->name = (char *)result;
> > > +               tmp->name = kname;
> > > 
> > >                 tmp->separate = true;
> > >                 result = tmp;
> > >         
> > >         } else {
> > > 
> > > -               __putname(result);
> > > +               __putname(kname);
> > > 
> > >                 return ERR_PTR(-ENAMETOOLONG);
> > >         
> > >         }
> > >         memcpy((char *)result->name, filename, len);
> > > 
> > > @@ -253,7 +265,7 @@ void putname(struct filename *name)
> > > 
> > >                 __putname(name->name);
> > >                 kfree(name);
> > >         
> > >         } else
> > > 
> > > -               __putname(name);
> > > +               __putname(name->name);
> > > 
> > >  }
> > >  
> > >  static int check_acl(struct inode *inode, int mask)
> > > 
> > > --
> > > 2.3.0
> 
> -- 
> paul moore
> security @ redhat
>
Boqun Feng March 21, 2015, 4:22 a.m. UTC | #4
Hi Al,

Ping and sorry to bother you.
Could you please have a look at my question? Thank you!

Regards,
Boqun Feng

On Wed, Mar 18, 2015 at 01:27:24PM +0800, Boqun Feng wrote:
> On Fri, Mar 13, 2015 at 09:45:59AM -0400, Paul Moore wrote:
> > On Monday, March 09, 2015 04:24:32 PM Boqun Feng wrote:
> > > Ping.
> > > Any opinion?
> > 
> > You might want to look at some of the recent changes to Al's vfs.git#for-next 
> > branch; at the very least it looks like your patch should be rebased against 
> > those changes.
> 
> Thank you for your reminder ;-)
> 
> After learning several changes on that branch, I get a question for
> commit ca160d0 "kill struct filename.separate".
>    
> I think the two following situations explains how that commit works.
> (`iname` and `name` are fields in struct filename)
> 
> Not separate:
>    |---PATH_MAX bytes by names_cachep ------|
>    |--struct filename--|---space for name---|
>                        ^
>                        iname/name
> 
> name->iname == name->name is true
>  
> Separate:
>    |--by kzalloc ------|      |---PATH_MAX bytes by names_cachep---|
>    |--struct filename--|......|---space for name-------------------|
>                        ^      ^
>                        iname  name
> 
> name->iname == name->name is false
> 
> However, I think of a third situation, which we were unlucky, that
> the bytes allocated by kzalloc and the bytes allocated by names_cachep
> somehow become continous, like the following situation:
> 
> Separate:
>    |--by kzalloc ------|---PATH_MAX bytes by names_cachep ------|
>    |--struct filename--|---space for name-----------------------|
>                        ^      
>                        iname/name 
> 
> In this situation, the struct and the name are separate but 
> name->iname == name->name is true
> 
> Since struct filename is small, so kzalloc will call kmem_cache_alloc
> actually. As I don't know much about kmem_cache allocators, my question
> is "Can the 'unlucky' situation happen now?" If the answer is no, can it
> happen in the future considering there may be new kmem_cache allocating
> algorithms?
>    
> Thanks and Best Regards,
> Boqun Feng
> 
> > 
> > > On Wed, Feb 25, 2015 at 8:31 PM, Boqun Feng <boqun.feng@gmail.com> wrote:
> > > > In the current implementation of getname_flags, filename in the
> > > > user-space will be recopied if it takes more space that
> > > > EMBEDDED_NAME_MAX, however, at this moment, EMBEDDED_NAME_MAX bytes of
> > > > the filename are already copied into kernel space, the only reason why
> > > > the recopy is needed is that "kname" needs to be relocated.
> > > > 
> > > > And the recopy can be avoided if we change the memory layout of the
> > > > "names_cache" allocation. By putting the struct "filename" at the tail
> > > > of the allocation instead of the head, relocation of kname is avoided.
> > > > 
> > > > Once putting the struct at the tail, each byte in the user space will be
> > > > copied only one time, so the recopy is avoided and code is more clear.
> > > > 
> > > > Of course, other functions aware of the layout of the names_cache
> > > > allocation, i.e., getname_kernel and putname also need to be modified to
> > > > adapt to the new layout.
> > > > 
> > > > This patch is based on v4.0-rc1.
> > > > 
> > > > Cc: Al Viro <viro@zeniv.linux.org.uk>
> > > > Cc: Paul Moore <pmoore@redhat.com>
> > > > Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
> > > > ---
> > > > 
> > > >  fs/namei.c | 50 +++++++++++++++++++++++++++++++-------------------
> > > >  1 file changed, 31 insertions(+), 19 deletions(-)
> > > > 
> > > > diff --git a/fs/namei.c b/fs/namei.c
> > > > index c83145a..3be372b 100644
> > > > --- a/fs/namei.c
> > > > +++ b/fs/namei.c
> > > > @@ -133,21 +133,20 @@ getname_flags(const char __user *filename, int
> > > > flags, int *empty)> 
> > > >         if (result)
> > > >         
> > > >                 return result;
> > > > 
> > > > -       result = __getname();
> > > > -       if (unlikely(!result))
> > > > +       kname = __getname();
> > > > +       if (unlikely(!kname))
> > > > 
> > > >                 return ERR_PTR(-ENOMEM);
> > > > 
> > > > -       result->refcnt = 1;
> > > > 
> > > >         /*
> > > >         
> > > >          * First, try to embed the struct filename inside the names_cache
> > > >          * allocation
> > > >          */
> > > > 
> > > > -       kname = (char *)result + sizeof(*result);
> > > > +       result = (struct filename *)(kname + EMBEDDED_NAME_MAX);
> > > > 
> > > >         result->name = kname;
> > > >         result->separate = false;
> > > > 
> > > > +       result->refcnt = 1;
> > > > 
> > > >         max = EMBEDDED_NAME_MAX;
> > > > 
> > > > -recopy:
> > > >         len = strncpy_from_user(kname, filename, max);
> > > >         if (unlikely(len < 0)) {
> > > >         
> > > >                 err = ERR_PTR(len);
> > > > 
> > > > @@ -157,23 +156,34 @@ recopy:
> > > >         /*
> > > >         
> > > >          * Uh-oh. We have a name that's approaching PATH_MAX. Allocate a
> > > >          * separate struct filename so we can dedicate the entire
> > > > 
> > > > -        * names_cache allocation for the pathname, and re-do the copy
> > > > from
> > > > +        * names_cache allocation for the pathname, and continue the copy
> > > > from> 
> > > >          * userland.
> > > >          */
> > > > 
> > > > -       if (len == EMBEDDED_NAME_MAX && max == EMBEDDED_NAME_MAX) {
> > > > -               kname = (char *)result;
> > > > -
> > > > +       if (len == EMBEDDED_NAME_MAX) {
> > > > 
> > > >                 result = kzalloc(sizeof(*result), GFP_KERNEL);
> > > >                 if (!result) {
> > > >                 
> > > >                         err = ERR_PTR(-ENOMEM);
> > > > 
> > > > -                       result = (struct filename *)kname;
> > > > +                       result = (struct filename *)(kname +
> > > > EMBEDDED_NAME_MAX);> 
> > > >                         goto error;
> > > >                 
> > > >                 }
> > > >                 result->name = kname;
> > > >                 result->separate = true;
> > > >                 result->refcnt = 1;
> > > > 
> > > > -               max = PATH_MAX;
> > > > -               goto recopy;
> > > > +               max = PATH_MAX - EMBEDDED_NAME_MAX;
> > > > +               /* we can't simply add the number of rest chars we copy to
> > > > len, +                * since strncpy_from_user may return negative to
> > > > indicate +                * something is wrong, so we do the addition
> > > > later, after +                * strncpy_from_user succeeds, and we know
> > > > we already copy +                * EMBEDDED_NAME_MAX chars.
> > > > +                */
> > > > +               len = strncpy_from_user(kname + EMBEDDED_NAME_MAX,
> > > > +                               filename + EMBEDDED_NAME_MAX, max);
> > > > +
> > > > +               if (unlikely(len < 0)) {
> > > > +                       err = ERR_PTR(len);
> > > > +                       goto error;
> > > > +               }
> > > > +               len += EMBEDDED_NAME_MAX;
> > > > 
> > > >         }
> > > >         
> > > >         /* The empty path is special. */
> > > > 
> > > > @@ -209,28 +219,30 @@ struct filename *
> > > > 
> > > >  getname_kernel(const char * filename)
> > > >  {
> > > >  
> > > >         struct filename *result;
> > > > 
> > > > +       char *kname;
> > > > 
> > > >         int len = strlen(filename) + 1;
> > > > 
> > > > -       result = __getname();
> > > > -       if (unlikely(!result))
> > > > +       kname = __getname();
> > > > +       if (unlikely(!kname))
> > > > 
> > > >                 return ERR_PTR(-ENOMEM);
> > > >         
> > > >         if (len <= EMBEDDED_NAME_MAX) {
> > > > 
> > > > -               result->name = (char *)(result) + sizeof(*result);
> > > > +               result = (struct filename *)(kname + EMBEDDED_NAME_MAX);
> > > > +               result->name = kname;
> > > > 
> > > >                 result->separate = false;
> > > >         
> > > >         } else if (len <= PATH_MAX) {
> > > >         
> > > >                 struct filename *tmp;
> > > >                 
> > > >                 tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);
> > > >                 if (unlikely(!tmp)) {
> > > > 
> > > > -                       __putname(result);
> > > > +                       __putname(kname);
> > > > 
> > > >                         return ERR_PTR(-ENOMEM);
> > > >                 
> > > >                 }
> > > > 
> > > > -               tmp->name = (char *)result;
> > > > +               tmp->name = kname;
> > > > 
> > > >                 tmp->separate = true;
> > > >                 result = tmp;
> > > >         
> > > >         } else {
> > > > 
> > > > -               __putname(result);
> > > > +               __putname(kname);
> > > > 
> > > >                 return ERR_PTR(-ENAMETOOLONG);
> > > >         
> > > >         }
> > > >         memcpy((char *)result->name, filename, len);
> > > > 
> > > > @@ -253,7 +265,7 @@ void putname(struct filename *name)
> > > > 
> > > >                 __putname(name->name);
> > > >                 kfree(name);
> > > >         
> > > >         } else
> > > > 
> > > > -               __putname(name);
> > > > +               __putname(name->name);
> > > > 
> > > >  }
> > > >  
> > > >  static int check_acl(struct inode *inode, int mask)
> > > > 
> > > > --
> > > > 2.3.0
> > 
> > -- 
> > paul moore
> > security @ redhat
> >
diff mbox

Patch

diff --git a/fs/namei.c b/fs/namei.c
index c83145a..3be372b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -133,21 +133,20 @@  getname_flags(const char __user *filename, int flags, int *empty)
 	if (result)
 		return result;
 
-	result = __getname();
-	if (unlikely(!result))
+	kname = __getname();
+	if (unlikely(!kname))
 		return ERR_PTR(-ENOMEM);
-	result->refcnt = 1;
 
 	/*
 	 * First, try to embed the struct filename inside the names_cache
 	 * allocation
 	 */
-	kname = (char *)result + sizeof(*result);
+	result = (struct filename *)(kname + EMBEDDED_NAME_MAX);
 	result->name = kname;
 	result->separate = false;
+	result->refcnt = 1;
 	max = EMBEDDED_NAME_MAX;
 
-recopy:
 	len = strncpy_from_user(kname, filename, max);
 	if (unlikely(len < 0)) {
 		err = ERR_PTR(len);
@@ -157,23 +156,34 @@  recopy:
 	/*
 	 * Uh-oh. We have a name that's approaching PATH_MAX. Allocate a
 	 * separate struct filename so we can dedicate the entire
-	 * names_cache allocation for the pathname, and re-do the copy from
+	 * names_cache allocation for the pathname, and continue the copy from
 	 * userland.
 	 */
-	if (len == EMBEDDED_NAME_MAX && max == EMBEDDED_NAME_MAX) {
-		kname = (char *)result;
-
+	if (len == EMBEDDED_NAME_MAX) {
 		result = kzalloc(sizeof(*result), GFP_KERNEL);
 		if (!result) {
 			err = ERR_PTR(-ENOMEM);
-			result = (struct filename *)kname;
+			result = (struct filename *)(kname + EMBEDDED_NAME_MAX);
 			goto error;
 		}
 		result->name = kname;
 		result->separate = true;
 		result->refcnt = 1;
-		max = PATH_MAX;
-		goto recopy;
+		max = PATH_MAX - EMBEDDED_NAME_MAX;
+		/* we can't simply add the number of rest chars we copy to len,
+		 * since strncpy_from_user may return negative to indicate
+		 * something is wrong, so we do the addition later, after
+		 * strncpy_from_user succeeds, and we know we already copy
+		 * EMBEDDED_NAME_MAX chars.
+		 */
+		len = strncpy_from_user(kname + EMBEDDED_NAME_MAX,
+				filename + EMBEDDED_NAME_MAX, max);
+
+		if (unlikely(len < 0)) {
+			err = ERR_PTR(len);
+			goto error;
+		}
+		len += EMBEDDED_NAME_MAX;
 	}
 
 	/* The empty path is special. */
@@ -209,28 +219,30 @@  struct filename *
 getname_kernel(const char * filename)
 {
 	struct filename *result;
+	char *kname;
 	int len = strlen(filename) + 1;
 
-	result = __getname();
-	if (unlikely(!result))
+	kname = __getname();
+	if (unlikely(!kname))
 		return ERR_PTR(-ENOMEM);
 
 	if (len <= EMBEDDED_NAME_MAX) {
-		result->name = (char *)(result) + sizeof(*result);
+		result = (struct filename *)(kname + EMBEDDED_NAME_MAX);
+		result->name = kname;
 		result->separate = false;
 	} else if (len <= PATH_MAX) {
 		struct filename *tmp;
 
 		tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);
 		if (unlikely(!tmp)) {
-			__putname(result);
+			__putname(kname);
 			return ERR_PTR(-ENOMEM);
 		}
-		tmp->name = (char *)result;
+		tmp->name = kname;
 		tmp->separate = true;
 		result = tmp;
 	} else {
-		__putname(result);
+		__putname(kname);
 		return ERR_PTR(-ENAMETOOLONG);
 	}
 	memcpy((char *)result->name, filename, len);
@@ -253,7 +265,7 @@  void putname(struct filename *name)
 		__putname(name->name);
 		kfree(name);
 	} else
-		__putname(name);
+		__putname(name->name);
 }
 
 static int check_acl(struct inode *inode, int mask)