diff mbox series

[v2,1/4] ceph: always clone dentry name when building MDS request

Message ID 20190417183959.11296-2-jlayton@kernel.org (mailing list archive)
State New, archived
Headers show
Series ceph: dentry->d_name handing fixes | expand

Commit Message

Jeff Layton April 17, 2019, 6:39 p.m. UTC
Ben reported tripping the BUG_ON in create_request_message during some
performance testing. Analysis of the vmcore showed that the length of
the r_dentry->d_name string changed after we allocated the buffer, but
before we encoded it.

build_dentry_path returns pointers to d_name in the common case of
non-snapped dentries, but this optimization isn't safe. Instead, make a
copy of the string in this case. That is less efficient, but safe.

Cc: stable@vger.kernel.org
Reported-by: Ben England <bengland@redhat.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: "Yan, Zheng" <zyan@redhat.com>
---
 fs/ceph/mds_client.c | 40 ++++++++++++++++++++++++++++++++--------
 1 file changed, 32 insertions(+), 8 deletions(-)

Comments

Yan, Zheng April 18, 2019, 2:42 a.m. UTC | #1
How about cloning name only when parent dir is not locked.

Regards
Yan, Zheng

On Thu, Apr 18, 2019 at 2:43 AM Jeff Layton <jlayton@kernel.org> wrote:
>
> Ben reported tripping the BUG_ON in create_request_message during some
> performance testing. Analysis of the vmcore showed that the length of
> the r_dentry->d_name string changed after we allocated the buffer, but
> before we encoded it.
>
> build_dentry_path returns pointers to d_name in the common case of
> non-snapped dentries, but this optimization isn't safe. Instead, make a
> copy of the string in this case. That is less efficient, but safe.
>
> Cc: stable@vger.kernel.org
> Reported-by: Ben England <bengland@redhat.com>
> Signed-off-by: Jeff Layton <jlayton@kernel.org>
> Reviewed-by: "Yan, Zheng" <zyan@redhat.com>
> ---
>  fs/ceph/mds_client.c | 40 ++++++++++++++++++++++++++++++++--------
>  1 file changed, 32 insertions(+), 8 deletions(-)
>
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index e09feadd2ae1..4cfefe118128 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -2159,9 +2159,35 @@ char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base,
>         return path;
>  }
>
> +/* Duplicate the dentry->d_name.name safely */
> +static int clone_dentry_name(struct dentry *dentry, const char **ppath,
> +                            int *ppathlen)
> +{
> +       u32 len;
> +       char *name;
> +retry:
> +       len = READ_ONCE(dentry->d_name.len);
> +       name = kmalloc(len + 1, GFP_NOFS);
> +       if (!name)
> +               return -ENOMEM;
> +
> +       spin_lock(&dentry->d_lock);
> +       if (dentry->d_name.len != len) {
> +               spin_unlock(&dentry->d_lock);
> +               kfree(name);
> +               goto retry;
> +       }
> +       memcpy(name, dentry->d_name.name, len);
> +       spin_unlock(&dentry->d_lock);
> +
> +       name[len] = '\0';
> +       *ppath = name;
> +       *ppathlen = len;
> +       return 0;
> +}
> +
>  static int build_dentry_path(struct dentry *dentry, struct inode *dir,
> -                            const char **ppath, int *ppathlen, u64 *pino,
> -                            int *pfreepath)
> +                            const char **ppath, int *ppathlen, u64 *pino)
>  {
>         char *path;
>
> @@ -2171,16 +2197,13 @@ static int build_dentry_path(struct dentry *dentry, struct inode *dir,
>         if (dir && ceph_snap(dir) == CEPH_NOSNAP) {
>                 *pino = ceph_ino(dir);
>                 rcu_read_unlock();
> -               *ppath = dentry->d_name.name;
> -               *ppathlen = dentry->d_name.len;
> -               return 0;
> +               return clone_dentry_name(dentry, ppath, ppathlen);
>         }
>         rcu_read_unlock();
>         path = ceph_mdsc_build_path(dentry, ppathlen, pino, 1);
>         if (IS_ERR(path))
>                 return PTR_ERR(path);
>         *ppath = path;
> -       *pfreepath = 1;
>         return 0;
>  }
>
> @@ -2222,8 +2245,9 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
>                 dout(" inode %p %llx.%llx\n", rinode, ceph_ino(rinode),
>                      ceph_snap(rinode));
>         } else if (rdentry) {
> -               r = build_dentry_path(rdentry, rdiri, ppath, pathlen, ino,
> -                                       freepath);
> +               r = build_dentry_path(rdentry, rdiri, ppath, pathlen, ino);
> +               if (!r)
> +                       *freepath = 1;
>                 dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen,
>                      *ppath);
>         } else if (rpath || rino) {
> --
> 2.20.1
>
Jeff Layton April 18, 2019, 11:15 a.m. UTC | #2
On Wed, Apr 17, 2019 at 10:42 PM Yan, Zheng <ukernel@gmail.com> wrote:
>
> How about cloning name only when parent dir is not locked.
>
> Regards
> Yan, Zheng
>
> On Thu, Apr 18, 2019 at 2:43 AM Jeff Layton <jlayton@kernel.org> wrote:
> >
> > Ben reported tripping the BUG_ON in create_request_message during some
> > performance testing. Analysis of the vmcore showed that the length of
> > the r_dentry->d_name string changed after we allocated the buffer, but
> > before we encoded it.
> >
> > build_dentry_path returns pointers to d_name in the common case of
> > non-snapped dentries, but this optimization isn't safe. Instead, make a
> > copy of the string in this case. That is less efficient, but safe.
> >
> > Cc: stable@vger.kernel.org
> > Reported-by: Ben England <bengland@redhat.com>
> > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > Reviewed-by: "Yan, Zheng" <zyan@redhat.com>
> > ---
> >  fs/ceph/mds_client.c | 40 ++++++++++++++++++++++++++++++++--------
> >  1 file changed, 32 insertions(+), 8 deletions(-)
> >
> > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> > index e09feadd2ae1..4cfefe118128 100644
> > --- a/fs/ceph/mds_client.c
> > +++ b/fs/ceph/mds_client.c
> > @@ -2159,9 +2159,35 @@ char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base,
> >         return path;
> >  }
> >
> > +/* Duplicate the dentry->d_name.name safely */
> > +static int clone_dentry_name(struct dentry *dentry, const char **ppath,
> > +                            int *ppathlen)
> > +{
> > +       u32 len;
> > +       char *name;
> > +retry:
> > +       len = READ_ONCE(dentry->d_name.len);
> > +       name = kmalloc(len + 1, GFP_NOFS);
> > +       if (!name)
> > +               return -ENOMEM;
> > +
> > +       spin_lock(&dentry->d_lock);
> > +       if (dentry->d_name.len != len) {
> > +               spin_unlock(&dentry->d_lock);
> > +               kfree(name);
> > +               goto retry;
> > +       }
> > +       memcpy(name, dentry->d_name.name, len);
> > +       spin_unlock(&dentry->d_lock);
> > +
> > +       name[len] = '\0';
> > +       *ppath = name;
> > +       *ppathlen = len;
> > +       return 0;
> > +}
> > +
> >  static int build_dentry_path(struct dentry *dentry, struct inode *dir,
> > -                            const char **ppath, int *ppathlen, u64 *pino,
> > -                            int *pfreepath)
> > +                            const char **ppath, int *ppathlen, u64 *pino)
> >  {
> >         char *path;
> >
> > @@ -2171,16 +2197,13 @@ static int build_dentry_path(struct dentry *dentry, struct inode *dir,
> >         if (dir && ceph_snap(dir) == CEPH_NOSNAP) {
> >                 *pino = ceph_ino(dir);
> >                 rcu_read_unlock();
> > -               *ppath = dentry->d_name.name;
> > -               *ppathlen = dentry->d_name.len;
> > -               return 0;
> > +               return clone_dentry_name(dentry, ppath, ppathlen);
> >         }
> >         rcu_read_unlock();
> >         path = ceph_mdsc_build_path(dentry, ppathlen, pino, 1);
> >         if (IS_ERR(path))
> >                 return PTR_ERR(path);
> >         *ppath = path;
> > -       *pfreepath = 1;
> >         return 0;
> >  }
> >
> > @@ -2222,8 +2245,9 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
> >                 dout(" inode %p %llx.%llx\n", rinode, ceph_ino(rinode),
> >                      ceph_snap(rinode));
> >         } else if (rdentry) {
> > -               r = build_dentry_path(rdentry, rdiri, ppath, pathlen, ino,
> > -                                       freepath);
> > +               r = build_dentry_path(rdentry, rdiri, ppath, pathlen, ino);
> > +               if (!r)
> > +                       *freepath = 1;
> >                 dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen,
> >                      *ppath);
> >         } else if (rpath || rino) {
> > --
> > 2.20.1
> >

Good idea. I'll respin this one.
diff mbox series

Patch

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index e09feadd2ae1..4cfefe118128 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2159,9 +2159,35 @@  char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base,
 	return path;
 }
 
+/* Duplicate the dentry->d_name.name safely */
+static int clone_dentry_name(struct dentry *dentry, const char **ppath,
+			     int *ppathlen)
+{
+	u32 len;
+	char *name;
+retry:
+	len = READ_ONCE(dentry->d_name.len);
+	name = kmalloc(len + 1, GFP_NOFS);
+	if (!name)
+		return -ENOMEM;
+
+	spin_lock(&dentry->d_lock);
+	if (dentry->d_name.len != len) {
+		spin_unlock(&dentry->d_lock);
+		kfree(name);
+		goto retry;
+	}
+	memcpy(name, dentry->d_name.name, len);
+	spin_unlock(&dentry->d_lock);
+
+	name[len] = '\0';
+	*ppath = name;
+	*ppathlen = len;
+	return 0;
+}
+
 static int build_dentry_path(struct dentry *dentry, struct inode *dir,
-			     const char **ppath, int *ppathlen, u64 *pino,
-			     int *pfreepath)
+			     const char **ppath, int *ppathlen, u64 *pino)
 {
 	char *path;
 
@@ -2171,16 +2197,13 @@  static int build_dentry_path(struct dentry *dentry, struct inode *dir,
 	if (dir && ceph_snap(dir) == CEPH_NOSNAP) {
 		*pino = ceph_ino(dir);
 		rcu_read_unlock();
-		*ppath = dentry->d_name.name;
-		*ppathlen = dentry->d_name.len;
-		return 0;
+		return clone_dentry_name(dentry, ppath, ppathlen);
 	}
 	rcu_read_unlock();
 	path = ceph_mdsc_build_path(dentry, ppathlen, pino, 1);
 	if (IS_ERR(path))
 		return PTR_ERR(path);
 	*ppath = path;
-	*pfreepath = 1;
 	return 0;
 }
 
@@ -2222,8 +2245,9 @@  static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
 		dout(" inode %p %llx.%llx\n", rinode, ceph_ino(rinode),
 		     ceph_snap(rinode));
 	} else if (rdentry) {
-		r = build_dentry_path(rdentry, rdiri, ppath, pathlen, ino,
-					freepath);
+		r = build_dentry_path(rdentry, rdiri, ppath, pathlen, ino);
+		if (!r)
+			*freepath = 1;
 		dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen,
 		     *ppath);
 	} else if (rpath || rino) {